/[pcre]/code/trunk/pcre.c
ViewVC logotype

Contents of /code/trunk/pcre.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 19 - (hide annotations) (download)
Sat Feb 24 21:38:33 2007 UTC (7 years, 1 month ago) by nigel
File MIME type: text/plain
File size: 104324 byte(s)
Load pcre-1.08 into code/trunk.

1 nigel 3 /*************************************************
2     * Perl-Compatible Regular Expressions *
3     *************************************************/
4    
5     /*
6     This is a library of functions to support regular expressions whose syntax
7     and semantics are as close as possible to those of the Perl 5 language. See
8     the file Tech.Notes for some information on the internals.
9    
10     Written by: Philip Hazel <ph10@cam.ac.uk>
11    
12 nigel 15 Copyright (c) 1998 University of Cambridge
13 nigel 3
14     -----------------------------------------------------------------------------
15     Permission is granted to anyone to use this software for any purpose on any
16     computer system, and to redistribute it freely, subject to the following
17     restrictions:
18    
19     1. This software is distributed in the hope that it will be useful,
20     but WITHOUT ANY WARRANTY; without even the implied warranty of
21     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
22    
23     2. The origin of this software must not be misrepresented, either by
24     explicit claim or by omission.
25    
26     3. Altered versions must be plainly marked as such, and must not be
27     misrepresented as being the original software.
28     -----------------------------------------------------------------------------
29     */
30    
31    
32     /* Define DEBUG to get debugging output on stdout. */
33    
34     /* #define DEBUG */
35    
36 nigel 9 /* Use a macro for debugging printing, 'cause that eliminates the the use
37     of #ifdef inline, and there are *still* stupid compilers about that don't like
38     indented pre-processor statements. I suppose it's only been 10 years... */
39 nigel 3
40 nigel 9 #ifdef DEBUG
41     #define DPRINTF(p) printf p
42     #else
43     #define DPRINTF(p) /*nothing*/
44     #endif
45    
46 nigel 3 /* Include the internals header, which itself includes Standard C headers plus
47     the external pcre header. */
48    
49     #include "internal.h"
50    
51    
52 nigel 15 /* Allow compilation as C++ source code, should anybody want to do that. */
53    
54     #ifdef __cplusplus
55     #define class pcre_class
56     #endif
57    
58    
59 nigel 3 /* Min and max values for the common repeats; for the maxima, 0 => infinity */
60    
61 nigel 15 static const char rep_min[] = { 0, 0, 1, 1, 0, 0 };
62     static const char rep_max[] = { 0, 0, 0, 0, 1, 1 };
63 nigel 3
64 nigel 13 /* Text forms of OP_ values and things, for debugging (not all used) */
65 nigel 3
66     #ifdef DEBUG
67 nigel 7 static const char *OP_names[] = {
68     "End", "\\A", "\\B", "\\b", "\\D", "\\d",
69 nigel 3 "\\S", "\\s", "\\W", "\\w", "Cut", "\\Z", "^", "$", "Any", "chars",
70     "not",
71     "*", "*?", "+", "+?", "?", "??", "{", "{", "{",
72     "*", "*?", "+", "+?", "?", "??", "{", "{", "{",
73     "*", "*?", "+", "+?", "?", "??", "{", "{", "{",
74     "*", "*?", "+", "+?", "?", "??", "{", "{",
75 nigel 13 "class", "negclass", "Ref",
76 nigel 3 "Alt", "Ket", "KetRmax", "KetRmin", "Assert", "Assert not", "Once",
77     "Brazero", "Braminzero", "Bra"
78     };
79     #endif
80    
81     /* Table for handling escaped characters in the range '0'-'z'. Positive returns
82     are simple data values; negative values are for special things like \d and so
83     on. Zero means further processing is needed (for things like \x), or the escape
84     is invalid. */
85    
86 nigel 15 static const short int escapes[] = {
87 nigel 3 0, 0, 0, 0, 0, 0, 0, 0, /* 0 - 7 */
88     0, 0, ':', ';', '<', '=', '>', '?', /* 8 - ? */
89     '@', -ESC_A, -ESC_B, 0, -ESC_D, 0, 0, 0, /* @ - G */
90     0, 0, 0, 0, 0, 0, 0, 0, /* H - O */
91     0, 0, 0, -ESC_S, 0, 0, 0, -ESC_W, /* P - W */
92     0, 0, -ESC_Z, '[', '\\', ']', '^', '_', /* X - _ */
93     '`', 7, -ESC_b, 0, -ESC_d, 27, '\f', 0, /* ` - g */
94     0, 0, 0, 0, 0, 0, '\n', 0, /* h - o */
95     0, 0, '\r', -ESC_s, '\t', 0, 0, -ESC_w, /* p - w */
96     0, 0, 0 /* x - z */
97     };
98    
99     /* Definition to allow mutual recursion */
100    
101 nigel 13 static BOOL
102     compile_regex(int, int *, uschar **, const uschar **, const char **);
103 nigel 3
104     /* Structure for passing "static" information around between the functions
105     doing the matching, so that they are thread-safe. */
106    
107     typedef struct match_data {
108     int errorcode; /* As it says */
109     int *offset_vector; /* Offset vector */
110     int offset_end; /* One past the end */
111     BOOL offset_overflow; /* Set if too many extractions */
112     BOOL caseless; /* Case-independent flag */
113     BOOL runtime_caseless; /* Caseless forced at run time */
114     BOOL multiline; /* Multiline flag */
115     BOOL notbol; /* NOTBOL flag */
116     BOOL noteol; /* NOTEOL flag */
117     BOOL dotall; /* Dot matches any char */
118     BOOL endonly; /* Dollar not before final \n */
119 nigel 7 const uschar *start_subject; /* Start of the subject string */
120     const uschar *end_subject; /* End of the subject string */
121 nigel 3 jmp_buf fail_env; /* Environment for longjump() break out */
122 nigel 7 const uschar *end_match_ptr; /* Subject position at end match */
123 nigel 3 int end_offset_top; /* Highwater mark at end of match */
124     } match_data;
125    
126    
127    
128     /*************************************************
129     * Global variables *
130     *************************************************/
131    
132     /* PCRE is thread-clean and doesn't use any global variables in the normal
133     sense. However, it calls memory allocation and free functions via the two
134     indirections below, which are can be changed by the caller, but are shared
135     between all threads. */
136    
137     void *(*pcre_malloc)(size_t) = malloc;
138     void (*pcre_free)(void *) = free;
139    
140    
141    
142    
143     /*************************************************
144     * Return version string *
145     *************************************************/
146    
147 nigel 7 const char *
148 nigel 3 pcre_version(void)
149     {
150     return PCRE_VERSION;
151     }
152    
153    
154    
155    
156     /*************************************************
157     * Return info about a compiled pattern *
158     *************************************************/
159    
160     /* This function picks potentially useful data out of the private
161     structure.
162    
163     Arguments:
164     external_re points to compiled code
165     optptr where to pass back the options
166     first_char where to pass back the first character,
167     or -1 if multiline and all branches start ^,
168     or -2 otherwise
169    
170     Returns: number of identifying extraction brackets
171     or negative values on error
172     */
173    
174     int
175     pcre_info(const pcre *external_re, int *optptr, int *first_char)
176     {
177 nigel 7 const real_pcre *re = (const real_pcre *)external_re;
178 nigel 3 if (re == NULL) return PCRE_ERROR_NULL;
179     if (re->magic_number != MAGIC_NUMBER) return PCRE_ERROR_BADMAGIC;
180     if (optptr != NULL) *optptr = (re->options & PUBLIC_OPTIONS);
181     if (first_char != NULL)
182     *first_char = ((re->options & PCRE_FIRSTSET) != 0)? re->first_char :
183     ((re->options & PCRE_STARTLINE) != 0)? -1 : -2;
184     return re->top_bracket;
185     }
186    
187    
188    
189    
190     #ifdef DEBUG
191     /*************************************************
192     * Debugging function to print chars *
193     *************************************************/
194    
195     /* Print a sequence of chars in printable format, stopping at the end of the
196     subject if the requested.
197    
198     Arguments:
199     p points to characters
200     length number to print
201     is_subject TRUE if printing from within md->start_subject
202     md pointer to matching data block, if is_subject is TRUE
203    
204     Returns: nothing
205     */
206    
207 nigel 9 static void
208     pchars(const uschar *p, int length, BOOL is_subject, match_data *md)
209 nigel 3 {
210     int c;
211     if (is_subject && length > md->end_subject - p) length = md->end_subject - p;
212     while (length-- > 0)
213     if (isprint(c = *(p++))) printf("%c", c); else printf("\\x%02x", c);
214     }
215     #endif
216    
217    
218    
219    
220     /*************************************************
221     * Check subpattern for empty operand *
222     *************************************************/
223    
224     /* This function checks a bracketed subpattern to see if any of the paths
225     through it could match an empty string. This is used to diagnose an error if
226     such a subpattern is followed by a quantifier with an unlimited upper bound.
227    
228     Argument:
229     code points to the opening bracket
230    
231     Returns: TRUE or FALSE
232     */
233    
234     static BOOL
235     could_be_empty(uschar *code)
236     {
237     do {
238     uschar *cc = code + 3;
239    
240     /* Scan along the opcodes for this branch; as soon as we find something
241     that matches a non-empty string, break out and advance to test the next
242     branch. If we get to the end of the branch, return TRUE for the whole
243     sub-expression. */
244    
245     for (;;)
246     {
247     /* Test an embedded subpattern; if it could not be empty, break the
248     loop. Otherwise carry on in the branch. */
249    
250 nigel 5 if ((int)(*cc) >= OP_BRA || (int)(*cc) == OP_ONCE)
251 nigel 3 {
252     if (!could_be_empty(cc)) break;
253     do cc += (cc[1] << 8) + cc[2]; while (*cc == OP_ALT);
254     cc += 3;
255     }
256    
257     else switch (*cc)
258     {
259     /* Reached end of a branch: the subpattern may match the empty string */
260    
261     case OP_ALT:
262     case OP_KET:
263     case OP_KETRMAX:
264     case OP_KETRMIN:
265     return TRUE;
266    
267 nigel 17 /* Skip over entire bracket groups with zero lower bound */
268    
269     case OP_BRAZERO:
270     case OP_BRAMINZERO:
271     cc++;
272     /* Fall through */
273    
274 nigel 3 /* Skip over assertive subpatterns */
275    
276     case OP_ASSERT:
277     case OP_ASSERT_NOT:
278     do cc += (cc[1] << 8) + cc[2]; while (*cc == OP_ALT);
279     cc += 3;
280     break;
281    
282     /* Skip over things that don't match chars */
283    
284     case OP_SOD:
285     case OP_EOD:
286     case OP_CIRC:
287     case OP_DOLL:
288     case OP_NOT_WORD_BOUNDARY:
289     case OP_WORD_BOUNDARY:
290     cc++;
291     break;
292    
293     /* Skip over simple repeats with zero lower bound */
294    
295     case OP_STAR:
296     case OP_MINSTAR:
297     case OP_QUERY:
298     case OP_MINQUERY:
299 nigel 5 case OP_NOTSTAR:
300     case OP_NOTMINSTAR:
301     case OP_NOTQUERY:
302     case OP_NOTMINQUERY:
303 nigel 3 case OP_TYPESTAR:
304     case OP_TYPEMINSTAR:
305     case OP_TYPEQUERY:
306     case OP_TYPEMINQUERY:
307     cc += 2;
308     break;
309    
310     /* Skip over UPTOs (lower bound is zero) */
311    
312     case OP_UPTO:
313     case OP_MINUPTO:
314     case OP_TYPEUPTO:
315     case OP_TYPEMINUPTO:
316     cc += 4;
317     break;
318    
319     /* Check a class or a back reference for a zero minimum */
320    
321     case OP_CLASS:
322 nigel 13 case OP_NEGCLASS:
323 nigel 3 case OP_REF:
324 nigel 5 cc += (*cc == OP_REF)? 2 : 33;
325 nigel 3
326     switch (*cc)
327     {
328     case OP_CRSTAR:
329     case OP_CRMINSTAR:
330     case OP_CRQUERY:
331     case OP_CRMINQUERY:
332     cc++;
333     break;
334    
335     case OP_CRRANGE:
336     case OP_CRMINRANGE:
337     if ((cc[1] << 8) + cc[2] != 0) goto NEXT_BRANCH;
338     cc += 3;
339     break;
340    
341     default:
342     goto NEXT_BRANCH;
343     }
344     break;
345    
346     /* Anything else matches at least one character */
347    
348     default:
349     goto NEXT_BRANCH;
350     }
351     }
352    
353     NEXT_BRANCH:
354     code += (code[1] << 8) + code[2];
355     }
356     while (*code == OP_ALT);
357    
358     /* No branches match the empty string */
359    
360     return FALSE;
361     }
362    
363    
364    
365     /*************************************************
366     * Handle escapes *
367     *************************************************/
368    
369     /* This function is called when a \ has been encountered. It either returns a
370     positive value for a simple escape such as \n, or a negative value which
371     encodes one of the more complicated things such as \d. On entry, ptr is
372     pointing at the \. On exit, it is on the final character of the escape
373     sequence.
374    
375     Arguments:
376     ptrptr points to the pattern position pointer
377     errorptr points to the pointer to the error message
378     bracount number of previous extracting brackets
379     options the options bits
380     isclass TRUE if inside a character class
381    
382     Returns: zero or positive => a data character
383     negative => a special escape sequence
384     on error, errorptr is set
385     */
386    
387     static int
388 nigel 7 check_escape(const uschar **ptrptr, const char **errorptr, int bracount,
389     int options, BOOL isclass)
390 nigel 3 {
391 nigel 7 const uschar *ptr = *ptrptr;
392 nigel 3 int c = *(++ptr) & 255; /* Ensure > 0 on signed-char systems */
393     int i;
394    
395     if (c == 0) *errorptr = ERR1;
396    
397     /* Digits or letters may have special meaning; all others are literals. */
398    
399     else if (c < '0' || c > 'z') {}
400    
401     /* Do an initial lookup in a table. A non-zero result is something that can be
402     returned immediately. Otherwise further processing may be required. */
403    
404     else if ((i = escapes[c - '0']) != 0) c = i;
405    
406     /* Escapes that need further processing, or are illegal. */
407    
408     else
409     {
410 nigel 7 const uschar *oldptr;
411 nigel 3 switch (c)
412     {
413     /* The handling of escape sequences consisting of a string of digits
414     starting with one that is not zero is not straightforward. By experiment,
415     the way Perl works seems to be as follows:
416    
417     Outside a character class, the digits are read as a decimal number. If the
418     number is less than 10, or if there are that many previous extracting
419     left brackets, then it is a back reference. Otherwise, up to three octal
420     digits are read to form an escaped byte. Thus \123 is likely to be octal
421     123 (cf \0123, which is octal 012 followed by the literal 3). If the octal
422     value is greater than 377, the least significant 8 bits are taken. Inside a
423     character class, \ followed by a digit is always an octal number. */
424    
425     case '1': case '2': case '3': case '4': case '5':
426     case '6': case '7': case '8': case '9':
427    
428     if (!isclass)
429     {
430     oldptr = ptr;
431     c -= '0';
432     while ((pcre_ctypes[ptr[1]] & ctype_digit) != 0)
433     c = c * 10 + *(++ptr) - '0';
434     if (c < 10 || c <= bracount)
435     {
436     c = -(ESC_REF + c);
437     break;
438     }
439     ptr = oldptr; /* Put the pointer back and fall through */
440     }
441    
442     /* Handle an octal number following \. If the first digit is 8 or 9, Perl
443     generates a binary zero byte and treats the digit as a following literal.
444     Thus we have to pull back the pointer by one. */
445    
446     if ((c = *ptr) >= '8')
447     {
448     ptr--;
449     c = 0;
450     break;
451     }
452    
453     /* \0 always starts an octal number, but we may drop through to here with a
454     larger first octal digit */
455    
456     case '0':
457     c -= '0';
458     while(i++ < 2 && (pcre_ctypes[ptr[1]] & ctype_digit) != 0 &&
459     ptr[1] != '8' && ptr[1] != '9')
460     c = c * 8 + *(++ptr) - '0';
461     break;
462    
463     /* Special escapes not starting with a digit are straightforward */
464    
465     case 'x':
466     c = 0;
467     while (i++ < 2 && (pcre_ctypes[ptr[1]] & ctype_xdigit) != 0)
468     {
469     ptr++;
470     c = c * 16 + pcre_lcc[*ptr] -
471     (((pcre_ctypes[*ptr] & ctype_digit) != 0)? '0' : 'W');
472     }
473     break;
474    
475     case 'c':
476     c = *(++ptr);
477     if (c == 0)
478     {
479     *errorptr = ERR2;
480     return 0;
481     }
482    
483     /* A letter is upper-cased; then the 0x40 bit is flipped */
484    
485     if (c >= 'a' && c <= 'z') c = pcre_fcc[c];
486     c ^= 0x40;
487     break;
488    
489     /* PCRE_EXTRA enables extensions to Perl in the matter of escapes. Any
490     other alphameric following \ is an error if PCRE_EXTRA was set; otherwise,
491     for Perl compatibility, it is a literal. */
492    
493     default:
494     if ((options & PCRE_EXTRA) != 0) switch(c)
495     {
496     case 'X':
497     c = -ESC_X; /* This could be a lookup if it ever got into Perl */
498     break;
499    
500     default:
501     *errorptr = ERR3;
502     break;
503     }
504     break;
505     }
506     }
507    
508     *ptrptr = ptr;
509     return c;
510     }
511    
512    
513    
514     /*************************************************
515     * Check for counted repeat *
516     *************************************************/
517    
518     /* This function is called when a '{' is encountered in a place where it might
519     start a quantifier. It looks ahead to see if it really is a quantifier or not.
520     It is only a quantifier if it is one of the forms {ddd} {ddd,} or {ddd,ddd}
521     where the ddds are digits.
522    
523     Arguments:
524     p pointer to the first char after '{'
525    
526     Returns: TRUE or FALSE
527     */
528    
529     static BOOL
530 nigel 7 is_counted_repeat(const uschar *p)
531 nigel 3 {
532     if ((pcre_ctypes[*p++] & ctype_digit) == 0) return FALSE;
533     while ((pcre_ctypes[*p] & ctype_digit) != 0) p++;
534     if (*p == '}') return TRUE;
535    
536     if (*p++ != ',') return FALSE;
537     if (*p == '}') return TRUE;
538    
539     if ((pcre_ctypes[*p++] & ctype_digit) == 0) return FALSE;
540     while ((pcre_ctypes[*p] & ctype_digit) != 0) p++;
541     return (*p == '}');
542     }
543    
544    
545    
546     /*************************************************
547     * Read repeat counts *
548     *************************************************/
549    
550     /* Read an item of the form {n,m} and return the values. This is called only
551     after is_counted_repeat() has confirmed that a repeat-count quantifier exists,
552     so the syntax is guaranteed to be correct, but we need to check the values.
553    
554     Arguments:
555     p pointer to first char after '{'
556     minp pointer to int for min
557     maxp pointer to int for max
558     returned as -1 if no max
559     errorptr points to pointer to error message
560    
561     Returns: pointer to '}' on success;
562     current ptr on error, with errorptr set
563     */
564    
565 nigel 7 static const uschar *
566     read_repeat_counts(const uschar *p, int *minp, int *maxp, const char **errorptr)
567 nigel 3 {
568     int min = 0;
569     int max = -1;
570    
571     while ((pcre_ctypes[*p] & ctype_digit) != 0) min = min * 10 + *p++ - '0';
572    
573     if (*p == '}') max = min; else
574     {
575     if (*(++p) != '}')
576     {
577     max = 0;
578     while((pcre_ctypes[*p] & ctype_digit) != 0) max = max * 10 + *p++ - '0';
579     if (max < min)
580     {
581     *errorptr = ERR4;
582     return p;
583     }
584     }
585     }
586    
587     /* Do paranoid checks, then fill in the required variables, and pass back the
588     pointer to the terminating '}'. */
589    
590     if (min > 65535 || max > 65535)
591     *errorptr = ERR5;
592     else
593     {
594     *minp = min;
595     *maxp = max;
596     }
597     return p;
598     }
599    
600    
601    
602     /*************************************************
603     * Compile one branch *
604     *************************************************/
605    
606     /* Scan the pattern, compiling it into the code vector.
607    
608     Arguments:
609     options the option bits
610     bracket points to number of brackets used
611     code points to the pointer to the current code point
612     ptrptr points to the current pattern pointer
613     errorptr points to pointer to error message
614    
615     Returns: TRUE on success
616     FALSE, with *errorptr set on error
617     */
618    
619     static BOOL
620 nigel 7 compile_branch(int options, int *brackets, uschar **codeptr,
621     const uschar **ptrptr, const char **errorptr)
622 nigel 3 {
623     int repeat_type, op_type;
624     int repeat_min, repeat_max;
625     int bravalue, length;
626 nigel 19 int greedy_default, greedy_non_default;
627 nigel 3 register int c;
628     register uschar *code = *codeptr;
629 nigel 7 const uschar *ptr = *ptrptr;
630     const uschar *oldptr;
631 nigel 3 uschar *previous = NULL;
632     uschar class[32];
633    
634 nigel 19 /* Set up the default and non-default settings for greediness */
635    
636     greedy_default = ((options & PCRE_UNGREEDY) != 0);
637     greedy_non_default = greedy_default ^ 1;
638    
639 nigel 3 /* Switch on next character until the end of the branch */
640    
641     for (;; ptr++)
642     {
643     BOOL negate_class;
644     int class_charcount;
645     int class_lastchar;
646    
647     c = *ptr;
648     if ((options & PCRE_EXTENDED) != 0)
649     {
650     if ((pcre_ctypes[c] & ctype_space) != 0) continue;
651     if (c == '#')
652     {
653     while ((c = *(++ptr)) != 0 && c != '\n');
654     continue;
655     }
656     }
657    
658     switch(c)
659     {
660     /* The branch terminates at end of string, |, or ). */
661    
662     case 0:
663     case '|':
664     case ')':
665     *codeptr = code;
666     *ptrptr = ptr;
667     return TRUE;
668    
669     /* Handle single-character metacharacters */
670    
671     case '^':
672     previous = NULL;
673     *code++ = OP_CIRC;
674     break;
675    
676     case '$':
677     previous = NULL;
678     *code++ = OP_DOLL;
679     break;
680    
681     case '.':
682     previous = code;
683     *code++ = OP_ANY;
684     break;
685    
686     /* Character classes. These always build a 32-byte bitmap of the permitted
687     characters, except in the special case where there is only one character.
688     For negated classes, we build the map as usual, then invert it at the end.
689     */
690    
691     case '[':
692     previous = code;
693    
694 nigel 13 /* If the first character is '^', set the negation flag, and use a
695     different opcode. This only matters if caseless matching is specified at
696     runtime. */
697 nigel 3
698     if ((c = *(++ptr)) == '^')
699     {
700     negate_class = TRUE;
701 nigel 13 *code++ = OP_NEGCLASS;
702 nigel 3 c = *(++ptr);
703     }
704 nigel 13 else
705     {
706     negate_class = FALSE;
707     *code++ = OP_CLASS;
708     }
709 nigel 3
710     /* Keep a count of chars so that we can optimize the case of just a single
711     character. */
712    
713     class_charcount = 0;
714     class_lastchar = -1;
715    
716     /* Initialize the 32-char bit map to all zeros. We have to build the
717     map in a temporary bit of store, in case the class contains only 1
718     character, because in that case the compiled code doesn't use the
719     bit map. */
720    
721     memset(class, 0, 32 * sizeof(uschar));
722    
723     /* Process characters until ] is reached. By writing this as a "do" it
724     means that an initial ] is taken as a data character. */
725    
726     do
727     {
728     if (c == 0)
729     {
730     *errorptr = ERR6;
731     goto FAILED;
732     }
733    
734     /* Backslash may introduce a single character, or it may introduce one
735     of the specials, which just set a flag. Escaped items are checked for
736     validity in the pre-compiling pass. The sequence \b is a special case.
737 nigel 7 Inside a class (and only there) it is treated as backspace. Elsewhere
738 nigel 3 it marks a word boundary. Other escapes have preset maps ready to
739     or into the one we are building. We assume they have more than one
740     character in them, so set class_count bigger than one. */
741    
742     if (c == '\\')
743     {
744     c = check_escape(&ptr, errorptr, *brackets, options, TRUE);
745     if (-c == ESC_b) c = '\b';
746     else if (c < 0)
747     {
748     class_charcount = 10;
749     switch (-c)
750     {
751     case ESC_d:
752     for (c = 0; c < 32; c++) class[c] |= pcre_cbits[c+cbit_digit];
753     continue;
754    
755     case ESC_D:
756     for (c = 0; c < 32; c++) class[c] |= ~pcre_cbits[c+cbit_digit];
757     continue;
758    
759     case ESC_w:
760     for (c = 0; c < 32; c++)
761     class[c] |= (pcre_cbits[c] | pcre_cbits[c+cbit_word]);
762     continue;
763    
764     case ESC_W:
765     for (c = 0; c < 32; c++)
766     class[c] |= ~(pcre_cbits[c] | pcre_cbits[c+cbit_word]);
767     continue;
768    
769     case ESC_s:
770     for (c = 0; c < 32; c++) class[c] |= pcre_cbits[c+cbit_space];
771     continue;
772    
773     case ESC_S:
774     for (c = 0; c < 32; c++) class[c] |= ~pcre_cbits[c+cbit_space];
775     continue;
776    
777     default:
778     *errorptr = ERR7;
779     goto FAILED;
780     }
781     }
782     /* Fall through if single character */
783     }
784    
785     /* A single character may be followed by '-' to form a range. However,
786     Perl does not permit ']' to be the end of the range. A '-' character
787     here is treated as a literal. */
788    
789     if (ptr[1] == '-' && ptr[2] != ']')
790     {
791     int d;
792     ptr += 2;
793     d = *ptr;
794    
795     if (d == 0)
796     {
797     *errorptr = ERR6;
798     goto FAILED;
799     }
800    
801     /* The second part of a range can be a single-character escape, but
802     not any of the other escapes. */
803    
804     if (d == '\\')
805     {
806     d = check_escape(&ptr, errorptr, *brackets, options, TRUE);
807     if (d < 0)
808     {
809     if (d == -ESC_b) d = '\b'; else
810     {
811     *errorptr = ERR7;
812     goto FAILED;
813     }
814     }
815     }
816    
817     if (d < c)
818     {
819     *errorptr = ERR8;
820     goto FAILED;
821     }
822    
823     for (; c <= d; c++)
824     {
825     class[c/8] |= (1 << (c&7));
826     if ((options & PCRE_CASELESS) != 0)
827     {
828     int uc = pcre_fcc[c]; /* flip case */
829     class[uc/8] |= (1 << (uc&7));
830     }
831     class_charcount++; /* in case a one-char range */
832     class_lastchar = c;
833     }
834     continue; /* Go get the next char in the class */
835     }
836    
837     /* Handle a lone single character - we can get here for a normal
838     non-escape char, or after \ that introduces a single character. */
839    
840     class [c/8] |= (1 << (c&7));
841     if ((options & PCRE_CASELESS) != 0)
842     {
843     c = pcre_fcc[c]; /* flip case */
844     class[c/8] |= (1 << (c&7));
845     }
846     class_charcount++;
847     class_lastchar = c;
848     }
849    
850     /* Loop until ']' reached; the check for end of string happens inside the
851     loop. This "while" is the end of the "do" above. */
852    
853     while ((c = *(++ptr)) != ']');
854    
855     /* If class_charcount is 1 and class_lastchar is not negative, we saw
856     precisely one character. This doesn't need the whole 32-byte bit map.
857     We turn it into a 1-character OP_CHAR if it's positive, or OP_NOT if
858     it's negative. */
859    
860     if (class_charcount == 1 && class_lastchar >= 0)
861     {
862     if (negate_class)
863     {
864     code[-1] = OP_NOT;
865     }
866     else
867     {
868     code[-1] = OP_CHARS;
869     *code++ = 1;
870     }
871     *code++ = class_lastchar;
872     }
873    
874     /* Otherwise, negate the 32-byte map if necessary, and copy it into
875     the code vector. */
876    
877     else
878     {
879     if (negate_class)
880     for (c = 0; c < 32; c++) code[c] = ~class[c];
881     else
882     memcpy(code, class, 32);
883     code += 32;
884     }
885     break;
886    
887     /* Various kinds of repeat */
888    
889     case '{':
890     if (!is_counted_repeat(ptr+1)) goto NORMAL_CHAR;
891     ptr = read_repeat_counts(ptr+1, &repeat_min, &repeat_max, errorptr);
892     if (*errorptr != NULL) goto FAILED;
893     goto REPEAT;
894    
895     case '*':
896     repeat_min = 0;
897     repeat_max = -1;
898     goto REPEAT;
899    
900     case '+':
901     repeat_min = 1;
902     repeat_max = -1;
903     goto REPEAT;
904    
905     case '?':
906     repeat_min = 0;
907     repeat_max = 1;
908    
909     REPEAT:
910     if (previous == NULL)
911     {
912     *errorptr = ERR9;
913     goto FAILED;
914     }
915    
916 nigel 19 /* If the next character is '?' this is a minimizing repeat, by default,
917     but if PCRE_UNGREEDY is set, it works the other way round. Advance to the
918 nigel 3 next character. */
919    
920 nigel 19 if (ptr[1] == '?')
921     { repeat_type = greedy_non_default; ptr++; }
922     else repeat_type = greedy_default;
923 nigel 3
924     /* If the maximum is zero then the minimum must also be zero; Perl allows
925     this case, so we do too - by simply omitting the item altogether. */
926    
927     if (repeat_max == 0) code = previous;
928    
929     /* If previous was a string of characters, chop off the last one and use it
930     as the subject of the repeat. If there was only one character, we can
931     abolish the previous item altogether. */
932    
933     else if (*previous == OP_CHARS)
934     {
935     int len = previous[1];
936     if (len == 1)
937     {
938     c = previous[2];
939     code = previous;
940     }
941     else
942     {
943     c = previous[len+1];
944     previous[1]--;
945     code--;
946     }
947     op_type = 0; /* Use single-char op codes */
948     goto OUTPUT_SINGLE_REPEAT; /* Code shared with single character types */
949     }
950    
951     /* If previous was a single negated character ([^a] or similar), we use
952     one of the special opcodes, replacing it. The code is shared with single-
953     character repeats by adding a suitable offset into repeat_type. */
954    
955     else if ((int)*previous == OP_NOT)
956     {
957     op_type = OP_NOTSTAR - OP_STAR; /* Use "not" opcodes */
958     c = previous[1];
959     code = previous;
960     goto OUTPUT_SINGLE_REPEAT;
961     }
962    
963     /* If previous was a character type match (\d or similar), abolish it and
964     create a suitable repeat item. The code is shared with single-character
965     repeats by adding a suitable offset into repeat_type. */
966    
967     else if ((int)*previous < OP_EOD || *previous == OP_ANY)
968     {
969     op_type = OP_TYPESTAR - OP_STAR; /* Use type opcodes */
970     c = *previous;
971     code = previous;
972    
973     OUTPUT_SINGLE_REPEAT:
974     repeat_type += op_type; /* Combine both values for many cases */
975    
976     /* A minimum of zero is handled either as the special case * or ?, or as
977     an UPTO, with the maximum given. */
978    
979     if (repeat_min == 0)
980     {
981     if (repeat_max == -1) *code++ = OP_STAR + repeat_type;
982     else if (repeat_max == 1) *code++ = OP_QUERY + repeat_type;
983     else
984     {
985     *code++ = OP_UPTO + repeat_type;
986     *code++ = repeat_max >> 8;
987     *code++ = (repeat_max & 255);
988     }
989     }
990    
991     /* The case {1,} is handled as the special case + */
992    
993     else if (repeat_min == 1 && repeat_max == -1)
994     *code++ = OP_PLUS + repeat_type;
995    
996     /* The case {n,n} is just an EXACT, while the general case {n,m} is
997     handled as an EXACT followed by an UPTO. An EXACT of 1 is optimized. */
998    
999     else
1000     {
1001     if (repeat_min != 1)
1002     {
1003     *code++ = OP_EXACT + op_type; /* NB EXACT doesn't have repeat_type */
1004     *code++ = repeat_min >> 8;
1005     *code++ = (repeat_min & 255);
1006     }
1007    
1008     /* If the mininum is 1 and the previous item was a character string,
1009     we either have to put back the item that got cancelled if the string
1010     length was 1, or add the character back onto the end of a longer
1011     string. For a character type nothing need be done; it will just get put
1012     back naturally. */
1013    
1014     else if (*previous == OP_CHARS)
1015     {
1016     if (code == previous) code += 2; else previous[1]++;
1017     }
1018    
1019 nigel 9 /* If the maximum is unlimited, insert an OP_STAR. */
1020 nigel 3
1021 nigel 9 if (repeat_max < 0)
1022 nigel 3 {
1023     *code++ = c;
1024 nigel 9 *code++ = OP_STAR + repeat_type;
1025     }
1026    
1027     /* Else insert an UPTO if the max is greater than the min. */
1028    
1029     else if (repeat_max != repeat_min)
1030     {
1031     *code++ = c;
1032 nigel 3 repeat_max -= repeat_min;
1033     *code++ = OP_UPTO + repeat_type;
1034     *code++ = repeat_max >> 8;
1035     *code++ = (repeat_max & 255);
1036     }
1037     }
1038    
1039     /* The character or character type itself comes last in all cases. */
1040    
1041     *code++ = c;
1042     }
1043    
1044     /* If previous was a character class or a back reference, we put the repeat
1045     stuff after it. */
1046    
1047 nigel 13 else if (*previous == OP_CLASS || *previous == OP_NEGCLASS ||
1048     *previous == OP_REF)
1049 nigel 3 {
1050     if (repeat_min == 0 && repeat_max == -1)
1051     *code++ = OP_CRSTAR + repeat_type;
1052     else if (repeat_min == 1 && repeat_max == -1)
1053     *code++ = OP_CRPLUS + repeat_type;
1054     else if (repeat_min == 0 && repeat_max == 1)
1055     *code++ = OP_CRQUERY + repeat_type;
1056     else
1057     {
1058     *code++ = OP_CRRANGE + repeat_type;
1059     *code++ = repeat_min >> 8;
1060     *code++ = repeat_min & 255;
1061     if (repeat_max == -1) repeat_max = 0; /* 2-byte encoding for max */
1062     *code++ = repeat_max >> 8;
1063     *code++ = repeat_max & 255;
1064     }
1065     }
1066    
1067     /* If previous was a bracket group, we may have to replicate it in certain
1068     cases. If the maximum repeat count is unlimited, check that the bracket
1069     group cannot match the empty string, and diagnose an error if it can. */
1070    
1071     else if ((int)*previous >= OP_BRA)
1072     {
1073     int i;
1074 nigel 9 int len = code - previous;
1075 nigel 3
1076     if (repeat_max == -1 && could_be_empty(previous))
1077     {
1078     *errorptr = ERR10;
1079     goto FAILED;
1080     }
1081    
1082     /* If the minimum is greater than zero, and the maximum is unlimited or
1083     equal to the minimum, the first copy remains where it is, and is
1084     replicated up to the minimum number of times. This case includes the +
1085     repeat, but of course no replication is needed in that case. */
1086    
1087     if (repeat_min > 0 && (repeat_max == -1 || repeat_max == repeat_min))
1088     {
1089     for (i = 1; i < repeat_min; i++)
1090     {
1091 nigel 9 memcpy(code, previous, len);
1092     code += len;
1093 nigel 3 }
1094     }
1095    
1096     /* If the minimum is zero, stick BRAZERO in front of the first copy.
1097     Then, if there is a fixed upper limit, replicated up to that many times,
1098     sticking BRAZERO in front of all the optional ones. */
1099    
1100     else
1101     {
1102     if (repeat_min == 0)
1103     {
1104 nigel 9 memmove(previous+1, previous, len);
1105 nigel 3 code++;
1106     *previous++ = OP_BRAZERO + repeat_type;
1107     }
1108    
1109     for (i = 1; i < repeat_min; i++)
1110     {
1111 nigel 9 memcpy(code, previous, len);
1112     code += len;
1113 nigel 3 }
1114    
1115     for (i = (repeat_min > 0)? repeat_min : 1; i < repeat_max; i++)
1116     {
1117     *code++ = OP_BRAZERO + repeat_type;
1118 nigel 9 memcpy(code, previous, len);
1119     code += len;
1120 nigel 3 }
1121     }
1122    
1123     /* If the maximum is unlimited, set a repeater in the final copy. */
1124    
1125     if (repeat_max == -1) code[-3] = OP_KETRMAX + repeat_type;
1126     }
1127    
1128     /* Else there's some kind of shambles */
1129    
1130     else
1131     {
1132     *errorptr = ERR11;
1133     goto FAILED;
1134     }
1135    
1136     /* In all case we no longer have a previous item. */
1137    
1138     previous = NULL;
1139     break;
1140    
1141    
1142     /* Start of nested bracket sub-expression, or comment or lookahead.
1143     First deal with special things that can come after a bracket; all are
1144     introduced by ?, and the appearance of any of them means that this is not a
1145     referencing group. They were checked for validity in the first pass over
1146     the string, so we don't have to check for syntax errors here. */
1147    
1148     case '(':
1149     previous = code; /* Only real brackets can be repeated */
1150     if (*(++ptr) == '?')
1151     {
1152     bravalue = OP_BRA;
1153    
1154     switch (*(++ptr))
1155     {
1156     case '#':
1157     case 'i':
1158     case 'm':
1159     case 's':
1160     case 'x':
1161 nigel 19 case 'U':
1162     case 'X':
1163 nigel 3 ptr++;
1164     while (*ptr != ')') ptr++;
1165     previous = NULL;
1166     continue;
1167    
1168     case ':': /* Non-extracting bracket */
1169     ptr++;
1170     break;
1171    
1172     case '=': /* Assertions can't be repeated */
1173     bravalue = OP_ASSERT;
1174     ptr++;
1175     previous = NULL;
1176     break;
1177    
1178     case '!':
1179     bravalue = OP_ASSERT_NOT;
1180     ptr++;
1181     previous = NULL;
1182     break;
1183    
1184     case '>': /* "Match once" brackets */
1185     if ((options & PCRE_EXTRA) != 0) /* Not yet standard */
1186     {
1187     bravalue = OP_ONCE;
1188     ptr++;
1189     previous = NULL;
1190     break;
1191     }
1192     /* Else fall through */
1193    
1194     default:
1195     *errorptr = ERR12;
1196     goto FAILED;
1197     }
1198     }
1199    
1200     /* Else we have a referencing group */
1201    
1202     else
1203     {
1204     if (++(*brackets) > EXTRACT_MAX)
1205     {
1206     *errorptr = ERR13;
1207     goto FAILED;
1208     }
1209     bravalue = OP_BRA + *brackets;
1210     }
1211    
1212     /* Process nested bracketed re; at end pointer is on the bracket. We copy
1213     code into a non-register variable in order to be able to pass its address
1214     because some compilers complain otherwise. */
1215    
1216     *code = bravalue;
1217     {
1218     uschar *mcode = code;
1219     if (!compile_regex(options, brackets, &mcode, &ptr, errorptr))
1220     goto FAILED;
1221     code = mcode;
1222     }
1223    
1224     if (*ptr != ')')
1225     {
1226     *errorptr = ERR14;
1227     goto FAILED;
1228     }
1229     break;
1230    
1231     /* Check \ for being a real metacharacter; if not, fall through and handle
1232     it as a data character at the start of a string. Escape items are checked
1233     for validity in the pre-compiling pass. */
1234    
1235     case '\\':
1236     oldptr = ptr;
1237     c = check_escape(&ptr, errorptr, *brackets, options, FALSE);
1238    
1239     /* Handle metacharacters introduced by \. For ones like \d, the ESC_ values
1240     are arranged to be the negation of the corresponding OP_values. For the
1241     back references, the values are ESC_REF plus the reference number. Only
1242     back references and those types that consume a character may be repeated.
1243     We can test for values between ESC_b and ESC_Z for the latter; this may
1244     have to change if any new ones are ever created. */
1245    
1246     if (c < 0)
1247     {
1248     if (-c >= ESC_REF)
1249     {
1250     int refnum = -c - ESC_REF;
1251     if (*brackets < refnum)
1252     {
1253     *errorptr = ERR15;
1254     goto FAILED;
1255     }
1256     previous = code;
1257     *code++ = OP_REF;
1258     *code++ = refnum;
1259     }
1260     else
1261     {
1262     previous = (-c > ESC_b && -c < ESC_X)? code : NULL;
1263     *code++ = -c;
1264     }
1265     continue;
1266     }
1267    
1268 nigel 7 /* Data character: reset and fall through */
1269 nigel 3
1270     ptr = oldptr;
1271     c = '\\';
1272    
1273     /* Handle a run of data characters until a metacharacter is encountered.
1274     The first character is guaranteed not to be whitespace or # when the
1275     extended flag is set. */
1276    
1277     NORMAL_CHAR:
1278     default:
1279     previous = code;
1280     *code = OP_CHARS;
1281     code += 2;
1282     length = 0;
1283    
1284     do
1285     {
1286     if ((options & PCRE_EXTENDED) != 0)
1287     {
1288     if ((pcre_ctypes[c] & ctype_space) != 0) continue;
1289     if (c == '#')
1290     {
1291     while ((c = *(++ptr)) != 0 && c != '\n');
1292     if (c == 0) break;
1293     continue;
1294     }
1295     }
1296    
1297     /* Backslash may introduce a data char or a metacharacter. Escaped items
1298     are checked for validity in the pre-compiling pass. Stop the string
1299     before a metaitem. */
1300    
1301     if (c == '\\')
1302     {
1303     oldptr = ptr;
1304     c = check_escape(&ptr, errorptr, *brackets, options, FALSE);
1305     if (c < 0) { ptr = oldptr; break; }
1306     }
1307    
1308     /* Ordinary character or single-char escape */
1309    
1310     *code++ = c;
1311     length++;
1312     }
1313    
1314     /* This "while" is the end of the "do" above. */
1315    
1316     while (length < 255 && (pcre_ctypes[c = *(++ptr)] & ctype_meta) == 0);
1317    
1318     /* Compute the length and set it in the data vector, and advance to
1319     the next state. */
1320    
1321     previous[1] = length;
1322 nigel 15 if (length < 255) ptr--;
1323 nigel 3 break;
1324     }
1325     } /* end of big loop */
1326    
1327     /* Control never reaches here by falling through, only by a goto for all the
1328     error states. Pass back the position in the pattern so that it can be displayed
1329     to the user for diagnosing the error. */
1330    
1331     FAILED:
1332     *ptrptr = ptr;
1333     return FALSE;
1334     }
1335    
1336    
1337    
1338    
1339     /*************************************************
1340     * Compile sequence of alternatives *
1341     *************************************************/
1342    
1343     /* On entry, ptr is pointing past the bracket character, but on return
1344     it points to the closing bracket, or vertical bar, or end of string.
1345     The code variable is pointing at the byte into which the BRA operator has been
1346     stored.
1347    
1348     Argument:
1349     options the option bits
1350     brackets -> int containing the number of extracting brackets used
1351     codeptr -> the address of the current code pointer
1352     ptrptr -> the address of the current pattern pointer
1353     errorptr -> pointer to error message
1354    
1355     Returns: TRUE on success
1356     */
1357    
1358     static BOOL
1359 nigel 7 compile_regex(int options, int *brackets, uschar **codeptr,
1360     const uschar **ptrptr, const char **errorptr)
1361 nigel 3 {
1362 nigel 7 const uschar *ptr = *ptrptr;
1363 nigel 3 uschar *code = *codeptr;
1364     uschar *start_bracket = code;
1365    
1366     for (;;)
1367     {
1368     int length;
1369     uschar *last_branch = code;
1370    
1371     code += 3;
1372     if (!compile_branch(options, brackets, &code, &ptr, errorptr))
1373     {
1374     *ptrptr = ptr;
1375     return FALSE;
1376     }
1377    
1378     /* Fill in the length of the last branch */
1379    
1380     length = code - last_branch;
1381     last_branch[1] = length >> 8;
1382     last_branch[2] = length & 255;
1383    
1384     /* Reached end of expression, either ')' or end of pattern. Insert a
1385     terminating ket and the length of the whole bracketed item, and return,
1386     leaving the pointer at the terminating char. */
1387    
1388     if (*ptr != '|')
1389     {
1390     length = code - start_bracket;
1391     *code++ = OP_KET;
1392     *code++ = length >> 8;
1393     *code++ = length & 255;
1394     *codeptr = code;
1395     *ptrptr = ptr;
1396     return TRUE;
1397     }
1398    
1399     /* Another branch follows; insert an "or" node and advance the pointer. */
1400    
1401     *code = OP_ALT;
1402     ptr++;
1403     }
1404     /* Control never reaches here */
1405     }
1406    
1407    
1408    
1409     /*************************************************
1410     * Check for anchored expression *
1411     *************************************************/
1412    
1413     /* Try to find out if this is an anchored regular expression. Consider each
1414     alternative branch. If they all start with OP_SOD or OP_CIRC, or with a bracket
1415     all of whose alternatives start with OP_SOD or OP_CIRC (recurse ad lib), then
1416     it's anchored. However, if this is a multiline pattern, then only OP_SOD
1417     counts, since OP_CIRC can match in the middle.
1418    
1419     A branch is also implicitly anchored if it starts with .* because that will try
1420     the rest of the pattern at all possible matching points, so there is no point
1421     trying them again.
1422    
1423     Argument: points to start of expression (the bracket)
1424     Returns: TRUE or FALSE
1425     */
1426    
1427     static BOOL
1428 nigel 7 is_anchored(register const uschar *code, BOOL multiline)
1429 nigel 3 {
1430     do {
1431     int op = (int)code[3];
1432     if (op >= OP_BRA || op == OP_ASSERT || op == OP_ONCE)
1433     { if (!is_anchored(code+3, multiline)) return FALSE; }
1434     else if (op == OP_TYPESTAR || op == OP_TYPEMINSTAR)
1435     { if (code[4] != OP_ANY) return FALSE; }
1436     else if (op != OP_SOD && (multiline || op != OP_CIRC)) return FALSE;
1437     code += (code[1] << 8) + code[2];
1438     }
1439     while (*code == OP_ALT);
1440     return TRUE;
1441     }
1442    
1443    
1444    
1445     /*************************************************
1446     * Check for start with \n line expression *
1447     *************************************************/
1448    
1449     /* This is called for multiline expressions to try to find out if every branch
1450     starts with ^ so that "first char" processing can be done to speed things up.
1451    
1452     Argument: points to start of expression (the bracket)
1453     Returns: TRUE or FALSE
1454     */
1455    
1456     static BOOL
1457 nigel 7 is_startline(const uschar *code)
1458 nigel 3 {
1459     do {
1460     if ((int)code[3] >= OP_BRA || code[3] == OP_ASSERT)
1461     { if (!is_startline(code+3)) return FALSE; }
1462     else if (code[3] != OP_CIRC) return FALSE;
1463     code += (code[1] << 8) + code[2];
1464     }
1465     while (*code == OP_ALT);
1466     return TRUE;
1467     }
1468    
1469    
1470    
1471     /*************************************************
1472     * Check for fixed first char *
1473     *************************************************/
1474    
1475     /* Try to find out if there is a fixed first character. This is called for
1476     unanchored expressions, as it speeds up their processing quite considerably.
1477     Consider each alternative branch. If they all start with the same char, or with
1478     a bracket all of whose alternatives start with the same char (recurse ad lib),
1479     then we return that char, otherwise -1.
1480    
1481     Argument: points to start of expression (the bracket)
1482     Returns: -1 or the fixed first char
1483     */
1484    
1485     static int
1486     find_firstchar(uschar *code)
1487     {
1488     register int c = -1;
1489     do
1490     {
1491     register int charoffset = 4;
1492    
1493     if ((int)code[3] >= OP_BRA || code[3] == OP_ASSERT)
1494     {
1495     register int d;
1496     if ((d = find_firstchar(code+3)) < 0) return -1;
1497     if (c < 0) c = d; else if (c != d) return -1;
1498     }
1499    
1500     else switch(code[3])
1501     {
1502     default:
1503     return -1;
1504    
1505     case OP_EXACT: /* Fall through */
1506     charoffset++;
1507    
1508     case OP_CHARS: /* Fall through */
1509     charoffset++;
1510    
1511     case OP_PLUS:
1512     case OP_MINPLUS:
1513     if (c < 0) c = code[charoffset]; else if (c != code[charoffset]) return -1;
1514     break;
1515     }
1516     code += (code[1] << 8) + code[2];
1517     }
1518     while (*code == OP_ALT);
1519     return c;
1520     }
1521    
1522    
1523    
1524     /*************************************************
1525     * Compile a Regular Expression *
1526     *************************************************/
1527    
1528     /* This function takes a string and returns a pointer to a block of store
1529     holding a compiled version of the expression.
1530    
1531     Arguments:
1532     pattern the regular expression
1533     options various option bits
1534     errorptr pointer to pointer to error text
1535     erroroffset ptr offset in pattern where error was detected
1536    
1537     Returns: pointer to compiled data block, or NULL on error,
1538     with errorptr and erroroffset set
1539     */
1540    
1541     pcre *
1542 nigel 7 pcre_compile(const char *pattern, int options, const char **errorptr,
1543 nigel 3 int *erroroffset)
1544     {
1545     real_pcre *re;
1546     int spaces = 0;
1547     int length = 3; /* For initial BRA plus length */
1548     int runlength;
1549     int c, size;
1550     int bracount = 0;
1551     int brastack[200];
1552     int top_backref = 0;
1553 nigel 7 unsigned int brastackptr = 0;
1554     uschar *code;
1555     const uschar *ptr;
1556 nigel 3
1557     #ifdef DEBUG
1558     uschar *code_base, *code_end;
1559     #endif
1560    
1561     /* We can't pass back an error message if errorptr is NULL; I guess the best we
1562     can do is just return NULL. */
1563    
1564     if (errorptr == NULL) return NULL;
1565     *errorptr = NULL;
1566    
1567     /* However, we can give a message for this error */
1568    
1569     if (erroroffset == NULL)
1570     {
1571     *errorptr = ERR16;
1572     return NULL;
1573     }
1574     *erroroffset = 0;
1575    
1576     if ((options & ~PUBLIC_OPTIONS) != 0)
1577     {
1578     *errorptr = ERR17;
1579     return NULL;
1580     }
1581    
1582 nigel 9 DPRINTF(("------------------------------------------------------------------\n"));
1583     DPRINTF(("%s\n", pattern));
1584 nigel 3
1585     /* The first thing to do is to make a pass over the pattern to compute the
1586     amount of store required to hold the compiled code. This does not have to be
1587     perfect as long as errors are overestimates. At the same time we can detect any
1588     internal flag settings. Make an attempt to correct for any counted white space
1589     if an "extended" flag setting appears late in the pattern. We can't be so
1590     clever for #-comments. */
1591    
1592 nigel 7 ptr = (const uschar *)(pattern - 1);
1593 nigel 3 while ((c = *(++ptr)) != 0)
1594     {
1595     int min, max;
1596     int class_charcount;
1597    
1598     if ((pcre_ctypes[c] & ctype_space) != 0)
1599     {
1600     if ((options & PCRE_EXTENDED) != 0) continue;
1601     spaces++;
1602     }
1603    
1604     if (c == '#' && (options & PCRE_EXTENDED) != 0)
1605     {
1606     while ((c = *(++ptr)) != 0 && c != '\n');
1607     continue;
1608     }
1609    
1610     switch(c)
1611     {
1612     /* A backslashed item may be an escaped "normal" character or a
1613     character type. For a "normal" character, put the pointers and
1614     character back so that tests for whitespace etc. in the input
1615     are done correctly. */
1616    
1617     case '\\':
1618     {
1619 nigel 7 const uschar *save_ptr = ptr;
1620 nigel 3 c = check_escape(&ptr, errorptr, bracount, options, FALSE);
1621     if (*errorptr != NULL) goto PCRE_ERROR_RETURN;
1622     if (c >= 0)
1623     {
1624     ptr = save_ptr;
1625     c = '\\';
1626     goto NORMAL_CHAR;
1627     }
1628     }
1629     length++;
1630    
1631     /* A back reference needs an additional char, plus either one or 5
1632     bytes for a repeat. We also need to keep the value of the highest
1633     back reference. */
1634    
1635     if (c <= -ESC_REF)
1636     {
1637     int refnum = -c - ESC_REF;
1638     if (refnum > top_backref) top_backref = refnum;
1639     length++; /* For single back reference */
1640     if (ptr[1] == '{' && is_counted_repeat(ptr+2))
1641     {
1642     ptr = read_repeat_counts(ptr+2, &min, &max, errorptr);
1643     if (*errorptr != NULL) goto PCRE_ERROR_RETURN;
1644     if ((min == 0 && (max == 1 || max == -1)) ||
1645     (min == 1 && max == -1))
1646     length++;
1647     else length += 5;
1648     if (ptr[1] == '?') ptr++;
1649     }
1650     }
1651     continue;
1652    
1653     case '^':
1654     case '.':
1655     case '$':
1656     case '*': /* These repeats won't be after brackets; */
1657     case '+': /* those are handled separately */
1658     case '?':
1659     length++;
1660     continue;
1661    
1662     /* This covers the cases of repeats after a single char, metachar, class,
1663     or back reference. */
1664    
1665     case '{':
1666     if (!is_counted_repeat(ptr+1)) goto NORMAL_CHAR;
1667     ptr = read_repeat_counts(ptr+1, &min, &max, errorptr);
1668     if (*errorptr != NULL) goto PCRE_ERROR_RETURN;
1669     if ((min == 0 && (max == 1 || max == -1)) ||
1670     (min == 1 && max == -1))
1671     length++;
1672     else
1673     {
1674     length--; /* Uncount the original char or metachar */
1675     if (min == 1) length++; else if (min > 0) length += 4;
1676     if (max > 0) length += 4; else length += 2;
1677     }
1678     if (ptr[1] == '?') ptr++;
1679     continue;
1680    
1681     /* An alternation contains an offset to the next branch or ket. */
1682     case '|':
1683     length += 3;
1684     continue;
1685    
1686     /* A character class uses 33 characters. Don't worry about character types
1687     that aren't allowed in classes - they'll get picked up during the compile.
1688     A character class that contains only one character uses 2 or 3 bytes,
1689     depending on whether it is negated or not. Notice this where we can. */
1690    
1691     case '[':
1692     class_charcount = 0;
1693     if (*(++ptr) == '^') ptr++;
1694     do
1695     {
1696     if (*ptr == '\\')
1697     {
1698 nigel 9 int ch = check_escape(&ptr, errorptr, bracount, options, TRUE);
1699 nigel 3 if (*errorptr != NULL) goto PCRE_ERROR_RETURN;
1700 nigel 9 if (-ch == ESC_b) class_charcount++; else class_charcount = 10;
1701 nigel 3 }
1702     else class_charcount++;
1703     ptr++;
1704     }
1705     while (*ptr != 0 && *ptr != ']');
1706    
1707     /* Repeats for negated single chars are handled by the general code */
1708    
1709     if (class_charcount == 1) length += 3; else
1710     {
1711     length += 33;
1712    
1713     /* A repeat needs either 1 or 5 bytes. */
1714    
1715 nigel 9 if (*ptr != 0 && ptr[1] == '{' && is_counted_repeat(ptr+2))
1716 nigel 3 {
1717     ptr = read_repeat_counts(ptr+2, &min, &max, errorptr);
1718     if (*errorptr != NULL) goto PCRE_ERROR_RETURN;
1719     if ((min == 0 && (max == 1 || max == -1)) ||
1720     (min == 1 && max == -1))
1721     length++;
1722     else length += 5;
1723     if (ptr[1] == '?') ptr++;
1724     }
1725     }
1726     continue;
1727    
1728     /* Brackets may be genuine groups or special things */
1729    
1730     case '(':
1731    
1732     /* Handle special forms of bracket, which all start (? */
1733    
1734     if (ptr[1] == '?') switch (c = ptr[2])
1735     {
1736     /* Skip over comments entirely */
1737     case '#':
1738     ptr += 3;
1739     while (*ptr != 0 && *ptr != ')') ptr++;
1740     if (*ptr == 0)
1741     {
1742     *errorptr = ERR18;
1743     goto PCRE_ERROR_RETURN;
1744     }
1745     continue;
1746    
1747     /* Non-referencing groups and lookaheads just move the pointer on, and
1748     then behave like a non-special bracket, except that they don't increment
1749     the count of extracting brackets. */
1750    
1751     case ':':
1752     case '=':
1753     case '!':
1754     ptr += 2;
1755     break;
1756    
1757     /* Ditto for the "once only" bracket, allowed only if the extra bit
1758     is set. */
1759    
1760     case '>':
1761     if ((options & PCRE_EXTRA) != 0)
1762     {
1763     ptr += 2;
1764     break;
1765     }
1766 nigel 19 /* Else fall through */
1767 nigel 3
1768     /* Else loop setting valid options until ) is met. Anything else is an
1769     error. */
1770    
1771     default:
1772     ptr += 2;
1773     for (;; ptr++)
1774     {
1775     if ((c = *ptr) == 'i')
1776     {
1777     options |= PCRE_CASELESS;
1778     continue;
1779     }
1780     else if ((c = *ptr) == 'm')
1781     {
1782     options |= PCRE_MULTILINE;
1783     continue;
1784     }
1785     else if (c == 's')
1786     {
1787     options |= PCRE_DOTALL;
1788     continue;
1789     }
1790     else if (c == 'x')
1791     {
1792     options |= PCRE_EXTENDED;
1793     length -= spaces; /* Already counted spaces */
1794     continue;
1795     }
1796 nigel 19 else if (c == 'X')
1797     {
1798     options |= PCRE_EXTRA;
1799     continue;
1800     }
1801     else if (c == 'U')
1802     {
1803     options |= PCRE_UNGREEDY;
1804     continue;
1805     }
1806 nigel 3 else if (c == ')') break;
1807    
1808     *errorptr = ERR12;
1809     goto PCRE_ERROR_RETURN;
1810     }
1811     continue; /* End of this bracket handling */
1812     }
1813    
1814     /* Extracting brackets must be counted so we can process escapes in a
1815     Perlish way. */
1816    
1817     else bracount++;
1818    
1819     /* Non-special forms of bracket. Save length for computing whole length
1820     at end if there's a repeat that requires duplication of the group. */
1821    
1822     if (brastackptr >= sizeof(brastack)/sizeof(int))
1823     {
1824     *errorptr = ERR19;
1825     goto PCRE_ERROR_RETURN;
1826     }
1827    
1828     brastack[brastackptr++] = length;
1829     length += 3;
1830     continue;
1831    
1832     /* Handle ket. Look for subsequent max/min; for certain sets of values we
1833 nigel 9 have to replicate this bracket up to that many times. If brastackptr is
1834     0 this is an unmatched bracket which will generate an error, but take care
1835     not to try to access brastack[-1]. */
1836 nigel 3
1837     case ')':
1838     length += 3;
1839     {
1840 nigel 9 int minval = 1;
1841     int maxval = 1;
1842     int duplength = (brastackptr > 0)? length - brastack[--brastackptr] : 0;
1843 nigel 3
1844     /* Leave ptr at the final char; for read_repeat_counts this happens
1845     automatically; for the others we need an increment. */
1846    
1847     if ((c = ptr[1]) == '{' && is_counted_repeat(ptr+2))
1848     {
1849 nigel 9 ptr = read_repeat_counts(ptr+2, &minval, &maxval, errorptr);
1850 nigel 3 if (*errorptr != NULL) goto PCRE_ERROR_RETURN;
1851     }
1852 nigel 9 else if (c == '*') { minval = 0; maxval = -1; ptr++; }
1853     else if (c == '+') { maxval = -1; ptr++; }
1854     else if (c == '?') { minval = 0; ptr++; }
1855 nigel 3
1856 nigel 9 /* If there is a minimum > 1 we have to replicate up to minval-1 times;
1857     if there is a limited maximum we have to replicate up to maxval-1 times
1858     and allow for a BRAZERO item before each optional copy, as we also have
1859     to do before the first copy if the minimum is zero. */
1860 nigel 3
1861 nigel 9 if (minval == 0) length++;
1862     else if (minval > 1) length += (minval - 1) * duplength;
1863     if (maxval > minval) length += (maxval - minval) * (duplength + 1);
1864 nigel 3 }
1865     continue;
1866    
1867     /* Non-special character. For a run of such characters the length required
1868     is the number of characters + 2, except that the maximum run length is 255.
1869     We won't get a skipped space or a non-data escape or the start of a #
1870     comment as the first character, so the length can't be zero. */
1871    
1872     NORMAL_CHAR:
1873     default:
1874     length += 2;
1875     runlength = 0;
1876     do
1877     {
1878     if ((pcre_ctypes[c] & ctype_space) != 0)
1879     {
1880     if ((options & PCRE_EXTENDED) != 0) continue;
1881     spaces++;
1882     }
1883    
1884     if (c == '#' && (options & PCRE_EXTENDED) != 0)
1885     {
1886     while ((c = *(++ptr)) != 0 && c != '\n');
1887     continue;
1888     }
1889    
1890     /* Backslash may introduce a data char or a metacharacter; stop the
1891     string before the latter. */
1892    
1893     if (c == '\\')
1894     {
1895 nigel 7 const uschar *saveptr = ptr;
1896 nigel 3 c = check_escape(&ptr, errorptr, bracount, options, FALSE);
1897     if (*errorptr != NULL) goto PCRE_ERROR_RETURN;
1898     if (c < 0) { ptr = saveptr; break; }
1899     }
1900    
1901     /* Ordinary character or single-char escape */
1902    
1903     runlength++;
1904     }
1905    
1906     /* This "while" is the end of the "do" above. */
1907    
1908     while (runlength < 255 && (pcre_ctypes[c = *(++ptr)] & ctype_meta) == 0);
1909    
1910     ptr--;
1911     length += runlength;
1912     continue;
1913     }
1914     }
1915    
1916     length += 4; /* For final KET and END */
1917    
1918     if (length > 65539)
1919     {
1920     *errorptr = ERR20;
1921     return NULL;
1922     }
1923    
1924     /* Compute the size of data block needed and get it, either from malloc or
1925 nigel 9 externally provided function. We specify "code[0]" in the offsetof() expression
1926     rather than just "code", because it has been reported that one broken compiler
1927     fails on "code" because it is also an independent variable. It should make no
1928     difference to the value of the offsetof(). */
1929 nigel 3
1930 nigel 9 size = length + offsetof(real_pcre, code[0]);
1931 nigel 3 re = (real_pcre *)(pcre_malloc)(size);
1932    
1933     if (re == NULL)
1934     {
1935     *errorptr = ERR21;
1936     return NULL;
1937     }
1938    
1939 nigel 9 /* Put in the magic number and the options. */
1940    
1941 nigel 3 re->magic_number = MAGIC_NUMBER;
1942     re->options = options;
1943    
1944     /* Set up a starting, non-extracting bracket, then compile the expression. On
1945     error, *errorptr will be set non-NULL, so we don't need to look at the result
1946     of the function here. */
1947    
1948 nigel 7 ptr = (const uschar *)pattern;
1949 nigel 3 code = re->code;
1950     *code = OP_BRA;
1951     bracount = 0;
1952     (void)compile_regex(options, &bracount, &code, &ptr, errorptr);
1953     re->top_bracket = bracount;
1954     re->top_backref = top_backref;
1955    
1956     /* If not reached end of pattern on success, there's an excess bracket. */
1957    
1958     if (*errorptr == NULL && *ptr != 0) *errorptr = ERR22;
1959    
1960     /* Fill in the terminating state and check for disastrous overflow, but
1961     if debugging, leave the test till after things are printed out. */
1962    
1963     *code++ = OP_END;
1964    
1965     #ifndef DEBUG
1966     if (code - re->code > length) *errorptr = ERR23;
1967     #endif
1968    
1969     /* Failed to compile */
1970    
1971     if (*errorptr != NULL)
1972     {
1973     (pcre_free)(re);
1974     PCRE_ERROR_RETURN:
1975 nigel 7 *erroroffset = ptr - (const uschar *)pattern;
1976 nigel 3 return NULL;
1977     }
1978    
1979     /* If the anchored option was not passed, set flag if we can determine that it
1980     is anchored by virtue of ^ characters or \A or anything else. Otherwise, see if
1981     we can determine what the first character has to be, because that speeds up
1982     unanchored matches no end. In the case of multiline matches, an alternative is
1983     to set the PCRE_STARTLINE flag if all branches start with ^. */
1984    
1985     if ((options & PCRE_ANCHORED) == 0)
1986     {
1987     if (is_anchored(re->code, (options & PCRE_MULTILINE) != 0))
1988     re->options |= PCRE_ANCHORED;
1989     else
1990     {
1991 nigel 9 int ch = find_firstchar(re->code);
1992     if (ch >= 0)
1993 nigel 3 {
1994 nigel 9 re->first_char = ch;
1995 nigel 3 re->options |= PCRE_FIRSTSET;
1996     }
1997     else if (is_startline(re->code))
1998     re->options |= PCRE_STARTLINE;
1999     }
2000     }
2001    
2002     /* Print out the compiled data for debugging */
2003    
2004     #ifdef DEBUG
2005    
2006     printf("Length = %d top_bracket = %d top_backref=%d\n",
2007     length, re->top_bracket, re->top_backref);
2008    
2009     if (re->options != 0)
2010     {
2011 nigel 19 printf("%s%s%s%s%s%s%s%s\n",
2012 nigel 3 ((re->options & PCRE_ANCHORED) != 0)? "anchored " : "",
2013     ((re->options & PCRE_CASELESS) != 0)? "caseless " : "",
2014     ((re->options & PCRE_EXTENDED) != 0)? "extended " : "",
2015     ((re->options & PCRE_MULTILINE) != 0)? "multiline " : "",
2016     ((re->options & PCRE_DOTALL) != 0)? "dotall " : "",
2017     ((re->options & PCRE_DOLLAR_ENDONLY) != 0)? "endonly " : "",
2018 nigel 19 ((re->options & PCRE_EXTRA) != 0)? "extra " : "",
2019     ((re->options & PCRE_UNGREEDY) != 0)? "ungreedy " : "");
2020 nigel 3 }
2021    
2022     if ((re->options & PCRE_FIRSTSET) != 0)
2023     {
2024     if (isprint(re->first_char)) printf("First char = %c\n", re->first_char);
2025     else printf("First char = \\x%02x\n", re->first_char);
2026     }
2027    
2028     code_end = code;
2029     code_base = code = re->code;
2030    
2031     while (code < code_end)
2032     {
2033     int charlength;
2034    
2035     printf("%3d ", code - code_base);
2036    
2037     if (*code >= OP_BRA)
2038     {
2039     printf("%3d Bra %d", (code[1] << 8) + code[2], *code - OP_BRA);
2040     code += 2;
2041     }
2042    
2043     else switch(*code)
2044     {
2045     case OP_CHARS:
2046     charlength = *(++code);
2047     printf("%3d ", charlength);
2048     while (charlength-- > 0)
2049     if (isprint(c = *(++code))) printf("%c", c); else printf("\\x%02x", c);
2050     break;
2051    
2052     case OP_KETRMAX:
2053     case OP_KETRMIN:
2054     case OP_ALT:
2055     case OP_KET:
2056     case OP_ASSERT:
2057     case OP_ASSERT_NOT:
2058     case OP_ONCE:
2059     printf("%3d %s", (code[1] << 8) + code[2], OP_names[*code]);
2060     code += 2;
2061     break;
2062    
2063     case OP_STAR:
2064     case OP_MINSTAR:
2065     case OP_PLUS:
2066     case OP_MINPLUS:
2067     case OP_QUERY:
2068     case OP_MINQUERY:
2069     case OP_TYPESTAR:
2070     case OP_TYPEMINSTAR:
2071     case OP_TYPEPLUS:
2072     case OP_TYPEMINPLUS:
2073     case OP_TYPEQUERY:
2074     case OP_TYPEMINQUERY:
2075     if (*code >= OP_TYPESTAR)
2076     printf(" %s", OP_names[code[1]]);
2077     else if (isprint(c = code[1])) printf(" %c", c);
2078     else printf(" \\x%02x", c);
2079     printf("%s", OP_names[*code++]);
2080     break;
2081    
2082     case OP_EXACT:
2083     case OP_UPTO:
2084     case OP_MINUPTO:
2085     if (isprint(c = code[3])) printf(" %c{", c);
2086     else printf(" \\x%02x{", c);
2087 nigel 11 if (*code != OP_EXACT) printf("0,");
2088 nigel 3 printf("%d}", (code[1] << 8) + code[2]);
2089     if (*code == OP_MINUPTO) printf("?");
2090     code += 3;
2091     break;
2092    
2093     case OP_TYPEEXACT:
2094     case OP_TYPEUPTO:
2095     case OP_TYPEMINUPTO:
2096     printf(" %s{", OP_names[code[3]]);
2097     if (*code != OP_TYPEEXACT) printf(",");
2098     printf("%d}", (code[1] << 8) + code[2]);
2099     if (*code == OP_TYPEMINUPTO) printf("?");
2100     code += 3;
2101     break;
2102    
2103     case OP_NOT:
2104     if (isprint(c = *(++code))) printf(" [^%c]", c);
2105     else printf(" [^\\x%02x]", c);
2106     break;
2107    
2108     case OP_NOTSTAR:
2109     case OP_NOTMINSTAR:
2110     case OP_NOTPLUS:
2111     case OP_NOTMINPLUS:
2112     case OP_NOTQUERY:
2113     case OP_NOTMINQUERY:
2114     if (isprint(c = code[1])) printf(" [^%c]", c);
2115     else printf(" [^\\x%02x]", c);
2116     printf("%s", OP_names[*code++]);
2117     break;
2118    
2119     case OP_NOTEXACT:
2120     case OP_NOTUPTO:
2121     case OP_NOTMINUPTO:
2122     if (isprint(c = code[3])) printf(" [^%c]{", c);
2123     else printf(" [^\\x%02x]{", c);
2124     if (*code != OP_NOTEXACT) printf(",");
2125     printf("%d}", (code[1] << 8) + code[2]);
2126     if (*code == OP_NOTMINUPTO) printf("?");
2127     code += 3;
2128     break;
2129    
2130     case OP_REF:
2131     printf(" \\%d", *(++code));
2132 nigel 9 code ++;
2133     goto CLASS_REF_REPEAT;
2134 nigel 3
2135     case OP_CLASS:
2136 nigel 13 case OP_NEGCLASS:
2137 nigel 3 {
2138     int i, min, max;
2139    
2140 nigel 13 if (*code++ == OP_CLASS) printf(" [");
2141     else printf(" ^[");
2142 nigel 3
2143     for (i = 0; i < 256; i++)
2144     {
2145     if ((code[i/8] & (1 << (i&7))) != 0)
2146     {
2147     int j;
2148     for (j = i+1; j < 256; j++)
2149     if ((code[j/8] & (1 << (j&7))) == 0) break;
2150     if (i == '-' || i == ']') printf("\\");
2151     if (isprint(i)) printf("%c", i); else printf("\\x%02x", i);
2152     if (--j > i)
2153     {
2154     printf("-");
2155     if (j == '-' || j == ']') printf("\\");
2156     if (isprint(j)) printf("%c", j); else printf("\\x%02x", j);
2157     }
2158     i = j;
2159     }
2160     }
2161     printf("]");
2162     code += 32;
2163    
2164 nigel 9 CLASS_REF_REPEAT:
2165    
2166 nigel 3 switch(*code)
2167     {
2168     case OP_CRSTAR:
2169     case OP_CRMINSTAR:
2170     case OP_CRPLUS:
2171     case OP_CRMINPLUS:
2172     case OP_CRQUERY:
2173     case OP_CRMINQUERY:
2174     printf("%s", OP_names[*code]);
2175     break;
2176    
2177     case OP_CRRANGE:
2178     case OP_CRMINRANGE:
2179     min = (code[1] << 8) + code[2];
2180     max = (code[3] << 8) + code[4];
2181     if (max == 0) printf("{%d,}", min);
2182     else printf("{%d,%d}", min, max);
2183     if (*code == OP_CRMINRANGE) printf("?");
2184     code += 4;
2185     break;
2186    
2187     default:
2188     code--;
2189     }
2190     }
2191     break;
2192    
2193     /* Anything else is just a one-node item */
2194    
2195     default:
2196     printf(" %s", OP_names[*code]);
2197     break;
2198     }
2199    
2200     code++;
2201     printf("\n");
2202     }
2203     printf("------------------------------------------------------------------\n");
2204    
2205     /* This check is done here in the debugging case so that the code that
2206     was compiled can be seen. */
2207    
2208     if (code - re->code > length)
2209     {
2210     *errorptr = ERR23;
2211     (pcre_free)(re);
2212     *erroroffset = ptr - (uschar *)pattern;
2213     return NULL;
2214     }
2215     #endif
2216    
2217     return (pcre *)re;
2218     }
2219    
2220    
2221    
2222     /*************************************************
2223     * Match a character type *
2224     *************************************************/
2225    
2226     /* Not used in all the places it might be as it's sometimes faster
2227     to put the code inline.
2228    
2229     Arguments:
2230     type the character type
2231     c the character
2232     dotall the dotall flag
2233    
2234     Returns: TRUE if character is of the type
2235     */
2236    
2237     static BOOL
2238     match_type(int type, int c, BOOL dotall)
2239     {
2240    
2241     #ifdef DEBUG
2242     if (isprint(c)) printf("matching subject %c against ", c);
2243     else printf("matching subject \\x%02x against ", c);
2244     printf("%s\n", OP_names[type]);
2245     #endif
2246    
2247     switch(type)
2248     {
2249     case OP_ANY: return dotall || c != '\n';
2250     case OP_NOT_DIGIT: return (pcre_ctypes[c] & ctype_digit) == 0;
2251     case OP_DIGIT: return (pcre_ctypes[c] & ctype_digit) != 0;
2252     case OP_NOT_WHITESPACE: return (pcre_ctypes[c] & ctype_space) == 0;
2253     case OP_WHITESPACE: return (pcre_ctypes[c] & ctype_space) != 0;
2254     case OP_NOT_WORDCHAR: return (pcre_ctypes[c] & ctype_word) == 0;
2255     case OP_WORDCHAR: return (pcre_ctypes[c] & ctype_word) != 0;
2256     }
2257     return FALSE;
2258     }
2259    
2260    
2261    
2262     /*************************************************
2263     * Match a back-reference *
2264     *************************************************/
2265    
2266     /* If a back reference hasn't been set, the match fails.
2267    
2268     Arguments:
2269     number reference number
2270     eptr points into the subject
2271     length length to be matched
2272     md points to match data block
2273    
2274     Returns: TRUE if matched
2275     */
2276    
2277     static BOOL
2278 nigel 7 match_ref(int number, register const uschar *eptr, int length, match_data *md)
2279 nigel 3 {
2280 nigel 7 const uschar *p = md->start_subject + md->offset_vector[number];
2281 nigel 3
2282     #ifdef DEBUG
2283     if (eptr >= md->end_subject)
2284     printf("matching subject <null>");
2285     else
2286     {
2287     printf("matching subject ");
2288     pchars(eptr, length, TRUE, md);
2289     }
2290     printf(" against backref ");
2291     pchars(p, length, FALSE, md);
2292     printf("\n");
2293     #endif
2294    
2295     /* Always fail if not enough characters left */
2296    
2297     if (length > md->end_subject - p) return FALSE;
2298    
2299     /* Separate the caselesss case for speed */
2300    
2301     if (md->caseless)
2302     { while (length-- > 0) if (pcre_lcc[*p++] != pcre_lcc[*eptr++]) return FALSE; }
2303     else
2304     { while (length-- > 0) if (*p++ != *eptr++) return FALSE; }
2305    
2306     return TRUE;
2307     }
2308    
2309    
2310    
2311     /*************************************************
2312     * Match from current position *
2313     *************************************************/
2314    
2315     /* On entry ecode points to the first opcode, and eptr to the first character.
2316    
2317     Arguments:
2318     eptr pointer in subject
2319     ecode position in code
2320     offset_top current top pointer
2321     md pointer to "static" info for the match
2322    
2323     Returns: TRUE if matched
2324     */
2325    
2326     static BOOL
2327 nigel 7 match(register const uschar *eptr, register const uschar *ecode, int offset_top,
2328 nigel 3 match_data *md)
2329     {
2330     for (;;)
2331     {
2332     int min, max, ctype;
2333     register int i;
2334     register int c;
2335 nigel 7 BOOL minimize = FALSE;
2336 nigel 3
2337     /* Opening bracket. Check the alternative branches in turn, failing if none
2338     match. We have to set the start offset if required and there is space
2339     in the offset vector so that it is available for subsequent back references
2340     if the bracket matches. However, if the bracket fails, we must put back the
2341     previous value of both offsets in case they were set by a previous copy of
2342     the same bracket. Don't worry about setting the flag for the error case here;
2343     that is handled in the code for KET. */
2344    
2345     if ((int)*ecode >= OP_BRA)
2346     {
2347     int number = (*ecode - OP_BRA) << 1;
2348 nigel 7 int save_offset1 = 0, save_offset2 = 0;
2349 nigel 3
2350 nigel 9 DPRINTF(("start bracket %d\n", number/2));
2351 nigel 3
2352     if (number > 0 && number < md->offset_end)
2353     {
2354     save_offset1 = md->offset_vector[number];
2355     save_offset2 = md->offset_vector[number+1];
2356     md->offset_vector[number] = eptr - md->start_subject;
2357    
2358 nigel 9 DPRINTF(("saving %d %d\n", save_offset1, save_offset2));
2359 nigel 3 }
2360    
2361     /* Recurse for all the alternatives. */
2362    
2363     do
2364     {
2365     if (match(eptr, ecode+3, offset_top, md)) return TRUE;
2366     ecode += (ecode[1] << 8) + ecode[2];
2367     }
2368     while (*ecode == OP_ALT);
2369    
2370 nigel 9 DPRINTF(("bracket %d failed\n", number/2));
2371 nigel 3
2372     if (number > 0 && number < md->offset_end)
2373     {
2374     md->offset_vector[number] = save_offset1;
2375     md->offset_vector[number+1] = save_offset2;
2376     }
2377    
2378     return FALSE;
2379     }
2380    
2381     /* Other types of node can be handled by a switch */
2382    
2383     switch(*ecode)
2384     {
2385     case OP_END:
2386     md->end_match_ptr = eptr; /* Record where we ended */
2387     md->end_offset_top = offset_top; /* and how many extracts were taken */
2388     return TRUE;
2389    
2390     /* The equivalent of Prolog's "cut" - if the rest doesn't match, the
2391     whole thing doesn't match, so we have to get out via a longjmp(). */
2392    
2393     case OP_CUT:
2394     if (match(eptr, ecode+1, offset_top, md)) return TRUE;
2395     longjmp(md->fail_env, 1);
2396    
2397     /* Assertion brackets. Check the alternative branches in turn - the
2398     matching won't pass the KET for an assertion. If any one branch matches,
2399     the assertion is true. */
2400    
2401     case OP_ASSERT:
2402     do
2403     {
2404     if (match(eptr, ecode+3, offset_top, md)) break;
2405     ecode += (ecode[1] << 8) + ecode[2];
2406     }
2407     while (*ecode == OP_ALT);
2408     if (*ecode == OP_KET) return FALSE;
2409    
2410     /* Continue from after the assertion, updating the offsets high water
2411     mark, since extracts may have been taken during the assertion. */
2412    
2413     do ecode += (ecode[1] << 8) + ecode[2]; while (*ecode == OP_ALT);
2414     ecode += 3;
2415     offset_top = md->end_offset_top;
2416     continue;
2417    
2418     /* Negative assertion: all branches must fail to match */
2419    
2420     case OP_ASSERT_NOT:
2421     do
2422     {
2423     if (match(eptr, ecode+3, offset_top, md)) return FALSE;
2424     ecode += (ecode[1] << 8) + ecode[2];
2425     }
2426     while (*ecode == OP_ALT);
2427     ecode += 3;
2428     continue;
2429    
2430     /* "Once" brackets are like assertion brackets except that after a match,
2431     the point in the subject string is not moved back. Thus there can never be
2432 nigel 5 a move back into the brackets. Check the alternative branches in turn - the
2433 nigel 3 matching won't pass the KET for this kind of subpattern. If any one branch
2434     matches, we carry on, leaving the subject pointer. */
2435    
2436     case OP_ONCE:
2437     do
2438     {
2439     if (match(eptr, ecode+3, offset_top, md)) break;
2440     ecode += (ecode[1] << 8) + ecode[2];
2441     }
2442     while (*ecode == OP_ALT);
2443     if (*ecode == OP_KET) return FALSE;
2444    
2445     /* Continue as from after the assertion, updating the offsets high water
2446     mark, since extracts may have been taken. */
2447    
2448     do ecode += (ecode[1] << 8) + ecode[2]; while (*ecode == OP_ALT);
2449     ecode += 3;
2450     offset_top = md->end_offset_top;
2451     eptr = md->end_match_ptr;
2452     continue;
2453    
2454     /* An alternation is the end of a branch; scan along to find the end of the
2455     bracketed group and go to there. */
2456    
2457     case OP_ALT:
2458     do ecode += (ecode[1] << 8) + ecode[2]; while (*ecode == OP_ALT);
2459     break;
2460    
2461     /* BRAZERO and BRAMINZERO occur just before a bracket group, indicating
2462     that it may occur zero times. It may repeat infinitely, or not at all -
2463     i.e. it could be ()* or ()? in the pattern. Brackets with fixed upper
2464     repeat limits are compiled as a number of copies, with the optional ones
2465     preceded by BRAZERO or BRAMINZERO. */
2466    
2467     case OP_BRAZERO:
2468     {
2469 nigel 7 const uschar *next = ecode+1;
2470 nigel 3 if (match(eptr, next, offset_top, md)) return TRUE;
2471     do next += (next[1] << 8) + next[2]; while (*next == OP_ALT);
2472     ecode = next + 3;
2473     }
2474     break;
2475    
2476     case OP_BRAMINZERO:
2477     {
2478 nigel 7 const uschar *next = ecode+1;
2479 nigel 3 do next += (next[1] << 8) + next[2]; while (*next == OP_ALT);
2480     if (match(eptr, next+3, offset_top, md)) return TRUE;
2481     ecode++;
2482     }
2483     break;;
2484    
2485     /* End of a group, repeated or non-repeating. If we are at the end of
2486     an assertion "group", stop matching and return TRUE, but record the
2487     current high water mark for use by positive assertions. */
2488    
2489     case OP_KET:
2490     case OP_KETRMIN:
2491     case OP_KETRMAX:
2492     {
2493     int number;
2494 nigel 7 const uschar *prev = ecode - (ecode[1] << 8) - ecode[2];
2495 nigel 3
2496     if (*prev == OP_ASSERT || *prev == OP_ASSERT_NOT || *prev == OP_ONCE)
2497     {
2498     md->end_match_ptr = eptr; /* For ONCE */
2499     md->end_offset_top = offset_top;
2500     return TRUE;
2501     }
2502    
2503     /* In all other cases we have to check the group number back at the
2504     start and if necessary complete handling an extraction by setting the
2505     final offset and bumping the high water mark. */
2506    
2507     number = (*prev - OP_BRA) << 1;
2508    
2509 nigel 9 DPRINTF(("end bracket %d\n", number/2));
2510 nigel 3
2511     if (number > 0)
2512     {
2513     if (number >= md->offset_end) md->offset_overflow = TRUE; else
2514     {
2515     md->offset_vector[number+1] = eptr - md->start_subject;
2516     if (offset_top <= number) offset_top = number + 2;
2517     }
2518     }
2519    
2520     /* For a non-repeating ket, just advance to the next node and continue at
2521     this level. */
2522    
2523     if (*ecode == OP_KET)
2524     {
2525     ecode += 3;
2526     break;
2527     }
2528    
2529     /* The repeating kets try the rest of the pattern or restart from the
2530     preceding bracket, in the appropriate order. */
2531    
2532     if (*ecode == OP_KETRMIN)
2533     {
2534     if (match(eptr, ecode+3, offset_top, md) ||
2535     match(eptr, prev, offset_top, md)) return TRUE;
2536     }
2537     else /* OP_KETRMAX */
2538     {
2539     if (match(eptr, prev, offset_top, md) ||
2540     match(eptr, ecode+3, offset_top, md)) return TRUE;
2541     }
2542     }
2543     return FALSE;
2544    
2545     /* Start of subject unless notbol, or after internal newline if multiline */
2546    
2547     case OP_CIRC:
2548     if (md->notbol && eptr == md->start_subject) return FALSE;
2549     if (md->multiline)
2550     {
2551     if (eptr != md->start_subject && eptr[-1] != '\n') return FALSE;
2552     ecode++;
2553     break;
2554     }
2555     /* ... else fall through */
2556    
2557     /* Start of subject assertion */
2558    
2559     case OP_SOD:
2560     if (eptr != md->start_subject) return FALSE;
2561     ecode++;
2562     break;
2563    
2564     /* Assert before internal newline if multiline, or before
2565     a terminating newline unless endonly is set, else end of subject unless
2566     noteol is set. */
2567    
2568     case OP_DOLL:
2569     if (md->noteol && eptr >= md->end_subject) return FALSE;
2570     if (md->multiline)
2571     {
2572     if (eptr < md->end_subject && *eptr != '\n') return FALSE;
2573     ecode++;
2574     break;
2575     }
2576     else if (!md->endonly)
2577     {
2578     if (eptr < md->end_subject - 1 ||
2579     (eptr == md->end_subject - 1 && *eptr != '\n')) return FALSE;
2580     ecode++;
2581     break;
2582     }
2583     /* ... else fall through */
2584    
2585     /* End of subject assertion */
2586    
2587     case OP_EOD:
2588     if (eptr < md->end_subject) return FALSE;
2589     ecode++;
2590     break;
2591    
2592     /* Word boundary assertions */
2593    
2594     case OP_NOT_WORD_BOUNDARY:
2595     case OP_WORD_BOUNDARY:
2596     {
2597     BOOL prev_is_word = (eptr != md->start_subject) &&
2598     ((pcre_ctypes[eptr[-1]] & ctype_word) != 0);
2599     BOOL cur_is_word = (eptr < md->end_subject) &&
2600     ((pcre_ctypes[*eptr] & ctype_word) != 0);
2601     if ((*ecode++ == OP_WORD_BOUNDARY)?
2602     cur_is_word == prev_is_word : cur_is_word != prev_is_word)
2603     return FALSE;
2604     }
2605     break;
2606    
2607     /* Match a single character type; inline for speed */
2608    
2609     case OP_ANY:
2610     if (!md->dotall && eptr < md->end_subject && *eptr == '\n') return FALSE;
2611     if (eptr++ >= md->end_subject) return FALSE;
2612     ecode++;
2613     break;
2614    
2615     case OP_NOT_DIGIT:
2616     if (eptr >= md->end_subject || (pcre_ctypes[*eptr++] & ctype_digit) != 0)
2617     return FALSE;
2618     ecode++;
2619     break;
2620    
2621     case OP_DIGIT:
2622     if (eptr >= md->end_subject || (pcre_ctypes[*eptr++] & ctype_digit) == 0)
2623     return FALSE;
2624     ecode++;
2625     break;
2626    
2627     case OP_NOT_WHITESPACE:
2628     if (eptr >= md->end_subject || (pcre_ctypes[*eptr++] & ctype_space) != 0)
2629     return FALSE;
2630     ecode++;
2631     break;
2632    
2633     case OP_WHITESPACE:
2634     if (eptr >= md->end_subject || (pcre_ctypes[*eptr++] & ctype_space) == 0)
2635     return FALSE;
2636     ecode++;
2637     break;
2638    
2639     case OP_NOT_WORDCHAR:
2640     if (eptr >= md->end_subject || (pcre_ctypes[*eptr++] & ctype_word) != 0)
2641     return FALSE;
2642     ecode++;
2643     break;
2644    
2645     case OP_WORDCHAR:
2646     if (eptr >= md->end_subject || (pcre_ctypes[*eptr++] & ctype_word) == 0)
2647     return FALSE;
2648     ecode++;
2649     break;
2650    
2651     /* Match a back reference, possibly repeatedly. Look past the end of the
2652     item to see if there is repeat information following. The code is similar
2653     to that for character classes, but repeated for efficiency. Then obey
2654     similar code to character type repeats - written out again for speed.
2655     However, if the referenced string is the empty string, always treat
2656     it as matched, any number of times (otherwise there could be infinite
2657     loops). */
2658    
2659     case OP_REF:
2660     {
2661     int length;
2662     int number = ecode[1] << 1; /* Doubled reference number */
2663     ecode += 2; /* Advance past the item */
2664    
2665     if (number >= offset_top || md->offset_vector[number] < 0)
2666     {
2667     md->errorcode = PCRE_ERROR_BADREF;
2668     return FALSE;
2669     }
2670    
2671     length = md->offset_vector[number+1] - md->offset_vector[number];
2672    
2673     switch (*ecode)
2674     {
2675     case OP_CRSTAR:
2676     case OP_CRMINSTAR:
2677     case OP_CRPLUS:
2678     case OP_CRMINPLUS:
2679     case OP_CRQUERY:
2680     case OP_CRMINQUERY:
2681     c = *ecode++ - OP_CRSTAR;
2682     minimize = (c & 1) != 0;
2683     min = rep_min[c]; /* Pick up values from tables; */
2684     max = rep_max[c]; /* zero for max => infinity */
2685     if (max == 0) max = INT_MAX;
2686     break;
2687    
2688     case OP_CRRANGE:
2689     case OP_CRMINRANGE:
2690     minimize = (*ecode == OP_CRMINRANGE);
2691     min = (ecode[1] << 8) + ecode[2];
2692     max = (ecode[3] << 8) + ecode[4];
2693     if (max == 0) max = INT_MAX;
2694     ecode += 5;
2695     break;
2696    
2697     default: /* No repeat follows */
2698     if (!match_ref(number, eptr, length, md)) return FALSE;
2699     eptr += length;
2700     continue; /* With the main loop */
2701     }
2702    
2703     /* If the length of the reference is zero, just continue with the
2704     main loop. */
2705    
2706     if (length == 0) continue;
2707    
2708     /* First, ensure the minimum number of matches are present. We get back
2709     the length of the reference string explicitly rather than passing the
2710     address of eptr, so that eptr can be a register variable. */
2711    
2712     for (i = 1; i <= min; i++)
2713     {
2714     if (!match_ref(number, eptr, length, md)) return FALSE;
2715     eptr += length;
2716     }
2717    
2718     /* If min = max, continue at the same level without recursion.
2719     They are not both allowed to be zero. */
2720    
2721     if (min == max) continue;
2722    
2723     /* If minimizing, keep trying and advancing the pointer */
2724    
2725     if (minimize)
2726     {
2727     for (i = min;; i++)
2728     {
2729     if (match(eptr, ecode, offset_top, md)) return TRUE;
2730     if (i >= max || !match_ref(number, eptr, length, md))
2731     return FALSE;
2732     eptr += length;
2733     }
2734     /* Control never gets here */
2735     }
2736    
2737     /* If maximizing, find the longest string and work backwards */
2738    
2739     else
2740     {
2741 nigel 7 const uschar *pp = eptr;
2742 nigel 3 for (i = min; i < max; i++)
2743     {
2744     if (!match_ref(number, eptr, length, md)) break;
2745     eptr += length;
2746     }
2747     while (eptr >= pp)
2748     {
2749     if (match(eptr, ecode, offset_top, md)) return TRUE;
2750     eptr -= length;
2751     }
2752     return FALSE;
2753     }
2754     }
2755     /* Control never gets here */
2756    
2757     /* Match a character class, possibly repeatedly. Look past the end of the
2758     item to see if there is repeat information following. Then obey similar
2759     code to character type repeats - written out again for speed. If caseless
2760     matching was set at runtime but not at compile time, we have to check both
2761 nigel 13 versions of a character, and we have to behave differently for positive and
2762     negative classes. This is the only time where OP_CLASS and OP_NEGCLASS are
2763     treated differently. */
2764 nigel 3
2765     case OP_CLASS:
2766 nigel 13 case OP_NEGCLASS:
2767 nigel 3 {
2768 nigel 13 BOOL nasty_case = *ecode == OP_NEGCLASS && md->runtime_caseless;
2769 nigel 7 const uschar *data = ecode + 1; /* Save for matching */
2770     ecode += 33; /* Advance past the item */
2771 nigel 3
2772     switch (*ecode)
2773     {
2774     case OP_CRSTAR:
2775     case OP_CRMINSTAR:
2776     case OP_CRPLUS:
2777     case OP_CRMINPLUS:
2778     case OP_CRQUERY:
2779     case OP_CRMINQUERY:
2780     c = *ecode++ - OP_CRSTAR;
2781     minimize = (c & 1) != 0;
2782     min = rep_min[c]; /* Pick up values from tables; */
2783     max = rep_max[c]; /* zero for max => infinity */
2784     if (max == 0) max = INT_MAX;
2785     break;
2786    
2787     case OP_CRRANGE:
2788     case OP_CRMINRANGE:
2789     minimize = (*ecode == OP_CRMINRANGE);
2790     min = (ecode[1] << 8) + ecode[2];
2791     max = (ecode[3] << 8) + ecode[4];
2792     if (max == 0) max = INT_MAX;
2793     ecode += 5;
2794     break;
2795    
2796     default: /* No repeat follows */
2797 nigel 13 min = max = 1;
2798     break;
2799 nigel 3 }
2800    
2801     /* First, ensure the minimum number of matches are present. */
2802    
2803     for (i = 1; i <= min; i++)
2804     {
2805     if (eptr >= md->end_subject) return FALSE;
2806     c = *eptr++;
2807 nigel 13
2808     /* Either not runtime caseless, or it was a positive class. For
2809     runtime caseless, continue if either case is in the map. */
2810    
2811     if (!nasty_case)
2812 nigel 3 {
2813 nigel 13 if ((data[c/8] & (1 << (c&7))) != 0) continue;
2814     if (md->runtime_caseless)
2815     {
2816     c = pcre_fcc[c];
2817     if ((data[c/8] & (1 << (c&7))) != 0) continue;
2818     }
2819     }
2820    
2821     /* Runtime caseless and it was a negative class. Continue only if
2822     both cases are in the map. */
2823    
2824     else
2825     {
2826     if ((data[c/8] & (1 << (c&7))) == 0) return FALSE;
2827 nigel 3 c = pcre_fcc[c];
2828     if ((data[c/8] & (1 << (c&7))) != 0) continue;
2829     }
2830 nigel 13
2831 nigel 3 return FALSE;
2832     }
2833    
2834     /* If max == min we can continue with the main loop without the
2835     need to recurse. */
2836    
2837     if (min == max) continue;
2838    
2839     /* If minimizing, keep testing the rest of the expression and advancing
2840     the pointer while it matches the class. */
2841    
2842     if (minimize)
2843     {
2844     for (i = min;; i++)
2845     {
2846     if (match(eptr, ecode, offset_top, md)) return TRUE;
2847     if (i >= max || eptr >= md->end_subject) return FALSE;
2848     c = *eptr++;
2849 nigel 13
2850     /* Either not runtime caseless, or it was a positive class. For
2851     runtime caseless, continue if either case is in the map. */
2852    
2853     if (!nasty_case)
2854 nigel 3 {
2855 nigel 13 if ((data[c/8] & (1 << (c&7))) != 0) continue;
2856     if (md->runtime_caseless)
2857     {
2858     c = pcre_fcc[c];
2859     if ((data[c/8] & (1 << (c&7))) != 0) continue;
2860     }
2861     }
2862    
2863     /* Runtime caseless and it was a negative class. Continue only if
2864     both cases are in the map. */
2865    
2866     else
2867     {
2868     if ((data[c/8] & (1 << (c&7))) == 0) return FALSE;
2869 nigel 3 c = pcre_fcc[c];
2870     if ((data[c/8] & (1 << (c&7))) != 0) continue;
2871     }
2872 nigel 13
2873 nigel 3 return FALSE;
2874     }
2875     /* Control never gets here */
2876     }
2877    
2878     /* If maximizing, find the longest possible run, then work backwards. */
2879    
2880     else
2881     {
2882 nigel 7 const uschar *pp = eptr;
2883 nigel 3 for (i = min; i < max; eptr++, i++)
2884     {
2885     if (eptr >= md->end_subject) break;
2886     c = *eptr;
2887 nigel 13
2888     /* Either not runtime caseless, or it was a positive class. For
2889     runtime caseless, continue if either case is in the map. */
2890    
2891     if (!nasty_case)
2892 nigel 3 {
2893 nigel 13 if ((data[c/8] & (1 << (c&7))) != 0) continue;
2894     if (md->runtime_caseless)
2895     {
2896     c = pcre_fcc[c];
2897     if ((data[c/8] & (1 << (c&7))) != 0) continue;
2898     }
2899     }
2900    
2901     /* Runtime caseless and it was a negative class. Continue only if
2902     both cases are in the map. */
2903    
2904     else
2905     {
2906     if ((data[c/8] & (1 << (c&7))) == 0) break;
2907 nigel 3 c = pcre_fcc[c];
2908     if ((data[c/8] & (1 << (c&7))) != 0) continue;
2909     }
2910 nigel 13
2911 nigel 3 break;
2912     }
2913    
2914     while (eptr >= pp)
2915     if (match(eptr--, ecode, offset_top, md)) return TRUE;
2916     return FALSE;
2917     }
2918     }
2919     /* Control never gets here */
2920    
2921     /* Match a run of characters */
2922    
2923     case OP_CHARS:
2924     {
2925     register int length = ecode[1];
2926     ecode += 2;
2927    
2928 nigel 9 #ifdef DEBUG /* Sigh. Some compilers never learn. */
2929 nigel 3 if (eptr >= md->end_subject)
2930     printf("matching subject <null> against pattern ");
2931     else
2932     {
2933     printf("matching subject ");
2934     pchars(eptr, length, TRUE, md);
2935     printf(" against pattern ");
2936     }
2937     pchars(ecode, length, FALSE, md);
2938     printf("\n");
2939 nigel 9 #endif
2940 nigel 3
2941     if (length > md->end_subject - eptr) return FALSE;
2942     if (md->caseless)
2943     {
2944     while (length-- > 0) if (pcre_lcc[*ecode++] != pcre_lcc[*eptr++]) return FALSE;
2945     }
2946     else
2947     {
2948     while (length-- > 0) if (*ecode++ != *eptr++) return FALSE;
2949     }
2950     }
2951     break;
2952    
2953     /* Match a single character repeatedly; different opcodes share code. */
2954    
2955     case OP_EXACT:
2956     min = max = (ecode[1] << 8) + ecode[2];
2957     ecode += 3;
2958     goto REPEATCHAR;
2959    
2960     case OP_UPTO:
2961     case OP_MINUPTO:
2962     min = 0;
2963     max = (ecode[1] << 8) + ecode[2];
2964     minimize = *ecode == OP_MINUPTO;
2965     ecode += 3;
2966     goto REPEATCHAR;
2967    
2968     case OP_STAR:
2969     case OP_MINSTAR:
2970     case OP_PLUS:
2971     case OP_MINPLUS:
2972     case OP_QUERY:
2973     case OP_MINQUERY:
2974     c = *ecode++ - OP_STAR;
2975     minimize = (c & 1) != 0;
2976     min = rep_min[c]; /* Pick up values from tables; */
2977     max = rep_max[c]; /* zero for max => infinity */
2978     if (max == 0) max = INT_MAX;
2979    
2980     /* Common code for all repeated single-character matches. We can give
2981     up quickly if there are fewer than the minimum number of characters left in
2982     the subject. */
2983    
2984     REPEATCHAR:
2985     if (min > md->end_subject - eptr) return FALSE;
2986     c = *ecode++;
2987    
2988     /* The code is duplicated for the caseless and caseful cases, for speed,
2989     since matching characters is likely to be quite common. First, ensure the
2990     minimum number of matches are present. If min = max, continue at the same
2991     level without recursing. Otherwise, if minimizing, keep trying the rest of
2992     the expression and advancing one matching character if failing, up to the
2993     maximum. Alternatively, if maximizing, find the maximum number of
2994     characters and work backwards. */
2995    
2996 nigel 9 DPRINTF(("matching %c{%d,%d} against subject %.*s\n", c, min, max,
2997     max, eptr));
2998 nigel 3
2999     if (md->caseless)
3000     {
3001     c = pcre_lcc[c];
3002     for (i = 1; i <= min; i++) if (c != pcre_lcc[*eptr++]) return FALSE;
3003     if (min == max) continue;
3004     if (minimize)
3005     {
3006     for (i = min;; i++)
3007     {
3008     if (match(eptr, ecode, offset_top, md)) return TRUE;
3009     if (i >= max || eptr >= md->end_subject || c != pcre_lcc[*eptr++])
3010     return FALSE;
3011     }
3012     /* Control never gets here */
3013     }
3014     else
3015     {
3016 nigel 7 const uschar *pp = eptr;
3017 nigel 3 for (i = min; i < max; i++)
3018     {
3019     if (eptr >= md->end_subject || c != pcre_lcc[*eptr]) break;
3020     eptr++;
3021     }
3022     while (eptr >= pp)
3023     if (match(eptr--, ecode, offset_top, md)) return TRUE;
3024     return FALSE;
3025     }
3026     /* Control never gets here */
3027     }
3028    
3029     /* Caseful comparisons */
3030    
3031     else
3032     {
3033     for (i = 1; i <= min; i++) if (c != *eptr++) return FALSE;
3034     if (min == max) continue;
3035     if (minimize)
3036     {
3037     for (i = min;; i++)
3038     {
3039     if (match(eptr, ecode, offset_top, md)) return TRUE;
3040     if (i >= max || eptr >= md->end_subject || c != *eptr++) return FALSE;
3041     }
3042     /* Control never gets here */
3043     }
3044     else
3045     {
3046 nigel 7 const uschar *pp = eptr;
3047 nigel 3 for (i = min; i < max; i++)
3048     {
3049     if (eptr >= md->end_subject || c != *eptr) break;
3050     eptr++;
3051     }
3052     while (eptr >= pp)
3053     if (match(eptr--, ecode, offset_top, md)) return TRUE;
3054     return FALSE;
3055     }
3056     }
3057     /* Control never gets here */
3058    
3059     /* Match a negated single character */
3060    
3061     case OP_NOT:
3062 nigel 9 if (eptr >= md->end_subject) return FALSE;
3063 nigel 3 ecode++;
3064     if (md->caseless)
3065     {
3066     if (pcre_lcc[*ecode++] == pcre_lcc[*eptr++]) return FALSE;
3067     }
3068     else
3069     {
3070     if (*ecode++ == *eptr++) return FALSE;
3071     }
3072     break;
3073    
3074     /* Match a negated single character repeatedly. This is almost a repeat of
3075     the code for a repeated single character, but I haven't found a nice way of
3076     commoning these up that doesn't require a test of the positive/negative
3077     option for each character match. Maybe that wouldn't add very much to the
3078     time taken, but character matching *is* what this is all about... */
3079    
3080     case OP_NOTEXACT:
3081     min = max = (ecode[1] << 8) + ecode[2];
3082     ecode += 3;
3083     goto REPEATNOTCHAR;
3084    
3085     case OP_NOTUPTO:
3086     case OP_NOTMINUPTO:
3087     min = 0;
3088     max = (ecode[1] << 8) + ecode[2];
3089     minimize = *ecode == OP_NOTMINUPTO;
3090     ecode += 3;
3091     goto REPEATNOTCHAR;
3092    
3093     case OP_NOTSTAR:
3094     case OP_NOTMINSTAR:
3095     case OP_NOTPLUS:
3096     case OP_NOTMINPLUS:
3097     case OP_NOTQUERY:
3098     case OP_NOTMINQUERY:
3099     c = *ecode++ - OP_NOTSTAR;
3100     minimize = (c & 1) != 0;
3101     min = rep_min[c]; /* Pick up values from tables; */
3102     max = rep_max[c]; /* zero for max => infinity */
3103     if (max == 0) max = INT_MAX;
3104    
3105     /* Common code for all repeated single-character matches. We can give
3106     up quickly if there are fewer than the minimum number of characters left in
3107     the subject. */
3108    
3109     REPEATNOTCHAR:
3110     if (min > md->end_subject - eptr) return FALSE;
3111     c = *ecode++;
3112    
3113     /* The code is duplicated for the caseless and caseful cases, for speed,
3114     since matching characters is likely to be quite common. First, ensure the
3115     minimum number of matches are present. If min = max, continue at the same
3116     level without recursing. Otherwise, if minimizing, keep trying the rest of
3117     the expression and advancing one matching character if failing, up to the
3118     maximum. Alternatively, if maximizing, find the maximum number of
3119     characters and work backwards. */
3120    
3121 nigel 9 DPRINTF(("negative matching %c{%d,%d} against subject %.*s\n", c, min, max,
3122     max, eptr));
3123 nigel 3
3124     if (md->caseless)
3125     {
3126     c = pcre_lcc[c];
3127     for (i = 1; i <= min; i++) if (c == pcre_lcc[*eptr++]) return FALSE;
3128     if (min == max) continue;
3129     if (minimize)
3130     {
3131     for (i = min;; i++)
3132     {
3133     if (match(eptr, ecode, offset_top, md)) return TRUE;
3134     if (i >= max || eptr >= md->end_subject || c == pcre_lcc[*eptr++])
3135     return FALSE;
3136     }
3137     /* Control never gets here */
3138     }
3139     else
3140     {
3141 nigel 7 const uschar *pp = eptr;
3142 nigel 3 for (i = min; i < max; i++)
3143     {
3144     if (eptr >= md->end_subject || c == pcre_lcc[*eptr]) break;
3145     eptr++;
3146     }
3147     while (eptr >= pp)
3148     if (match(eptr--, ecode, offset_top, md)) return TRUE;
3149     return FALSE;
3150     }
3151     /* Control never gets here */
3152     }
3153    
3154     /* Caseful comparisons */
3155    
3156     else
3157     {
3158     for (i = 1; i <= min; i++) if (c == *eptr++) return FALSE;
3159     if (min == max) continue;
3160     if (minimize)
3161     {
3162     for (i = min;; i++)
3163     {
3164     if (match(eptr, ecode, offset_top, md)) return TRUE;
3165     if (i >= max || eptr >= md->end_subject || c == *eptr++) return FALSE;
3166     }
3167     /* Control never gets here */
3168     }
3169     else
3170     {
3171 nigel 7 const uschar *pp = eptr;
3172 nigel 3 for (i = min; i < max; i++)
3173     {
3174     if (eptr >= md->end_subject || c == *eptr) break;
3175     eptr++;
3176     }
3177     while (eptr >= pp)
3178     if (match(eptr--, ecode, offset_top, md)) return TRUE;
3179     return FALSE;
3180     }
3181     }
3182     /* Control never gets here */
3183    
3184     /* Match a single character type repeatedly; several different opcodes
3185     share code. This is very similar to the code for single characters, but we
3186     repeat it in the interests of efficiency. */
3187    
3188     case OP_TYPEEXACT:
3189     min = max = (ecode[1] << 8) + ecode[2];
3190     minimize = TRUE;
3191     ecode += 3;
3192     goto REPEATTYPE;
3193    
3194     case OP_TYPEUPTO:
3195     case OP_TYPEMINUPTO:
3196     min = 0;
3197     max = (ecode[1] << 8) + ecode[2];
3198     minimize = *ecode == OP_TYPEMINUPTO;
3199     ecode += 3;
3200     goto REPEATTYPE;
3201    
3202     case OP_TYPESTAR:
3203     case OP_TYPEMINSTAR:
3204     case OP_TYPEPLUS:
3205     case OP_TYPEMINPLUS:
3206     case OP_TYPEQUERY:
3207     case OP_TYPEMINQUERY:
3208     c = *ecode++ - OP_TYPESTAR;
3209     minimize = (c & 1) != 0;
3210     min = rep_min[c]; /* Pick up values from tables; */
3211     max = rep_max[c]; /* zero for max => infinity */
3212     if (max == 0) max = INT_MAX;
3213    
3214     /* Common code for all repeated single character type matches */
3215    
3216     REPEATTYPE:
3217     ctype = *ecode++; /* Code for the character type */
3218    
3219     /* First, ensure the minimum number of matches are present. Use inline
3220     code for maximizing the speed, and do the type test once at the start
3221     (i.e. keep it out of the loop). Also test that there are at least the
3222     minimum number of characters before we start. */
3223    
3224     if (min > md->end_subject - eptr) return FALSE;
3225     if (min > 0) switch(ctype)
3226     {
3227     case OP_ANY:
3228     if (!md->dotall)
3229     { for (i = 1; i <= min; i++) if (*eptr++ == '\n') return FALSE; }
3230     else eptr += min;
3231     break;
3232    
3233     case OP_NOT_DIGIT:
3234     for (i = 1; i <= min; i++)
3235     if ((pcre_ctypes[*eptr++] & ctype_digit) != 0) return FALSE;
3236     break;
3237    
3238     case OP_DIGIT:
3239     for (i = 1; i <= min; i++)
3240     if ((pcre_ctypes[*eptr++] & ctype_digit) == 0) return FALSE;
3241     break;
3242    
3243     case OP_NOT_WHITESPACE:
3244     for (i = 1; i <= min; i++)
3245     if ((pcre_ctypes[*eptr++] & ctype_space) != 0) return FALSE;
3246     break;
3247    
3248     case OP_WHITESPACE:
3249     for (i = 1; i <= min; i++)
3250     if ((pcre_ctypes[*eptr++] & ctype_space) == 0) return FALSE;
3251     break;
3252    
3253     case OP_NOT_WORDCHAR:
3254     for (i = 1; i <= min; i++) if ((pcre_ctypes[*eptr++] & ctype_word) != 0)
3255     return FALSE;
3256     break;
3257    
3258     case OP_WORDCHAR:
3259     for (i = 1; i <= min; i++) if ((pcre_ctypes[*eptr++] & ctype_word) == 0)
3260     return FALSE;
3261     break;
3262     }
3263    
3264     /* If min = max, continue at the same level without recursing */
3265    
3266     if (min == max) continue;
3267    
3268     /* If minimizing, we have to test the rest of the pattern before each
3269     subsequent match, so inlining isn't much help; just use the function. */
3270    
3271     if (minimize)
3272     {
3273     for (i = min;; i++)
3274     {
3275     if (match(eptr, ecode, offset_top, md)) return TRUE;
3276     if (i >= max || eptr >= md->end_subject ||
3277     !match_type(ctype, *eptr++, md->dotall))
3278     return FALSE;
3279     }
3280     /* Control never gets here */
3281     }
3282    
3283     /* If maximizing it is worth using inline code for speed, doing the type
3284     test once at the start (i.e. keep it out of the loop). */
3285    
3286     else
3287     {
3288 nigel 7 const uschar *pp = eptr;
3289 nigel 3 switch(ctype)
3290     {
3291     case OP_ANY:
3292     if (!md->dotall)
3293     {
3294     for (i = min; i < max; i++)
3295     {
3296     if (eptr >= md->end_subject || *eptr == '\n') break;
3297     eptr++;
3298     }
3299     }
3300     else
3301     {
3302     c = max - min;
3303     if (c > md->end_subject - eptr) c = md->end_subject - eptr;
3304     eptr += c;
3305     }
3306     break;
3307    
3308     case OP_NOT_DIGIT:
3309     for (i = min; i < max; i++)
3310     {
3311     if (eptr >= md->end_subject || (pcre_ctypes[*eptr] & ctype_digit) != 0)
3312     break;
3313     eptr++;
3314     }
3315     break;
3316    
3317     case OP_DIGIT:
3318     for (i = min; i < max; i++)
3319     {
3320     if (eptr >= md->end_subject || (pcre_ctypes[*eptr] & ctype_digit) == 0)
3321     break;
3322     eptr++;
3323     }
3324     break;
3325    
3326     case OP_NOT_WHITESPACE:
3327     for (i = min; i < max; i++)
3328     {
3329     if (eptr >= md->end_subject || (pcre_ctypes[*eptr] & ctype_space) != 0)
3330     break;
3331     eptr++;
3332     }
3333     break;
3334    
3335     case OP_WHITESPACE:
3336     for (i = min; i < max; i++)
3337     {
3338     if (eptr >= md->end_subject || (pcre_ctypes[*eptr] & ctype_space) == 0)
3339     break;
3340     eptr++;
3341     }
3342     break;
3343    
3344     case OP_NOT_WORDCHAR:
3345     for (i = min; i < max; i++)
3346     {
3347     if (eptr >= md->end_subject || (pcre_ctypes[*eptr] & ctype_word) != 0)
3348     break;
3349     eptr++;
3350     }
3351     break;
3352    
3353     case OP_WORDCHAR:
3354     for (i = min; i < max; i++)
3355     {
3356     if (eptr >= md->end_subject || (pcre_ctypes[*eptr] & ctype_word) == 0)
3357     break;
3358     eptr++;
3359     }
3360     break;
3361     }
3362    
3363     while (eptr >= pp)
3364     if (match(eptr--, ecode, offset_top, md)) return TRUE;
3365     return FALSE;
3366     }
3367     /* Control never gets here */
3368    
3369     /* There's been some horrible disaster. */
3370    
3371     default:
3372 nigel 9 DPRINTF(("Unknown opcode %d\n", *ecode));
3373 nigel 3 md->errorcode = PCRE_ERROR_UNKNOWN_NODE;
3374     return FALSE;
3375     }
3376    
3377     /* Do not stick any code in here without much thought; it is assumed
3378     that "continue" in the code above comes out to here to repeat the main
3379     loop. */
3380    
3381     } /* End of main loop */
3382     /* Control never reaches here */
3383     }
3384    
3385    
3386    
3387     /*************************************************
3388 nigel 9 * Segregate setjmp() *
3389     *************************************************/
3390    
3391     /* The -Wall option of gcc gives warnings for all local variables when setjmp()
3392     is used, even if the coding conforms to the rules of ANSI C. To avoid this, we
3393     hide it in a separate function. This is called only when PCRE_EXTRA is set,
3394     since it's needed only for the extension \X option, and with any luck, a good
3395     compiler will spot the tail recursion and compile it efficiently.
3396    
3397 nigel 11 Arguments:
3398     eptr pointer in subject
3399     ecode position in code
3400     offset_top current top pointer
3401     md pointer to "static" info for the match
3402    
3403     Returns: TRUE if matched
3404 nigel 9 */
3405    
3406 nigel 11 static BOOL
3407     match_with_setjmp(const uschar *eptr, const uschar *ecode, int offset_top,
3408     match_data *match_block)
3409 nigel 9 {
3410 nigel 11 return setjmp(match_block->fail_env) == 0 &&
3411     match(eptr, ecode, offset_top, match_block);
3412 nigel 9 }
3413    
3414    
3415    
3416     /*************************************************
3417 nigel 3 * Execute a Regular Expression *
3418     *************************************************/
3419    
3420     /* This function applies a compiled re to a subject string and picks out
3421     portions of the string if it matches. Two elements in the vector are set for
3422     each substring: the offsets to the start and end of the substring.
3423    
3424     Arguments:
3425     external_re points to the compiled expression
3426     external_extra points to "hints" from pcre_study() or is NULL
3427     subject points to the subject string
3428     length length of subject string (may contain binary zeros)
3429     options option bits
3430     offsets points to a vector of ints to be filled in with offsets
3431     offsetcount the number of elements in the vector
3432    
3433     Returns: > 0 => success; value is the number of elements filled in
3434     = 0 => success, but offsets is not big enough
3435     -1 => failed to match
3436     < -1 => some kind of unexpected problem
3437     */
3438    
3439     int
3440     pcre_exec(const pcre *external_re, const pcre_extra *external_extra,
3441     const char *subject, int length, int options, int *offsets, int offsetcount)
3442     {
3443 nigel 11 int resetcount, ocount;
3444 nigel 3 int first_char = -1;
3445     match_data match_block;
3446 nigel 7 const uschar *start_bits = NULL;
3447 nigel 9 const uschar *start_match = (const uschar *)subject;
3448 nigel 7 const uschar *end_subject;
3449     const real_pcre *re = (const real_pcre *)external_re;
3450     const real_pcre_extra *extra = (const real_pcre_extra *)external_extra;
3451 nigel 11 BOOL using_temporary_offsets = FALSE;
3452 nigel 3 BOOL anchored = ((re->options | options) & PCRE_ANCHORED) != 0;
3453     BOOL startline = (re->options & PCRE_STARTLINE) != 0;
3454    
3455     if ((options & ~PUBLIC_EXEC_OPTIONS) != 0) return PCRE_ERROR_BADOPTION;
3456    
3457     if (re == NULL || subject == NULL ||
3458     (offsets == NULL && offsetcount > 0)) return PCRE_ERROR_NULL;
3459     if (re->magic_number != MAGIC_NUMBER) return PCRE_ERROR_BADMAGIC;
3460    
3461 nigel 7 match_block.start_subject = (const uschar *)subject;
3462 nigel 3 match_block.end_subject = match_block.start_subject + length;
3463     end_subject = match_block.end_subject;
3464    
3465     match_block.caseless = ((re->options | options) & PCRE_CASELESS) != 0;
3466     match_block.runtime_caseless = match_block.caseless &&
3467     (re->options & PCRE_CASELESS) == 0;
3468    
3469     match_block.multiline = ((re->options | options) & PCRE_MULTILINE) != 0;
3470     match_block.dotall = ((re->options | options) & PCRE_DOTALL) != 0;
3471     match_block.endonly = ((re->options | options) & PCRE_DOLLAR_ENDONLY) != 0;
3472    
3473     match_block.notbol = (options & PCRE_NOTBOL) != 0;
3474     match_block.noteol = (options & PCRE_NOTEOL) != 0;
3475    
3476     match_block.errorcode = PCRE_ERROR_NOMATCH; /* Default error */
3477    
3478     /* If the expression has got more back references than the offsets supplied can
3479     hold, we get a temporary bit of working store to use during the matching.
3480 nigel 11 Otherwise, we can use the vector supplied, rounding down its size to a multiple
3481     of 2. */
3482 nigel 3
3483 nigel 11 ocount = offsetcount & (-2);
3484     if (re->top_backref > 0 && re->top_backref >= ocount/2)
3485 nigel 3 {
3486     ocount = re->top_backref * 2 + 2;
3487 nigel 15 match_block.offset_vector = (int *)(pcre_malloc)(ocount * sizeof(int));
3488 nigel 3 if (match_block.offset_vector == NULL) return PCRE_ERROR_NOMEMORY;
3489 nigel 11 using_temporary_offsets = TRUE;
3490 nigel 9 DPRINTF(("Got memory to hold back references\n"));
3491 nigel 3 }
3492     else match_block.offset_vector = offsets;
3493    
3494     match_block.offset_end = ocount;
3495     match_block.offset_overflow = FALSE;
3496    
3497     /* Compute the minimum number of offsets that we need to reset each time. Doing
3498     this makes a huge difference to execution time when there aren't many brackets
3499     in the pattern. */
3500    
3501     resetcount = 2 + re->top_bracket * 2;
3502     if (resetcount > offsetcount) resetcount = ocount;
3503    
3504     /* If MULTILINE is set at exec time but was not set at compile time, and the
3505     anchored flag is set, we must re-check because a setting provoked by ^ in the
3506     pattern is not right in multi-line mode. Calling is_anchored() again here does
3507     the right check, because multiline is now set. If it now yields FALSE, the
3508     expression must have had ^ starting some of its branches. Check to see if
3509     that is true for *all* branches, and if so, set the startline flag. */
3510    
3511     if (match_block. multiline && anchored && (re->options & PCRE_MULTILINE) == 0 &&
3512     !is_anchored(re->code, match_block.multiline))
3513     {
3514     anchored = FALSE;
3515     if (is_startline(re->code)) startline = TRUE;
3516     }
3517    
3518     /* Set up the first character to match, if available. The first_char value is
3519     never set for an anchored regular expression, but the anchoring may be forced
3520     at run time, so we have to test for anchoring. The first char may be unset for
3521     an unanchored pattern, of course. If there's no first char and the pattern was
3522     studied, the may be a bitmap of possible first characters. However, we can
3523     use this only if the caseless state of the studying was correct. */
3524    
3525     if (!anchored)
3526     {
3527     if ((re->options & PCRE_FIRSTSET) != 0)
3528     {
3529     first_char = re->first_char;
3530     if (match_block.caseless) first_char = pcre_lcc[first_char];
3531     }
3532     else
3533     if (!startline && extra != NULL &&
3534     (extra->options & PCRE_STUDY_MAPPED) != 0 &&
3535     ((extra->options & PCRE_STUDY_CASELESS) != 0) == match_block.caseless)
3536     start_bits = extra->start_bits;
3537     }
3538    
3539     /* Loop for unanchored matches; for anchored regexps the loop runs just once. */
3540    
3541     do
3542     {
3543 nigel 9 int rc;
3544 nigel 3 register int *iptr = match_block.offset_vector;
3545     register int *iend = iptr + resetcount;
3546    
3547     /* Reset the maximum number of extractions we might see. */
3548    
3549     while (iptr < iend) *iptr++ = -1;
3550    
3551     /* Advance to a unique first char if possible */
3552    
3553     if (first_char >= 0)
3554     {
3555     if (match_block.caseless)
3556     while (start_match < end_subject && pcre_lcc[*start_match] != first_char)
3557     start_match++;
3558     else
3559     while (start_match < end_subject && *start_match != first_char)
3560     start_match++;
3561     }
3562    
3563     /* Or to just after \n for a multiline match if possible */
3564    
3565     else if (startline)
3566     {
3567     if (start_match > match_block.start_subject)
3568     {
3569     while (start_match < end_subject && start_match[-1] != '\n')
3570     start_match++;
3571     }
3572     }
3573    
3574     /* Or to a non-unique first char */
3575    
3576     else if (start_bits != NULL)
3577     {
3578     while (start_match < end_subject)
3579     {
3580     register int c = *start_match;
3581     if ((start_bits[c/8] & (1 << (c&7))) == 0) start_match++; else break;
3582     }
3583     }
3584    
3585 nigel 9 #ifdef DEBUG /* Sigh. Some compilers never learn. */
3586 nigel 3 printf(">>>> Match against: ");
3587     pchars(start_match, end_subject - start_match, TRUE, &match_block);
3588     printf("\n");
3589 nigel 9 #endif
3590 nigel 3
3591     /* When a match occurs, substrings will be set for all internal extractions;
3592     we just need to set up the whole thing as substring 0 before returning. If
3593     there were too many extractions, set the return code to zero. In the case
3594     where we had to get some local store to hold offsets for backreferences, copy
3595     those back references that we can. In this case there need not be overflow
3596     if certain parts of the pattern were not used.
3597    
3598     Before starting the match, we have to set up a longjmp() target to enable
3599 nigel 9 the "cut" operation to fail a match completely without backtracking. This
3600     is done in a separate function to avoid compiler warnings. We need not do
3601     it unless PCRE_EXTRA is set, since only in that case is the "cut" operation
3602     enabled. */
3603 nigel 3
3604 nigel 11 if ((re->options & PCRE_EXTRA) != 0)
3605     {
3606     if (!match_with_setjmp(start_match, re->code, 2, &match_block))
3607     continue;
3608     }
3609     else if (!match(start_match, re->code, 2, &match_block)) continue;
3610 nigel 9
3611     /* Copy the offset information from temporary store if necessary */
3612    
3613 nigel 11 if (using_temporary_offsets)
3614 nigel 3 {
3615 nigel 9 if (offsetcount >= 4)
3616 nigel 3 {
3617 nigel 9 memcpy(offsets + 2, match_block.offset_vector + 2,
3618     (offsetcount - 2) * sizeof(int));
3619 nigel 11 DPRINTF(("Copied offsets from temporary memory\n"));
3620 nigel 3 }
3621 nigel 9 if (match_block.end_offset_top > offsetcount)
3622     match_block.offset_overflow = TRUE;
3623 nigel 3
3624 nigel 9 DPRINTF(("Freeing temporary memory\n"));
3625     (pcre_free)(match_block.offset_vector);
3626     }
3627 nigel 3
3628 nigel 9 rc = match_block.offset_overflow? 0 : match_block.end_offset_top/2;
3629 nigel 3
3630 nigel 9 if (match_block.offset_end < 2) rc = 0; else
3631     {
3632     offsets[0] = start_match - match_block.start_subject;
3633     offsets[1] = match_block.end_match_ptr - match_block.start_subject;
3634 nigel 3 }
3635 nigel 9
3636     DPRINTF((">>>> returning %d\n", rc));
3637     return rc;
3638 nigel 3 }
3639     while (!anchored &&
3640     match_block.errorcode == PCRE_ERROR_NOMATCH &&
3641     start_match++ < end_subject);
3642    
3643 nigel 11 if (using_temporary_offsets)
3644     {
3645     DPRINTF(("Freeing temporary memory\n"));
3646     (pcre_free)(match_block.offset_vector);
3647     }
3648    
3649 nigel 9 DPRINTF((">>>> returning %d\n", match_block.errorcode));
3650 nigel 3
3651     return match_block.errorcode;
3652     }
3653    
3654     /* End of pcre.c */

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12