/[pcre]/code/trunk/pcre.c
ViewVC logotype

Contents of /code/trunk/pcre.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 17 - (hide annotations) (download)
Sat Feb 24 21:38:29 2007 UTC (6 years, 2 months ago) by nigel
File MIME type: text/plain
File size: 103700 byte(s)
Load pcre-1.07 into code/trunk.

1 nigel 3 /*************************************************
2     * Perl-Compatible Regular Expressions *
3     *************************************************/
4    
5     /*
6     This is a library of functions to support regular expressions whose syntax
7     and semantics are as close as possible to those of the Perl 5 language. See
8     the file Tech.Notes for some information on the internals.
9    
10     Written by: Philip Hazel <ph10@cam.ac.uk>
11    
12 nigel 15 Copyright (c) 1998 University of Cambridge
13 nigel 3
14     -----------------------------------------------------------------------------
15     Permission is granted to anyone to use this software for any purpose on any
16     computer system, and to redistribute it freely, subject to the following
17     restrictions:
18    
19     1. This software is distributed in the hope that it will be useful,
20     but WITHOUT ANY WARRANTY; without even the implied warranty of
21     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
22    
23     2. The origin of this software must not be misrepresented, either by
24     explicit claim or by omission.
25    
26     3. Altered versions must be plainly marked as such, and must not be
27     misrepresented as being the original software.
28     -----------------------------------------------------------------------------
29     */
30    
31    
32     /* Define DEBUG to get debugging output on stdout. */
33    
34     /* #define DEBUG */
35    
36 nigel 9 /* Use a macro for debugging printing, 'cause that eliminates the the use
37     of #ifdef inline, and there are *still* stupid compilers about that don't like
38     indented pre-processor statements. I suppose it's only been 10 years... */
39 nigel 3
40 nigel 9 #ifdef DEBUG
41     #define DPRINTF(p) printf p
42     #else
43     #define DPRINTF(p) /*nothing*/
44     #endif
45    
46 nigel 3 /* Include the internals header, which itself includes Standard C headers plus
47     the external pcre header. */
48    
49     #include "internal.h"
50    
51    
52 nigel 15 /* Allow compilation as C++ source code, should anybody want to do that. */
53    
54     #ifdef __cplusplus
55     #define class pcre_class
56     #endif
57    
58    
59 nigel 3 /* Min and max values for the common repeats; for the maxima, 0 => infinity */
60    
61 nigel 15 static const char rep_min[] = { 0, 0, 1, 1, 0, 0 };
62     static const char rep_max[] = { 0, 0, 0, 0, 1, 1 };
63 nigel 3
64 nigel 13 /* Text forms of OP_ values and things, for debugging (not all used) */
65 nigel 3
66     #ifdef DEBUG
67 nigel 7 static const char *OP_names[] = {
68     "End", "\\A", "\\B", "\\b", "\\D", "\\d",
69 nigel 3 "\\S", "\\s", "\\W", "\\w", "Cut", "\\Z", "^", "$", "Any", "chars",
70     "not",
71     "*", "*?", "+", "+?", "?", "??", "{", "{", "{",
72     "*", "*?", "+", "+?", "?", "??", "{", "{", "{",
73     "*", "*?", "+", "+?", "?", "??", "{", "{", "{",
74     "*", "*?", "+", "+?", "?", "??", "{", "{",
75 nigel 13 "class", "negclass", "Ref",
76 nigel 3 "Alt", "Ket", "KetRmax", "KetRmin", "Assert", "Assert not", "Once",
77     "Brazero", "Braminzero", "Bra"
78     };
79     #endif
80    
81     /* Table for handling escaped characters in the range '0'-'z'. Positive returns
82     are simple data values; negative values are for special things like \d and so
83     on. Zero means further processing is needed (for things like \x), or the escape
84     is invalid. */
85    
86 nigel 15 static const short int escapes[] = {
87 nigel 3 0, 0, 0, 0, 0, 0, 0, 0, /* 0 - 7 */
88     0, 0, ':', ';', '<', '=', '>', '?', /* 8 - ? */
89     '@', -ESC_A, -ESC_B, 0, -ESC_D, 0, 0, 0, /* @ - G */
90     0, 0, 0, 0, 0, 0, 0, 0, /* H - O */
91     0, 0, 0, -ESC_S, 0, 0, 0, -ESC_W, /* P - W */
92     0, 0, -ESC_Z, '[', '\\', ']', '^', '_', /* X - _ */
93     '`', 7, -ESC_b, 0, -ESC_d, 27, '\f', 0, /* ` - g */
94     0, 0, 0, 0, 0, 0, '\n', 0, /* h - o */
95     0, 0, '\r', -ESC_s, '\t', 0, 0, -ESC_w, /* p - w */
96     0, 0, 0 /* x - z */
97     };
98    
99     /* Definition to allow mutual recursion */
100    
101 nigel 13 static BOOL
102     compile_regex(int, int *, uschar **, const uschar **, const char **);
103 nigel 3
104     /* Structure for passing "static" information around between the functions
105     doing the matching, so that they are thread-safe. */
106    
107     typedef struct match_data {
108     int errorcode; /* As it says */
109     int *offset_vector; /* Offset vector */
110     int offset_end; /* One past the end */
111     BOOL offset_overflow; /* Set if too many extractions */
112     BOOL caseless; /* Case-independent flag */
113     BOOL runtime_caseless; /* Caseless forced at run time */
114     BOOL multiline; /* Multiline flag */
115     BOOL notbol; /* NOTBOL flag */
116     BOOL noteol; /* NOTEOL flag */
117     BOOL dotall; /* Dot matches any char */
118     BOOL endonly; /* Dollar not before final \n */
119 nigel 7 const uschar *start_subject; /* Start of the subject string */
120     const uschar *end_subject; /* End of the subject string */
121 nigel 3 jmp_buf fail_env; /* Environment for longjump() break out */
122 nigel 7 const uschar *end_match_ptr; /* Subject position at end match */
123 nigel 3 int end_offset_top; /* Highwater mark at end of match */
124     } match_data;
125    
126    
127    
128     /*************************************************
129     * Global variables *
130     *************************************************/
131    
132     /* PCRE is thread-clean and doesn't use any global variables in the normal
133     sense. However, it calls memory allocation and free functions via the two
134     indirections below, which are can be changed by the caller, but are shared
135     between all threads. */
136    
137     void *(*pcre_malloc)(size_t) = malloc;
138     void (*pcre_free)(void *) = free;
139    
140    
141    
142    
143     /*************************************************
144     * Return version string *
145     *************************************************/
146    
147 nigel 7 const char *
148 nigel 3 pcre_version(void)
149     {
150     return PCRE_VERSION;
151     }
152    
153    
154    
155    
156     /*************************************************
157     * Return info about a compiled pattern *
158     *************************************************/
159    
160     /* This function picks potentially useful data out of the private
161     structure.
162    
163     Arguments:
164     external_re points to compiled code
165     optptr where to pass back the options
166     first_char where to pass back the first character,
167     or -1 if multiline and all branches start ^,
168     or -2 otherwise
169    
170     Returns: number of identifying extraction brackets
171     or negative values on error
172     */
173    
174     int
175     pcre_info(const pcre *external_re, int *optptr, int *first_char)
176     {
177 nigel 7 const real_pcre *re = (const real_pcre *)external_re;
178 nigel 3 if (re == NULL) return PCRE_ERROR_NULL;
179     if (re->magic_number != MAGIC_NUMBER) return PCRE_ERROR_BADMAGIC;
180     if (optptr != NULL) *optptr = (re->options & PUBLIC_OPTIONS);
181     if (first_char != NULL)
182     *first_char = ((re->options & PCRE_FIRSTSET) != 0)? re->first_char :
183     ((re->options & PCRE_STARTLINE) != 0)? -1 : -2;
184     return re->top_bracket;
185     }
186    
187    
188    
189    
190     #ifdef DEBUG
191     /*************************************************
192     * Debugging function to print chars *
193     *************************************************/
194    
195     /* Print a sequence of chars in printable format, stopping at the end of the
196     subject if the requested.
197    
198     Arguments:
199     p points to characters
200     length number to print
201     is_subject TRUE if printing from within md->start_subject
202     md pointer to matching data block, if is_subject is TRUE
203    
204     Returns: nothing
205     */
206    
207 nigel 9 static void
208     pchars(const uschar *p, int length, BOOL is_subject, match_data *md)
209 nigel 3 {
210     int c;
211     if (is_subject && length > md->end_subject - p) length = md->end_subject - p;
212     while (length-- > 0)
213     if (isprint(c = *(p++))) printf("%c", c); else printf("\\x%02x", c);
214     }
215     #endif
216    
217    
218    
219    
220     /*************************************************
221     * Check subpattern for empty operand *
222     *************************************************/
223    
224     /* This function checks a bracketed subpattern to see if any of the paths
225     through it could match an empty string. This is used to diagnose an error if
226     such a subpattern is followed by a quantifier with an unlimited upper bound.
227    
228     Argument:
229     code points to the opening bracket
230    
231     Returns: TRUE or FALSE
232     */
233    
234     static BOOL
235     could_be_empty(uschar *code)
236     {
237     do {
238     uschar *cc = code + 3;
239    
240     /* Scan along the opcodes for this branch; as soon as we find something
241     that matches a non-empty string, break out and advance to test the next
242     branch. If we get to the end of the branch, return TRUE for the whole
243     sub-expression. */
244    
245     for (;;)
246     {
247     /* Test an embedded subpattern; if it could not be empty, break the
248     loop. Otherwise carry on in the branch. */
249    
250 nigel 5 if ((int)(*cc) >= OP_BRA || (int)(*cc) == OP_ONCE)
251 nigel 3 {
252     if (!could_be_empty(cc)) break;
253     do cc += (cc[1] << 8) + cc[2]; while (*cc == OP_ALT);
254     cc += 3;
255     }
256    
257     else switch (*cc)
258     {
259     /* Reached end of a branch: the subpattern may match the empty string */
260    
261     case OP_ALT:
262     case OP_KET:
263     case OP_KETRMAX:
264     case OP_KETRMIN:
265     return TRUE;
266    
267 nigel 17 /* Skip over entire bracket groups with zero lower bound */
268    
269     case OP_BRAZERO:
270     case OP_BRAMINZERO:
271     cc++;
272     /* Fall through */
273    
274 nigel 3 /* Skip over assertive subpatterns */
275    
276     case OP_ASSERT:
277     case OP_ASSERT_NOT:
278     do cc += (cc[1] << 8) + cc[2]; while (*cc == OP_ALT);
279     cc += 3;
280     break;
281    
282     /* Skip over things that don't match chars */
283    
284     case OP_SOD:
285     case OP_EOD:
286     case OP_CIRC:
287     case OP_DOLL:
288     case OP_NOT_WORD_BOUNDARY:
289     case OP_WORD_BOUNDARY:
290     cc++;
291     break;
292    
293     /* Skip over simple repeats with zero lower bound */
294    
295     case OP_STAR:
296     case OP_MINSTAR:
297     case OP_QUERY:
298     case OP_MINQUERY:
299 nigel 5 case OP_NOTSTAR:
300     case OP_NOTMINSTAR:
301     case OP_NOTQUERY:
302     case OP_NOTMINQUERY:
303 nigel 3 case OP_TYPESTAR:
304     case OP_TYPEMINSTAR:
305     case OP_TYPEQUERY:
306     case OP_TYPEMINQUERY:
307     cc += 2;
308     break;
309    
310     /* Skip over UPTOs (lower bound is zero) */
311    
312     case OP_UPTO:
313     case OP_MINUPTO:
314     case OP_TYPEUPTO:
315     case OP_TYPEMINUPTO:
316     cc += 4;
317     break;
318    
319     /* Check a class or a back reference for a zero minimum */
320    
321     case OP_CLASS:
322 nigel 13 case OP_NEGCLASS:
323 nigel 3 case OP_REF:
324 nigel 5 cc += (*cc == OP_REF)? 2 : 33;
325 nigel 3
326     switch (*cc)
327     {
328     case OP_CRSTAR:
329     case OP_CRMINSTAR:
330     case OP_CRQUERY:
331     case OP_CRMINQUERY:
332     cc++;
333     break;
334    
335     case OP_CRRANGE:
336     case OP_CRMINRANGE:
337     if ((cc[1] << 8) + cc[2] != 0) goto NEXT_BRANCH;
338     cc += 3;
339     break;
340    
341     default:
342     goto NEXT_BRANCH;
343     }
344     break;
345    
346     /* Anything else matches at least one character */
347    
348     default:
349     goto NEXT_BRANCH;
350     }
351     }
352    
353     NEXT_BRANCH:
354     code += (code[1] << 8) + code[2];
355     }
356     while (*code == OP_ALT);
357    
358     /* No branches match the empty string */
359    
360     return FALSE;
361     }
362    
363    
364    
365     /*************************************************
366     * Handle escapes *
367     *************************************************/
368    
369     /* This function is called when a \ has been encountered. It either returns a
370     positive value for a simple escape such as \n, or a negative value which
371     encodes one of the more complicated things such as \d. On entry, ptr is
372     pointing at the \. On exit, it is on the final character of the escape
373     sequence.
374    
375     Arguments:
376     ptrptr points to the pattern position pointer
377     errorptr points to the pointer to the error message
378     bracount number of previous extracting brackets
379     options the options bits
380     isclass TRUE if inside a character class
381    
382     Returns: zero or positive => a data character
383     negative => a special escape sequence
384     on error, errorptr is set
385     */
386    
387     static int
388 nigel 7 check_escape(const uschar **ptrptr, const char **errorptr, int bracount,
389     int options, BOOL isclass)
390 nigel 3 {
391 nigel 7 const uschar *ptr = *ptrptr;
392 nigel 3 int c = *(++ptr) & 255; /* Ensure > 0 on signed-char systems */
393     int i;
394    
395     if (c == 0) *errorptr = ERR1;
396    
397     /* Digits or letters may have special meaning; all others are literals. */
398    
399     else if (c < '0' || c > 'z') {}
400    
401     /* Do an initial lookup in a table. A non-zero result is something that can be
402     returned immediately. Otherwise further processing may be required. */
403    
404     else if ((i = escapes[c - '0']) != 0) c = i;
405    
406     /* Escapes that need further processing, or are illegal. */
407    
408     else
409     {
410 nigel 7 const uschar *oldptr;
411 nigel 3 switch (c)
412     {
413     /* The handling of escape sequences consisting of a string of digits
414     starting with one that is not zero is not straightforward. By experiment,
415     the way Perl works seems to be as follows:
416    
417     Outside a character class, the digits are read as a decimal number. If the
418     number is less than 10, or if there are that many previous extracting
419     left brackets, then it is a back reference. Otherwise, up to three octal
420     digits are read to form an escaped byte. Thus \123 is likely to be octal
421     123 (cf \0123, which is octal 012 followed by the literal 3). If the octal
422     value is greater than 377, the least significant 8 bits are taken. Inside a
423     character class, \ followed by a digit is always an octal number. */
424    
425     case '1': case '2': case '3': case '4': case '5':
426     case '6': case '7': case '8': case '9':
427    
428     if (!isclass)
429     {
430     oldptr = ptr;
431     c -= '0';
432     while ((pcre_ctypes[ptr[1]] & ctype_digit) != 0)
433     c = c * 10 + *(++ptr) - '0';
434     if (c < 10 || c <= bracount)
435     {
436     c = -(ESC_REF + c);
437     break;
438     }
439     ptr = oldptr; /* Put the pointer back and fall through */
440     }
441    
442     /* Handle an octal number following \. If the first digit is 8 or 9, Perl
443     generates a binary zero byte and treats the digit as a following literal.
444     Thus we have to pull back the pointer by one. */
445    
446     if ((c = *ptr) >= '8')
447     {
448     ptr--;
449     c = 0;
450     break;
451     }
452    
453     /* \0 always starts an octal number, but we may drop through to here with a
454     larger first octal digit */
455    
456     case '0':
457     c -= '0';
458     while(i++ < 2 && (pcre_ctypes[ptr[1]] & ctype_digit) != 0 &&
459     ptr[1] != '8' && ptr[1] != '9')
460     c = c * 8 + *(++ptr) - '0';
461     break;
462    
463     /* Special escapes not starting with a digit are straightforward */
464    
465     case 'x':
466     c = 0;
467     while (i++ < 2 && (pcre_ctypes[ptr[1]] & ctype_xdigit) != 0)
468     {
469     ptr++;
470     c = c * 16 + pcre_lcc[*ptr] -
471     (((pcre_ctypes[*ptr] & ctype_digit) != 0)? '0' : 'W');
472     }
473     break;
474    
475     case 'c':
476     c = *(++ptr);
477     if (c == 0)
478     {
479     *errorptr = ERR2;
480     return 0;
481     }
482    
483     /* A letter is upper-cased; then the 0x40 bit is flipped */
484    
485     if (c >= 'a' && c <= 'z') c = pcre_fcc[c];
486     c ^= 0x40;
487     break;
488    
489     /* PCRE_EXTRA enables extensions to Perl in the matter of escapes. Any
490     other alphameric following \ is an error if PCRE_EXTRA was set; otherwise,
491     for Perl compatibility, it is a literal. */
492    
493     default:
494     if ((options & PCRE_EXTRA) != 0) switch(c)
495     {
496     case 'X':
497     c = -ESC_X; /* This could be a lookup if it ever got into Perl */
498     break;
499    
500     default:
501     *errorptr = ERR3;
502     break;
503     }
504     break;
505     }
506     }
507    
508     *ptrptr = ptr;
509     return c;
510     }
511    
512    
513    
514     /*************************************************
515     * Check for counted repeat *
516     *************************************************/
517    
518     /* This function is called when a '{' is encountered in a place where it might
519     start a quantifier. It looks ahead to see if it really is a quantifier or not.
520     It is only a quantifier if it is one of the forms {ddd} {ddd,} or {ddd,ddd}
521     where the ddds are digits.
522    
523     Arguments:
524     p pointer to the first char after '{'
525    
526     Returns: TRUE or FALSE
527     */
528    
529     static BOOL
530 nigel 7 is_counted_repeat(const uschar *p)
531 nigel 3 {
532     if ((pcre_ctypes[*p++] & ctype_digit) == 0) return FALSE;
533     while ((pcre_ctypes[*p] & ctype_digit) != 0) p++;
534     if (*p == '}') return TRUE;
535    
536     if (*p++ != ',') return FALSE;
537     if (*p == '}') return TRUE;
538    
539     if ((pcre_ctypes[*p++] & ctype_digit) == 0) return FALSE;
540     while ((pcre_ctypes[*p] & ctype_digit) != 0) p++;
541     return (*p == '}');
542     }
543    
544    
545    
546     /*************************************************
547     * Read repeat counts *
548     *************************************************/
549    
550     /* Read an item of the form {n,m} and return the values. This is called only
551     after is_counted_repeat() has confirmed that a repeat-count quantifier exists,
552     so the syntax is guaranteed to be correct, but we need to check the values.
553    
554     Arguments:
555     p pointer to first char after '{'
556     minp pointer to int for min
557     maxp pointer to int for max
558     returned as -1 if no max
559     errorptr points to pointer to error message
560    
561     Returns: pointer to '}' on success;
562     current ptr on error, with errorptr set
563     */
564    
565 nigel 7 static const uschar *
566     read_repeat_counts(const uschar *p, int *minp, int *maxp, const char **errorptr)
567 nigel 3 {
568     int min = 0;
569     int max = -1;
570    
571     while ((pcre_ctypes[*p] & ctype_digit) != 0) min = min * 10 + *p++ - '0';
572    
573     if (*p == '}') max = min; else
574     {
575     if (*(++p) != '}')
576     {
577     max = 0;
578     while((pcre_ctypes[*p] & ctype_digit) != 0) max = max * 10 + *p++ - '0';
579     if (max < min)
580     {
581     *errorptr = ERR4;
582     return p;
583     }
584     }
585     }
586    
587     /* Do paranoid checks, then fill in the required variables, and pass back the
588     pointer to the terminating '}'. */
589    
590     if (min > 65535 || max > 65535)
591     *errorptr = ERR5;
592     else
593     {
594     *minp = min;
595     *maxp = max;
596     }
597     return p;
598     }
599    
600    
601    
602     /*************************************************
603     * Compile one branch *
604     *************************************************/
605    
606     /* Scan the pattern, compiling it into the code vector.
607    
608     Arguments:
609     options the option bits
610     bracket points to number of brackets used
611     code points to the pointer to the current code point
612     ptrptr points to the current pattern pointer
613     errorptr points to pointer to error message
614    
615     Returns: TRUE on success
616     FALSE, with *errorptr set on error
617     */
618    
619     static BOOL
620 nigel 7 compile_branch(int options, int *brackets, uschar **codeptr,
621     const uschar **ptrptr, const char **errorptr)
622 nigel 3 {
623     int repeat_type, op_type;
624     int repeat_min, repeat_max;
625     int bravalue, length;
626     register int c;
627     register uschar *code = *codeptr;
628 nigel 7 const uschar *ptr = *ptrptr;
629     const uschar *oldptr;
630 nigel 3 uschar *previous = NULL;
631     uschar class[32];
632    
633     /* Switch on next character until the end of the branch */
634    
635     for (;; ptr++)
636     {
637     BOOL negate_class;
638     int class_charcount;
639     int class_lastchar;
640    
641     c = *ptr;
642     if ((options & PCRE_EXTENDED) != 0)
643     {
644     if ((pcre_ctypes[c] & ctype_space) != 0) continue;
645     if (c == '#')
646     {
647     while ((c = *(++ptr)) != 0 && c != '\n');
648     continue;
649     }
650     }
651    
652     switch(c)
653     {
654     /* The branch terminates at end of string, |, or ). */
655    
656     case 0:
657     case '|':
658     case ')':
659     *codeptr = code;
660     *ptrptr = ptr;
661     return TRUE;
662    
663     /* Handle single-character metacharacters */
664    
665     case '^':
666     previous = NULL;
667     *code++ = OP_CIRC;
668     break;
669    
670     case '$':
671     previous = NULL;
672     *code++ = OP_DOLL;
673     break;
674    
675     case '.':
676     previous = code;
677     *code++ = OP_ANY;
678     break;
679    
680     /* Character classes. These always build a 32-byte bitmap of the permitted
681     characters, except in the special case where there is only one character.
682     For negated classes, we build the map as usual, then invert it at the end.
683     */
684    
685     case '[':
686     previous = code;
687    
688 nigel 13 /* If the first character is '^', set the negation flag, and use a
689     different opcode. This only matters if caseless matching is specified at
690     runtime. */
691 nigel 3
692     if ((c = *(++ptr)) == '^')
693     {
694     negate_class = TRUE;
695 nigel 13 *code++ = OP_NEGCLASS;
696 nigel 3 c = *(++ptr);
697     }
698 nigel 13 else
699     {
700     negate_class = FALSE;
701     *code++ = OP_CLASS;
702     }
703 nigel 3
704     /* Keep a count of chars so that we can optimize the case of just a single
705     character. */
706    
707     class_charcount = 0;
708     class_lastchar = -1;
709    
710     /* Initialize the 32-char bit map to all zeros. We have to build the
711     map in a temporary bit of store, in case the class contains only 1
712     character, because in that case the compiled code doesn't use the
713     bit map. */
714    
715     memset(class, 0, 32 * sizeof(uschar));
716    
717     /* Process characters until ] is reached. By writing this as a "do" it
718     means that an initial ] is taken as a data character. */
719    
720     do
721     {
722     if (c == 0)
723     {
724     *errorptr = ERR6;
725     goto FAILED;
726     }
727    
728     /* Backslash may introduce a single character, or it may introduce one
729     of the specials, which just set a flag. Escaped items are checked for
730     validity in the pre-compiling pass. The sequence \b is a special case.
731 nigel 7 Inside a class (and only there) it is treated as backspace. Elsewhere
732 nigel 3 it marks a word boundary. Other escapes have preset maps ready to
733     or into the one we are building. We assume they have more than one
734     character in them, so set class_count bigger than one. */
735    
736     if (c == '\\')
737     {
738     c = check_escape(&ptr, errorptr, *brackets, options, TRUE);
739     if (-c == ESC_b) c = '\b';
740     else if (c < 0)
741     {
742     class_charcount = 10;
743     switch (-c)
744     {
745     case ESC_d:
746     for (c = 0; c < 32; c++) class[c] |= pcre_cbits[c+cbit_digit];
747     continue;
748    
749     case ESC_D:
750     for (c = 0; c < 32; c++) class[c] |= ~pcre_cbits[c+cbit_digit];
751     continue;
752    
753     case ESC_w:
754     for (c = 0; c < 32; c++)
755     class[c] |= (pcre_cbits[c] | pcre_cbits[c+cbit_word]);
756     continue;
757    
758     case ESC_W:
759     for (c = 0; c < 32; c++)
760     class[c] |= ~(pcre_cbits[c] | pcre_cbits[c+cbit_word]);
761     continue;
762    
763     case ESC_s:
764     for (c = 0; c < 32; c++) class[c] |= pcre_cbits[c+cbit_space];
765     continue;
766    
767     case ESC_S:
768     for (c = 0; c < 32; c++) class[c] |= ~pcre_cbits[c+cbit_space];
769     continue;
770    
771     default:
772     *errorptr = ERR7;
773     goto FAILED;
774     }
775     }
776     /* Fall through if single character */
777     }
778    
779     /* A single character may be followed by '-' to form a range. However,
780     Perl does not permit ']' to be the end of the range. A '-' character
781     here is treated as a literal. */
782    
783     if (ptr[1] == '-' && ptr[2] != ']')
784     {
785     int d;
786     ptr += 2;
787     d = *ptr;
788    
789     if (d == 0)
790     {
791     *errorptr = ERR6;
792     goto FAILED;
793     }
794    
795     /* The second part of a range can be a single-character escape, but
796     not any of the other escapes. */
797    
798     if (d == '\\')
799     {
800     d = check_escape(&ptr, errorptr, *brackets, options, TRUE);
801     if (d < 0)
802     {
803     if (d == -ESC_b) d = '\b'; else
804     {
805     *errorptr = ERR7;
806     goto FAILED;
807     }
808     }
809     }
810    
811     if (d < c)
812     {
813     *errorptr = ERR8;
814     goto FAILED;
815     }
816    
817     for (; c <= d; c++)
818     {
819     class[c/8] |= (1 << (c&7));
820     if ((options & PCRE_CASELESS) != 0)
821     {
822     int uc = pcre_fcc[c]; /* flip case */
823     class[uc/8] |= (1 << (uc&7));
824     }
825     class_charcount++; /* in case a one-char range */
826     class_lastchar = c;
827     }
828     continue; /* Go get the next char in the class */
829     }
830    
831     /* Handle a lone single character - we can get here for a normal
832     non-escape char, or after \ that introduces a single character. */
833    
834     class [c/8] |= (1 << (c&7));
835     if ((options & PCRE_CASELESS) != 0)
836     {
837     c = pcre_fcc[c]; /* flip case */
838     class[c/8] |= (1 << (c&7));
839     }
840     class_charcount++;
841     class_lastchar = c;
842     }
843    
844     /* Loop until ']' reached; the check for end of string happens inside the
845     loop. This "while" is the end of the "do" above. */
846    
847     while ((c = *(++ptr)) != ']');
848    
849     /* If class_charcount is 1 and class_lastchar is not negative, we saw
850     precisely one character. This doesn't need the whole 32-byte bit map.
851     We turn it into a 1-character OP_CHAR if it's positive, or OP_NOT if
852     it's negative. */
853    
854     if (class_charcount == 1 && class_lastchar >= 0)
855     {
856     if (negate_class)
857     {
858     code[-1] = OP_NOT;
859     }
860     else
861     {
862     code[-1] = OP_CHARS;
863     *code++ = 1;
864     }
865     *code++ = class_lastchar;
866     }
867    
868     /* Otherwise, negate the 32-byte map if necessary, and copy it into
869     the code vector. */
870    
871     else
872     {
873     if (negate_class)
874     for (c = 0; c < 32; c++) code[c] = ~class[c];
875     else
876     memcpy(code, class, 32);
877     code += 32;
878     }
879     break;
880    
881     /* Various kinds of repeat */
882    
883     case '{':
884     if (!is_counted_repeat(ptr+1)) goto NORMAL_CHAR;
885     ptr = read_repeat_counts(ptr+1, &repeat_min, &repeat_max, errorptr);
886     if (*errorptr != NULL) goto FAILED;
887     goto REPEAT;
888    
889     case '*':
890     repeat_min = 0;
891     repeat_max = -1;
892     goto REPEAT;
893    
894     case '+':
895     repeat_min = 1;
896     repeat_max = -1;
897     goto REPEAT;
898    
899     case '?':
900     repeat_min = 0;
901     repeat_max = 1;
902    
903     REPEAT:
904     if (previous == NULL)
905     {
906     *errorptr = ERR9;
907     goto FAILED;
908     }
909    
910     /* If the next character is '?' this is a minimizing repeat. Advance to the
911     next character. */
912    
913     if (ptr[1] == '?') { repeat_type = 1; ptr++; } else repeat_type = 0;
914    
915     /* If the maximum is zero then the minimum must also be zero; Perl allows
916     this case, so we do too - by simply omitting the item altogether. */
917    
918     if (repeat_max == 0) code = previous;
919    
920     /* If previous was a string of characters, chop off the last one and use it
921     as the subject of the repeat. If there was only one character, we can
922     abolish the previous item altogether. */
923    
924     else if (*previous == OP_CHARS)
925     {
926     int len = previous[1];
927     if (len == 1)
928     {
929     c = previous[2];
930     code = previous;
931     }
932     else
933     {
934     c = previous[len+1];
935     previous[1]--;
936     code--;
937     }
938     op_type = 0; /* Use single-char op codes */
939     goto OUTPUT_SINGLE_REPEAT; /* Code shared with single character types */
940     }
941    
942     /* If previous was a single negated character ([^a] or similar), we use
943     one of the special opcodes, replacing it. The code is shared with single-
944     character repeats by adding a suitable offset into repeat_type. */
945    
946     else if ((int)*previous == OP_NOT)
947     {
948     op_type = OP_NOTSTAR - OP_STAR; /* Use "not" opcodes */
949     c = previous[1];
950     code = previous;
951     goto OUTPUT_SINGLE_REPEAT;
952     }
953    
954     /* If previous was a character type match (\d or similar), abolish it and
955     create a suitable repeat item. The code is shared with single-character
956     repeats by adding a suitable offset into repeat_type. */
957    
958     else if ((int)*previous < OP_EOD || *previous == OP_ANY)
959     {
960     op_type = OP_TYPESTAR - OP_STAR; /* Use type opcodes */
961     c = *previous;
962     code = previous;
963    
964     OUTPUT_SINGLE_REPEAT:
965     repeat_type += op_type; /* Combine both values for many cases */
966    
967     /* A minimum of zero is handled either as the special case * or ?, or as
968     an UPTO, with the maximum given. */
969    
970     if (repeat_min == 0)
971     {
972     if (repeat_max == -1) *code++ = OP_STAR + repeat_type;
973     else if (repeat_max == 1) *code++ = OP_QUERY + repeat_type;
974     else
975     {
976     *code++ = OP_UPTO + repeat_type;
977     *code++ = repeat_max >> 8;
978     *code++ = (repeat_max & 255);
979     }
980     }
981    
982     /* The case {1,} is handled as the special case + */
983    
984     else if (repeat_min == 1 && repeat_max == -1)
985     *code++ = OP_PLUS + repeat_type;
986    
987     /* The case {n,n} is just an EXACT, while the general case {n,m} is
988     handled as an EXACT followed by an UPTO. An EXACT of 1 is optimized. */
989    
990     else
991     {
992     if (repeat_min != 1)
993     {
994     *code++ = OP_EXACT + op_type; /* NB EXACT doesn't have repeat_type */
995     *code++ = repeat_min >> 8;
996     *code++ = (repeat_min & 255);
997     }
998    
999     /* If the mininum is 1 and the previous item was a character string,
1000     we either have to put back the item that got cancelled if the string
1001     length was 1, or add the character back onto the end of a longer
1002     string. For a character type nothing need be done; it will just get put
1003     back naturally. */
1004    
1005     else if (*previous == OP_CHARS)
1006     {
1007     if (code == previous) code += 2; else previous[1]++;
1008     }
1009    
1010 nigel 9 /* If the maximum is unlimited, insert an OP_STAR. */
1011 nigel 3
1012 nigel 9 if (repeat_max < 0)
1013 nigel 3 {
1014     *code++ = c;
1015 nigel 9 *code++ = OP_STAR + repeat_type;
1016     }
1017    
1018     /* Else insert an UPTO if the max is greater than the min. */
1019    
1020     else if (repeat_max != repeat_min)
1021     {
1022     *code++ = c;
1023 nigel 3 repeat_max -= repeat_min;
1024     *code++ = OP_UPTO + repeat_type;
1025     *code++ = repeat_max >> 8;
1026     *code++ = (repeat_max & 255);
1027     }
1028     }
1029    
1030     /* The character or character type itself comes last in all cases. */
1031    
1032     *code++ = c;
1033     }
1034    
1035     /* If previous was a character class or a back reference, we put the repeat
1036     stuff after it. */
1037    
1038 nigel 13 else if (*previous == OP_CLASS || *previous == OP_NEGCLASS ||
1039     *previous == OP_REF)
1040 nigel 3 {
1041     if (repeat_min == 0 && repeat_max == -1)
1042     *code++ = OP_CRSTAR + repeat_type;
1043     else if (repeat_min == 1 && repeat_max == -1)
1044     *code++ = OP_CRPLUS + repeat_type;
1045     else if (repeat_min == 0 && repeat_max == 1)
1046     *code++ = OP_CRQUERY + repeat_type;
1047     else
1048     {
1049     *code++ = OP_CRRANGE + repeat_type;
1050     *code++ = repeat_min >> 8;
1051     *code++ = repeat_min & 255;
1052     if (repeat_max == -1) repeat_max = 0; /* 2-byte encoding for max */
1053     *code++ = repeat_max >> 8;
1054     *code++ = repeat_max & 255;
1055     }
1056     }
1057    
1058     /* If previous was a bracket group, we may have to replicate it in certain
1059     cases. If the maximum repeat count is unlimited, check that the bracket
1060     group cannot match the empty string, and diagnose an error if it can. */
1061    
1062     else if ((int)*previous >= OP_BRA)
1063     {
1064     int i;
1065 nigel 9 int len = code - previous;
1066 nigel 3
1067     if (repeat_max == -1 && could_be_empty(previous))
1068     {
1069     *errorptr = ERR10;
1070     goto FAILED;
1071     }
1072    
1073     /* If the minimum is greater than zero, and the maximum is unlimited or
1074     equal to the minimum, the first copy remains where it is, and is
1075     replicated up to the minimum number of times. This case includes the +
1076     repeat, but of course no replication is needed in that case. */
1077    
1078     if (repeat_min > 0 && (repeat_max == -1 || repeat_max == repeat_min))
1079     {
1080     for (i = 1; i < repeat_min; i++)
1081     {
1082 nigel 9 memcpy(code, previous, len);
1083     code += len;
1084 nigel 3 }
1085     }
1086    
1087     /* If the minimum is zero, stick BRAZERO in front of the first copy.
1088     Then, if there is a fixed upper limit, replicated up to that many times,
1089     sticking BRAZERO in front of all the optional ones. */
1090    
1091     else
1092     {
1093     if (repeat_min == 0)
1094     {
1095 nigel 9 memmove(previous+1, previous, len);
1096 nigel 3 code++;
1097     *previous++ = OP_BRAZERO + repeat_type;
1098     }
1099    
1100     for (i = 1; i < repeat_min; i++)
1101     {
1102 nigel 9 memcpy(code, previous, len);
1103     code += len;
1104 nigel 3 }
1105    
1106     for (i = (repeat_min > 0)? repeat_min : 1; i < repeat_max; i++)
1107     {
1108     *code++ = OP_BRAZERO + repeat_type;
1109 nigel 9 memcpy(code, previous, len);
1110     code += len;
1111 nigel 3 }
1112     }
1113    
1114     /* If the maximum is unlimited, set a repeater in the final copy. */
1115    
1116     if (repeat_max == -1) code[-3] = OP_KETRMAX + repeat_type;
1117     }
1118    
1119     /* Else there's some kind of shambles */
1120    
1121     else
1122     {
1123     *errorptr = ERR11;
1124     goto FAILED;
1125     }
1126    
1127     /* In all case we no longer have a previous item. */
1128    
1129     previous = NULL;
1130     break;
1131    
1132    
1133     /* Start of nested bracket sub-expression, or comment or lookahead.
1134     First deal with special things that can come after a bracket; all are
1135     introduced by ?, and the appearance of any of them means that this is not a
1136     referencing group. They were checked for validity in the first pass over
1137     the string, so we don't have to check for syntax errors here. */
1138    
1139     case '(':
1140     previous = code; /* Only real brackets can be repeated */
1141     if (*(++ptr) == '?')
1142     {
1143     bravalue = OP_BRA;
1144    
1145     switch (*(++ptr))
1146     {
1147     case '#':
1148     case 'i':
1149     case 'm':
1150     case 's':
1151     case 'x':
1152     ptr++;
1153     while (*ptr != ')') ptr++;
1154     previous = NULL;
1155     continue;
1156    
1157     case ':': /* Non-extracting bracket */
1158     ptr++;
1159     break;
1160    
1161     case '=': /* Assertions can't be repeated */
1162     bravalue = OP_ASSERT;
1163     ptr++;
1164     previous = NULL;
1165     break;
1166    
1167     case '!':
1168     bravalue = OP_ASSERT_NOT;
1169     ptr++;
1170     previous = NULL;
1171     break;
1172    
1173     case '>': /* "Match once" brackets */
1174     if ((options & PCRE_EXTRA) != 0) /* Not yet standard */
1175     {
1176     bravalue = OP_ONCE;
1177     ptr++;
1178     previous = NULL;
1179     break;
1180     }
1181     /* Else fall through */
1182    
1183     default:
1184     *errorptr = ERR12;
1185     goto FAILED;
1186     }
1187     }
1188    
1189     /* Else we have a referencing group */
1190    
1191     else
1192     {
1193     if (++(*brackets) > EXTRACT_MAX)
1194     {
1195     *errorptr = ERR13;
1196     goto FAILED;
1197     }
1198     bravalue = OP_BRA + *brackets;
1199     }
1200    
1201     /* Process nested bracketed re; at end pointer is on the bracket. We copy
1202     code into a non-register variable in order to be able to pass its address
1203     because some compilers complain otherwise. */
1204    
1205     *code = bravalue;
1206     {
1207     uschar *mcode = code;
1208     if (!compile_regex(options, brackets, &mcode, &ptr, errorptr))
1209     goto FAILED;
1210     code = mcode;
1211     }
1212    
1213     if (*ptr != ')')
1214     {
1215     *errorptr = ERR14;
1216     goto FAILED;
1217     }
1218     break;
1219    
1220     /* Check \ for being a real metacharacter; if not, fall through and handle
1221     it as a data character at the start of a string. Escape items are checked
1222     for validity in the pre-compiling pass. */
1223    
1224     case '\\':
1225     oldptr = ptr;
1226     c = check_escape(&ptr, errorptr, *brackets, options, FALSE);
1227    
1228     /* Handle metacharacters introduced by \. For ones like \d, the ESC_ values
1229     are arranged to be the negation of the corresponding OP_values. For the
1230     back references, the values are ESC_REF plus the reference number. Only
1231     back references and those types that consume a character may be repeated.
1232     We can test for values between ESC_b and ESC_Z for the latter; this may
1233     have to change if any new ones are ever created. */
1234    
1235     if (c < 0)
1236     {
1237     if (-c >= ESC_REF)
1238     {
1239     int refnum = -c - ESC_REF;
1240     if (*brackets < refnum)
1241     {
1242     *errorptr = ERR15;
1243     goto FAILED;
1244     }
1245     previous = code;
1246     *code++ = OP_REF;
1247     *code++ = refnum;
1248     }
1249     else
1250     {
1251     previous = (-c > ESC_b && -c < ESC_X)? code : NULL;
1252     *code++ = -c;
1253     }
1254     continue;
1255     }
1256    
1257 nigel 7 /* Data character: reset and fall through */
1258 nigel 3
1259     ptr = oldptr;
1260     c = '\\';
1261    
1262     /* Handle a run of data characters until a metacharacter is encountered.
1263     The first character is guaranteed not to be whitespace or # when the
1264     extended flag is set. */
1265    
1266     NORMAL_CHAR:
1267     default:
1268     previous = code;
1269     *code = OP_CHARS;
1270     code += 2;
1271     length = 0;
1272    
1273     do
1274     {
1275     if ((options & PCRE_EXTENDED) != 0)
1276     {
1277     if ((pcre_ctypes[c] & ctype_space) != 0) continue;
1278     if (c == '#')
1279     {
1280     while ((c = *(++ptr)) != 0 && c != '\n');
1281     if (c == 0) break;
1282     continue;
1283     }
1284     }
1285    
1286     /* Backslash may introduce a data char or a metacharacter. Escaped items
1287     are checked for validity in the pre-compiling pass. Stop the string
1288     before a metaitem. */
1289    
1290     if (c == '\\')
1291     {
1292     oldptr = ptr;
1293     c = check_escape(&ptr, errorptr, *brackets, options, FALSE);
1294     if (c < 0) { ptr = oldptr; break; }
1295     }
1296    
1297     /* Ordinary character or single-char escape */
1298    
1299     *code++ = c;
1300     length++;
1301     }
1302    
1303     /* This "while" is the end of the "do" above. */
1304    
1305     while (length < 255 && (pcre_ctypes[c = *(++ptr)] & ctype_meta) == 0);
1306    
1307     /* Compute the length and set it in the data vector, and advance to
1308     the next state. */
1309    
1310     previous[1] = length;
1311 nigel 15 if (length < 255) ptr--;
1312 nigel 3 break;
1313     }
1314     } /* end of big loop */
1315    
1316     /* Control never reaches here by falling through, only by a goto for all the
1317     error states. Pass back the position in the pattern so that it can be displayed
1318     to the user for diagnosing the error. */
1319    
1320     FAILED:
1321     *ptrptr = ptr;
1322     return FALSE;
1323     }
1324    
1325    
1326    
1327    
1328     /*************************************************
1329     * Compile sequence of alternatives *
1330     *************************************************/
1331    
1332     /* On entry, ptr is pointing past the bracket character, but on return
1333     it points to the closing bracket, or vertical bar, or end of string.
1334     The code variable is pointing at the byte into which the BRA operator has been
1335     stored.
1336    
1337     Argument:
1338     options the option bits
1339     brackets -> int containing the number of extracting brackets used
1340     codeptr -> the address of the current code pointer
1341     ptrptr -> the address of the current pattern pointer
1342     errorptr -> pointer to error message
1343    
1344     Returns: TRUE on success
1345     */
1346    
1347     static BOOL
1348 nigel 7 compile_regex(int options, int *brackets, uschar **codeptr,
1349     const uschar **ptrptr, const char **errorptr)
1350 nigel 3 {
1351 nigel 7 const uschar *ptr = *ptrptr;
1352 nigel 3 uschar *code = *codeptr;
1353     uschar *start_bracket = code;
1354    
1355     for (;;)
1356     {
1357     int length;
1358     uschar *last_branch = code;
1359    
1360     code += 3;
1361     if (!compile_branch(options, brackets, &code, &ptr, errorptr))
1362     {
1363     *ptrptr = ptr;
1364     return FALSE;
1365     }
1366    
1367     /* Fill in the length of the last branch */
1368    
1369     length = code - last_branch;
1370     last_branch[1] = length >> 8;
1371     last_branch[2] = length & 255;
1372    
1373     /* Reached end of expression, either ')' or end of pattern. Insert a
1374     terminating ket and the length of the whole bracketed item, and return,
1375     leaving the pointer at the terminating char. */
1376    
1377     if (*ptr != '|')
1378     {
1379     length = code - start_bracket;
1380     *code++ = OP_KET;
1381     *code++ = length >> 8;
1382     *code++ = length & 255;
1383     *codeptr = code;
1384     *ptrptr = ptr;
1385     return TRUE;
1386     }
1387    
1388     /* Another branch follows; insert an "or" node and advance the pointer. */
1389    
1390     *code = OP_ALT;
1391     ptr++;
1392     }
1393     /* Control never reaches here */
1394     }
1395    
1396    
1397    
1398     /*************************************************
1399     * Check for anchored expression *
1400     *************************************************/
1401    
1402     /* Try to find out if this is an anchored regular expression. Consider each
1403     alternative branch. If they all start with OP_SOD or OP_CIRC, or with a bracket
1404     all of whose alternatives start with OP_SOD or OP_CIRC (recurse ad lib), then
1405     it's anchored. However, if this is a multiline pattern, then only OP_SOD
1406     counts, since OP_CIRC can match in the middle.
1407    
1408     A branch is also implicitly anchored if it starts with .* because that will try
1409     the rest of the pattern at all possible matching points, so there is no point
1410     trying them again.
1411    
1412     Argument: points to start of expression (the bracket)
1413     Returns: TRUE or FALSE
1414     */
1415    
1416     static BOOL
1417 nigel 7 is_anchored(register const uschar *code, BOOL multiline)
1418 nigel 3 {
1419     do {
1420     int op = (int)code[3];
1421     if (op >= OP_BRA || op == OP_ASSERT || op == OP_ONCE)
1422     { if (!is_anchored(code+3, multiline)) return FALSE; }
1423     else if (op == OP_TYPESTAR || op == OP_TYPEMINSTAR)
1424     { if (code[4] != OP_ANY) return FALSE; }
1425     else if (op != OP_SOD && (multiline || op != OP_CIRC)) return FALSE;
1426     code += (code[1] << 8) + code[2];
1427     }
1428     while (*code == OP_ALT);
1429     return TRUE;
1430     }
1431    
1432    
1433    
1434     /*************************************************
1435     * Check for start with \n line expression *
1436     *************************************************/
1437    
1438     /* This is called for multiline expressions to try to find out if every branch
1439     starts with ^ so that "first char" processing can be done to speed things up.
1440    
1441     Argument: points to start of expression (the bracket)
1442     Returns: TRUE or FALSE
1443     */
1444    
1445     static BOOL
1446 nigel 7 is_startline(const uschar *code)
1447 nigel 3 {
1448     do {
1449     if ((int)code[3] >= OP_BRA || code[3] == OP_ASSERT)
1450     { if (!is_startline(code+3)) return FALSE; }
1451     else if (code[3] != OP_CIRC) return FALSE;
1452     code += (code[1] << 8) + code[2];
1453     }
1454     while (*code == OP_ALT);
1455     return TRUE;
1456     }
1457    
1458    
1459    
1460     /*************************************************
1461     * Check for fixed first char *
1462     *************************************************/
1463    
1464     /* Try to find out if there is a fixed first character. This is called for
1465     unanchored expressions, as it speeds up their processing quite considerably.
1466     Consider each alternative branch. If they all start with the same char, or with
1467     a bracket all of whose alternatives start with the same char (recurse ad lib),
1468     then we return that char, otherwise -1.
1469    
1470     Argument: points to start of expression (the bracket)
1471     Returns: -1 or the fixed first char
1472     */
1473    
1474     static int
1475     find_firstchar(uschar *code)
1476     {
1477     register int c = -1;
1478     do
1479     {
1480     register int charoffset = 4;
1481    
1482     if ((int)code[3] >= OP_BRA || code[3] == OP_ASSERT)
1483     {
1484     register int d;
1485     if ((d = find_firstchar(code+3)) < 0) return -1;
1486     if (c < 0) c = d; else if (c != d) return -1;
1487     }
1488    
1489     else switch(code[3])
1490     {
1491     default:
1492     return -1;
1493    
1494     case OP_EXACT: /* Fall through */
1495     charoffset++;
1496    
1497     case OP_CHARS: /* Fall through */
1498     charoffset++;
1499    
1500     case OP_PLUS:
1501     case OP_MINPLUS:
1502     if (c < 0) c = code[charoffset]; else if (c != code[charoffset]) return -1;
1503     break;
1504     }
1505     code += (code[1] << 8) + code[2];
1506     }
1507     while (*code == OP_ALT);
1508     return c;
1509     }
1510    
1511    
1512    
1513     /*************************************************
1514     * Compile a Regular Expression *
1515     *************************************************/
1516    
1517     /* This function takes a string and returns a pointer to a block of store
1518     holding a compiled version of the expression.
1519    
1520     Arguments:
1521     pattern the regular expression
1522     options various option bits
1523     errorptr pointer to pointer to error text
1524     erroroffset ptr offset in pattern where error was detected
1525    
1526     Returns: pointer to compiled data block, or NULL on error,
1527     with errorptr and erroroffset set
1528     */
1529    
1530     pcre *
1531 nigel 7 pcre_compile(const char *pattern, int options, const char **errorptr,
1532 nigel 3 int *erroroffset)
1533     {
1534     real_pcre *re;
1535     int spaces = 0;
1536     int length = 3; /* For initial BRA plus length */
1537     int runlength;
1538     int c, size;
1539     int bracount = 0;
1540     int brastack[200];
1541     int top_backref = 0;
1542 nigel 7 unsigned int brastackptr = 0;
1543     uschar *code;
1544     const uschar *ptr;
1545 nigel 3
1546     #ifdef DEBUG
1547     uschar *code_base, *code_end;
1548     #endif
1549    
1550     /* We can't pass back an error message if errorptr is NULL; I guess the best we
1551     can do is just return NULL. */
1552    
1553     if (errorptr == NULL) return NULL;
1554     *errorptr = NULL;
1555    
1556     /* However, we can give a message for this error */
1557    
1558     if (erroroffset == NULL)
1559     {
1560     *errorptr = ERR16;
1561     return NULL;
1562     }
1563     *erroroffset = 0;
1564    
1565     if ((options & ~PUBLIC_OPTIONS) != 0)
1566     {
1567     *errorptr = ERR17;
1568     return NULL;
1569     }
1570    
1571 nigel 9 DPRINTF(("------------------------------------------------------------------\n"));
1572     DPRINTF(("%s\n", pattern));
1573 nigel 3
1574     /* The first thing to do is to make a pass over the pattern to compute the
1575     amount of store required to hold the compiled code. This does not have to be
1576     perfect as long as errors are overestimates. At the same time we can detect any
1577     internal flag settings. Make an attempt to correct for any counted white space
1578     if an "extended" flag setting appears late in the pattern. We can't be so
1579     clever for #-comments. */
1580    
1581 nigel 7 ptr = (const uschar *)(pattern - 1);
1582 nigel 3 while ((c = *(++ptr)) != 0)
1583     {
1584     int min, max;
1585     int class_charcount;
1586    
1587     if ((pcre_ctypes[c] & ctype_space) != 0)
1588     {
1589     if ((options & PCRE_EXTENDED) != 0) continue;
1590     spaces++;
1591     }
1592    
1593     if (c == '#' && (options & PCRE_EXTENDED) != 0)
1594     {
1595     while ((c = *(++ptr)) != 0 && c != '\n');
1596     continue;
1597     }
1598    
1599     switch(c)
1600     {
1601     /* A backslashed item may be an escaped "normal" character or a
1602     character type. For a "normal" character, put the pointers and
1603     character back so that tests for whitespace etc. in the input
1604     are done correctly. */
1605    
1606     case '\\':
1607     {
1608 nigel 7 const uschar *save_ptr = ptr;
1609 nigel 3 c = check_escape(&ptr, errorptr, bracount, options, FALSE);
1610     if (*errorptr != NULL) goto PCRE_ERROR_RETURN;
1611     if (c >= 0)
1612     {
1613     ptr = save_ptr;
1614     c = '\\';
1615     goto NORMAL_CHAR;
1616     }
1617     }
1618     length++;
1619    
1620     /* A back reference needs an additional char, plus either one or 5
1621     bytes for a repeat. We also need to keep the value of the highest
1622     back reference. */
1623    
1624     if (c <= -ESC_REF)
1625     {
1626     int refnum = -c - ESC_REF;
1627     if (refnum > top_backref) top_backref = refnum;
1628     length++; /* For single back reference */
1629     if (ptr[1] == '{' && is_counted_repeat(ptr+2))
1630     {
1631     ptr = read_repeat_counts(ptr+2, &min, &max, errorptr);
1632     if (*errorptr != NULL) goto PCRE_ERROR_RETURN;
1633     if ((min == 0 && (max == 1 || max == -1)) ||
1634     (min == 1 && max == -1))
1635     length++;
1636     else length += 5;
1637     if (ptr[1] == '?') ptr++;
1638     }
1639     }
1640     continue;
1641    
1642     case '^':
1643     case '.':
1644     case '$':
1645     case '*': /* These repeats won't be after brackets; */
1646     case '+': /* those are handled separately */
1647     case '?':
1648     length++;
1649     continue;
1650    
1651     /* This covers the cases of repeats after a single char, metachar, class,
1652     or back reference. */
1653    
1654     case '{':
1655     if (!is_counted_repeat(ptr+1)) goto NORMAL_CHAR;
1656     ptr = read_repeat_counts(ptr+1, &min, &max, errorptr);
1657     if (*errorptr != NULL) goto PCRE_ERROR_RETURN;
1658     if ((min == 0 && (max == 1 || max == -1)) ||
1659     (min == 1 && max == -1))
1660     length++;
1661     else
1662     {
1663     length--; /* Uncount the original char or metachar */
1664     if (min == 1) length++; else if (min > 0) length += 4;
1665     if (max > 0) length += 4; else length += 2;
1666     }
1667     if (ptr[1] == '?') ptr++;
1668     continue;
1669    
1670     /* An alternation contains an offset to the next branch or ket. */
1671     case '|':
1672     length += 3;
1673     continue;
1674    
1675     /* A character class uses 33 characters. Don't worry about character types
1676     that aren't allowed in classes - they'll get picked up during the compile.
1677     A character class that contains only one character uses 2 or 3 bytes,
1678     depending on whether it is negated or not. Notice this where we can. */
1679    
1680     case '[':
1681     class_charcount = 0;
1682     if (*(++ptr) == '^') ptr++;
1683     do
1684     {
1685     if (*ptr == '\\')
1686     {
1687 nigel 9 int ch = check_escape(&ptr, errorptr, bracount, options, TRUE);
1688 nigel 3 if (*errorptr != NULL) goto PCRE_ERROR_RETURN;
1689 nigel 9 if (-ch == ESC_b) class_charcount++; else class_charcount = 10;
1690 nigel 3 }
1691     else class_charcount++;
1692     ptr++;
1693     }
1694     while (*ptr != 0 && *ptr != ']');
1695    
1696     /* Repeats for negated single chars are handled by the general code */
1697    
1698     if (class_charcount == 1) length += 3; else
1699     {
1700     length += 33;
1701    
1702     /* A repeat needs either 1 or 5 bytes. */
1703    
1704 nigel 9 if (*ptr != 0 && ptr[1] == '{' && is_counted_repeat(ptr+2))
1705 nigel 3 {
1706     ptr = read_repeat_counts(ptr+2, &min, &max, errorptr);
1707     if (*errorptr != NULL) goto PCRE_ERROR_RETURN;
1708     if ((min == 0 && (max == 1 || max == -1)) ||
1709     (min == 1 && max == -1))
1710     length++;
1711     else length += 5;
1712     if (ptr[1] == '?') ptr++;
1713     }
1714     }
1715     continue;
1716    
1717     /* Brackets may be genuine groups or special things */
1718    
1719     case '(':
1720    
1721     /* Handle special forms of bracket, which all start (? */
1722    
1723     if (ptr[1] == '?') switch (c = ptr[2])
1724     {
1725     /* Skip over comments entirely */
1726     case '#':
1727     ptr += 3;
1728     while (*ptr != 0 && *ptr != ')') ptr++;
1729     if (*ptr == 0)
1730     {
1731     *errorptr = ERR18;
1732     goto PCRE_ERROR_RETURN;
1733     }
1734     continue;
1735    
1736     /* Non-referencing groups and lookaheads just move the pointer on, and
1737     then behave like a non-special bracket, except that they don't increment
1738     the count of extracting brackets. */
1739    
1740     case ':':
1741     case '=':
1742     case '!':
1743     ptr += 2;
1744     break;
1745    
1746     /* Ditto for the "once only" bracket, allowed only if the extra bit
1747     is set. */
1748    
1749     case '>':
1750     if ((options & PCRE_EXTRA) != 0)
1751     {
1752     ptr += 2;
1753     break;
1754     }
1755     /* Else fall thourh */
1756    
1757     /* Else loop setting valid options until ) is met. Anything else is an
1758     error. */
1759    
1760     default:
1761     ptr += 2;
1762     for (;; ptr++)
1763     {
1764     if ((c = *ptr) == 'i')
1765     {
1766     options |= PCRE_CASELESS;
1767     continue;
1768     }
1769     else if ((c = *ptr) == 'm')
1770     {
1771     options |= PCRE_MULTILINE;
1772     continue;
1773     }
1774     else if (c == 's')
1775     {
1776     options |= PCRE_DOTALL;
1777     continue;
1778     }
1779     else if (c == 'x')
1780     {
1781     options |= PCRE_EXTENDED;
1782     length -= spaces; /* Already counted spaces */
1783     continue;
1784     }
1785     else if (c == ')') break;
1786    
1787     *errorptr = ERR12;
1788     goto PCRE_ERROR_RETURN;
1789     }
1790     continue; /* End of this bracket handling */
1791     }
1792    
1793     /* Extracting brackets must be counted so we can process escapes in a
1794     Perlish way. */
1795    
1796     else bracount++;
1797    
1798     /* Non-special forms of bracket. Save length for computing whole length
1799     at end if there's a repeat that requires duplication of the group. */
1800    
1801     if (brastackptr >= sizeof(brastack)/sizeof(int))
1802     {
1803     *errorptr = ERR19;
1804     goto PCRE_ERROR_RETURN;
1805     }
1806    
1807     brastack[brastackptr++] = length;
1808     length += 3;
1809     continue;
1810    
1811     /* Handle ket. Look for subsequent max/min; for certain sets of values we
1812 nigel 9 have to replicate this bracket up to that many times. If brastackptr is
1813     0 this is an unmatched bracket which will generate an error, but take care
1814     not to try to access brastack[-1]. */
1815 nigel 3
1816     case ')':
1817     length += 3;
1818     {
1819 nigel 9 int minval = 1;
1820     int maxval = 1;
1821     int duplength = (brastackptr > 0)? length - brastack[--brastackptr] : 0;
1822 nigel 3
1823     /* Leave ptr at the final char; for read_repeat_counts this happens
1824     automatically; for the others we need an increment. */
1825    
1826     if ((c = ptr[1]) == '{' && is_counted_repeat(ptr+2))
1827     {
1828 nigel 9 ptr = read_repeat_counts(ptr+2, &minval, &maxval, errorptr);
1829 nigel 3 if (*errorptr != NULL) goto PCRE_ERROR_RETURN;
1830     }
1831 nigel 9 else if (c == '*') { minval = 0; maxval = -1; ptr++; }
1832     else if (c == '+') { maxval = -1; ptr++; }
1833     else if (c == '?') { minval = 0; ptr++; }
1834 nigel 3
1835 nigel 9 /* If there is a minimum > 1 we have to replicate up to minval-1 times;
1836     if there is a limited maximum we have to replicate up to maxval-1 times
1837     and allow for a BRAZERO item before each optional copy, as we also have
1838     to do before the first copy if the minimum is zero. */
1839 nigel 3
1840 nigel 9 if (minval == 0) length++;
1841     else if (minval > 1) length += (minval - 1) * duplength;
1842     if (maxval > minval) length += (maxval - minval) * (duplength + 1);
1843 nigel 3 }
1844     continue;
1845    
1846     /* Non-special character. For a run of such characters the length required
1847     is the number of characters + 2, except that the maximum run length is 255.
1848     We won't get a skipped space or a non-data escape or the start of a #
1849     comment as the first character, so the length can't be zero. */
1850    
1851     NORMAL_CHAR:
1852     default:
1853     length += 2;
1854     runlength = 0;
1855     do
1856     {
1857     if ((pcre_ctypes[c] & ctype_space) != 0)
1858     {
1859     if ((options & PCRE_EXTENDED) != 0) continue;
1860     spaces++;
1861     }
1862    
1863     if (c == '#' && (options & PCRE_EXTENDED) != 0)
1864     {
1865     while ((c = *(++ptr)) != 0 && c != '\n');
1866     continue;
1867     }
1868    
1869     /* Backslash may introduce a data char or a metacharacter; stop the
1870     string before the latter. */
1871    
1872     if (c == '\\')
1873     {
1874 nigel 7 const uschar *saveptr = ptr;
1875 nigel 3 c = check_escape(&ptr, errorptr, bracount, options, FALSE);
1876     if (*errorptr != NULL) goto PCRE_ERROR_RETURN;
1877     if (c < 0) { ptr = saveptr; break; }
1878     }
1879    
1880     /* Ordinary character or single-char escape */
1881    
1882     runlength++;
1883     }
1884    
1885     /* This "while" is the end of the "do" above. */
1886    
1887     while (runlength < 255 && (pcre_ctypes[c = *(++ptr)] & ctype_meta) == 0);
1888    
1889     ptr--;
1890     length += runlength;
1891     continue;
1892     }
1893     }
1894    
1895     length += 4; /* For final KET and END */
1896    
1897     if (length > 65539)
1898     {
1899     *errorptr = ERR20;
1900     return NULL;
1901     }
1902    
1903     /* Compute the size of data block needed and get it, either from malloc or
1904 nigel 9 externally provided function. We specify "code[0]" in the offsetof() expression
1905     rather than just "code", because it has been reported that one broken compiler
1906     fails on "code" because it is also an independent variable. It should make no
1907     difference to the value of the offsetof(). */
1908 nigel 3
1909 nigel 9 size = length + offsetof(real_pcre, code[0]);
1910 nigel 3 re = (real_pcre *)(pcre_malloc)(size);
1911    
1912     if (re == NULL)
1913     {
1914     *errorptr = ERR21;
1915     return NULL;
1916     }
1917    
1918 nigel 9 /* Put in the magic number and the options. */
1919    
1920 nigel 3 re->magic_number = MAGIC_NUMBER;
1921     re->options = options;
1922    
1923     /* Set up a starting, non-extracting bracket, then compile the expression. On
1924     error, *errorptr will be set non-NULL, so we don't need to look at the result
1925     of the function here. */
1926    
1927 nigel 7 ptr = (const uschar *)pattern;
1928 nigel 3 code = re->code;
1929     *code = OP_BRA;
1930     bracount = 0;
1931     (void)compile_regex(options, &bracount, &code, &ptr, errorptr);
1932     re->top_bracket = bracount;
1933     re->top_backref = top_backref;
1934    
1935     /* If not reached end of pattern on success, there's an excess bracket. */
1936    
1937     if (*errorptr == NULL && *ptr != 0) *errorptr = ERR22;
1938    
1939     /* Fill in the terminating state and check for disastrous overflow, but
1940     if debugging, leave the test till after things are printed out. */
1941    
1942     *code++ = OP_END;
1943    
1944     #ifndef DEBUG
1945     if (code - re->code > length) *errorptr = ERR23;
1946     #endif
1947    
1948     /* Failed to compile */
1949    
1950     if (*errorptr != NULL)
1951     {
1952     (pcre_free)(re);
1953     PCRE_ERROR_RETURN:
1954 nigel 7 *erroroffset = ptr - (const uschar *)pattern;
1955 nigel 3 return NULL;
1956     }
1957    
1958     /* If the anchored option was not passed, set flag if we can determine that it
1959     is anchored by virtue of ^ characters or \A or anything else. Otherwise, see if
1960     we can determine what the first character has to be, because that speeds up
1961     unanchored matches no end. In the case of multiline matches, an alternative is
1962     to set the PCRE_STARTLINE flag if all branches start with ^. */
1963    
1964     if ((options & PCRE_ANCHORED) == 0)
1965     {
1966     if (is_anchored(re->code, (options & PCRE_MULTILINE) != 0))
1967     re->options |= PCRE_ANCHORED;
1968     else
1969     {
1970 nigel 9 int ch = find_firstchar(re->code);
1971     if (ch >= 0)
1972 nigel 3 {
1973 nigel 9 re->first_char = ch;
1974 nigel 3 re->options |= PCRE_FIRSTSET;
1975     }
1976     else if (is_startline(re->code))
1977     re->options |= PCRE_STARTLINE;
1978     }
1979     }
1980    
1981     /* Print out the compiled data for debugging */
1982    
1983     #ifdef DEBUG
1984    
1985     printf("Length = %d top_bracket = %d top_backref=%d\n",
1986     length, re->top_bracket, re->top_backref);
1987    
1988     if (re->options != 0)
1989     {
1990     printf("%s%s%s%s%s%s%s\n",
1991     ((re->options & PCRE_ANCHORED) != 0)? "anchored " : "",
1992     ((re->options & PCRE_CASELESS) != 0)? "caseless " : "",
1993     ((re->options & PCRE_EXTENDED) != 0)? "extended " : "",
1994     ((re->options & PCRE_MULTILINE) != 0)? "multiline " : "",
1995     ((re->options & PCRE_DOTALL) != 0)? "dotall " : "",
1996     ((re->options & PCRE_DOLLAR_ENDONLY) != 0)? "endonly " : "",
1997     ((re->options & PCRE_EXTRA) != 0)? "extra " : "");
1998     }
1999    
2000     if ((re->options & PCRE_FIRSTSET) != 0)
2001     {
2002     if (isprint(re->first_char)) printf("First char = %c\n", re->first_char);
2003     else printf("First char = \\x%02x\n", re->first_char);
2004     }
2005    
2006     code_end = code;
2007     code_base = code = re->code;
2008    
2009     while (code < code_end)
2010     {
2011     int charlength;
2012    
2013     printf("%3d ", code - code_base);
2014    
2015     if (*code >= OP_BRA)
2016     {
2017     printf("%3d Bra %d", (code[1] << 8) + code[2], *code - OP_BRA);
2018     code += 2;
2019     }
2020    
2021     else switch(*code)
2022     {
2023     case OP_CHARS:
2024     charlength = *(++code);
2025     printf("%3d ", charlength);
2026     while (charlength-- > 0)
2027     if (isprint(c = *(++code))) printf("%c", c); else printf("\\x%02x", c);
2028     break;
2029    
2030     case OP_KETRMAX:
2031     case OP_KETRMIN:
2032     case OP_ALT:
2033     case OP_KET:
2034     case OP_ASSERT:
2035     case OP_ASSERT_NOT:
2036     case OP_ONCE:
2037     printf("%3d %s", (code[1] << 8) + code[2], OP_names[*code]);
2038     code += 2;
2039     break;
2040    
2041     case OP_STAR:
2042     case OP_MINSTAR:
2043     case OP_PLUS:
2044     case OP_MINPLUS:
2045     case OP_QUERY:
2046     case OP_MINQUERY:
2047     case OP_TYPESTAR:
2048     case OP_TYPEMINSTAR:
2049     case OP_TYPEPLUS:
2050     case OP_TYPEMINPLUS:
2051     case OP_TYPEQUERY:
2052     case OP_TYPEMINQUERY:
2053     if (*code >= OP_TYPESTAR)
2054     printf(" %s", OP_names[code[1]]);
2055     else if (isprint(c = code[1])) printf(" %c", c);
2056     else printf(" \\x%02x", c);
2057     printf("%s", OP_names[*code++]);
2058     break;
2059    
2060     case OP_EXACT:
2061     case OP_UPTO:
2062     case OP_MINUPTO:
2063     if (isprint(c = code[3])) printf(" %c{", c);
2064     else printf(" \\x%02x{", c);
2065 nigel 11 if (*code != OP_EXACT) printf("0,");
2066 nigel 3 printf("%d}", (code[1] << 8) + code[2]);
2067     if (*code == OP_MINUPTO) printf("?");
2068     code += 3;
2069     break;
2070    
2071     case OP_TYPEEXACT:
2072     case OP_TYPEUPTO:
2073     case OP_TYPEMINUPTO:
2074     printf(" %s{", OP_names[code[3]]);
2075     if (*code != OP_TYPEEXACT) printf(",");
2076     printf("%d}", (code[1] << 8) + code[2]);
2077     if (*code == OP_TYPEMINUPTO) printf("?");
2078     code += 3;
2079     break;
2080    
2081     case OP_NOT:
2082     if (isprint(c = *(++code))) printf(" [^%c]", c);
2083     else printf(" [^\\x%02x]", c);
2084     break;
2085    
2086     case OP_NOTSTAR:
2087     case OP_NOTMINSTAR:
2088     case OP_NOTPLUS:
2089     case OP_NOTMINPLUS:
2090     case OP_NOTQUERY:
2091     case OP_NOTMINQUERY:
2092     if (isprint(c = code[1])) printf(" [^%c]", c);
2093     else printf(" [^\\x%02x]", c);
2094     printf("%s", OP_names[*code++]);
2095     break;
2096    
2097     case OP_NOTEXACT:
2098     case OP_NOTUPTO:
2099     case OP_NOTMINUPTO:
2100     if (isprint(c = code[3])) printf(" [^%c]{", c);
2101     else printf(" [^\\x%02x]{", c);
2102     if (*code != OP_NOTEXACT) printf(",");
2103     printf("%d}", (code[1] << 8) + code[2]);
2104     if (*code == OP_NOTMINUPTO) printf("?");
2105     code += 3;
2106     break;
2107    
2108     case OP_REF:
2109     printf(" \\%d", *(++code));
2110 nigel 9 code ++;
2111     goto CLASS_REF_REPEAT;
2112 nigel 3
2113     case OP_CLASS:
2114 nigel 13 case OP_NEGCLASS:
2115 nigel 3 {
2116     int i, min, max;
2117    
2118 nigel 13 if (*code++ == OP_CLASS) printf(" [");
2119     else printf(" ^[");
2120 nigel 3
2121     for (i = 0; i < 256; i++)
2122     {
2123     if ((code[i/8] & (1 << (i&7))) != 0)
2124     {
2125     int j;
2126     for (j = i+1; j < 256; j++)
2127     if ((code[j/8] & (1 << (j&7))) == 0) break;
2128     if (i == '-' || i == ']') printf("\\");
2129     if (isprint(i)) printf("%c", i); else printf("\\x%02x", i);
2130     if (--j > i)
2131     {
2132     printf("-");
2133     if (j == '-' || j == ']') printf("\\");
2134     if (isprint(j)) printf("%c", j); else printf("\\x%02x", j);
2135     }
2136     i = j;
2137     }
2138     }
2139     printf("]");
2140     code += 32;
2141    
2142 nigel 9 CLASS_REF_REPEAT:
2143    
2144 nigel 3 switch(*code)
2145     {
2146     case OP_CRSTAR:
2147     case OP_CRMINSTAR:
2148     case OP_CRPLUS:
2149     case OP_CRMINPLUS:
2150     case OP_CRQUERY:
2151     case OP_CRMINQUERY:
2152     printf("%s", OP_names[*code]);
2153     break;
2154    
2155     case OP_CRRANGE:
2156     case OP_CRMINRANGE:
2157     min = (code[1] << 8) + code[2];
2158     max = (code[3] << 8) + code[4];
2159     if (max == 0) printf("{%d,}", min);
2160     else printf("{%d,%d}", min, max);
2161     if (*code == OP_CRMINRANGE) printf("?");
2162     code += 4;
2163     break;
2164    
2165     default:
2166     code--;
2167     }
2168     }
2169     break;
2170    
2171     /* Anything else is just a one-node item */
2172    
2173     default:
2174     printf(" %s", OP_names[*code]);
2175     break;
2176     }
2177    
2178     code++;
2179     printf("\n");
2180     }
2181     printf("------------------------------------------------------------------\n");
2182    
2183     /* This check is done here in the debugging case so that the code that
2184     was compiled can be seen. */
2185    
2186     if (code - re->code > length)
2187     {
2188     *errorptr = ERR23;
2189     (pcre_free)(re);
2190     *erroroffset = ptr - (uschar *)pattern;
2191     return NULL;
2192     }
2193     #endif
2194    
2195     return (pcre *)re;
2196     }
2197    
2198    
2199    
2200     /*************************************************
2201     * Match a character type *
2202     *************************************************/
2203    
2204     /* Not used in all the places it might be as it's sometimes faster
2205     to put the code inline.
2206    
2207     Arguments:
2208     type the character type
2209     c the character
2210     dotall the dotall flag
2211    
2212     Returns: TRUE if character is of the type
2213     */
2214    
2215     static BOOL
2216     match_type(int type, int c, BOOL dotall)
2217     {
2218    
2219     #ifdef DEBUG
2220     if (isprint(c)) printf("matching subject %c against ", c);
2221     else printf("matching subject \\x%02x against ", c);
2222     printf("%s\n", OP_names[type]);
2223     #endif
2224    
2225     switch(type)
2226     {
2227     case OP_ANY: return dotall || c != '\n';
2228     case OP_NOT_DIGIT: return (pcre_ctypes[c] & ctype_digit) == 0;
2229     case OP_DIGIT: return (pcre_ctypes[c] & ctype_digit) != 0;
2230     case OP_NOT_WHITESPACE: return (pcre_ctypes[c] & ctype_space) == 0;
2231     case OP_WHITESPACE: return (pcre_ctypes[c] & ctype_space) != 0;
2232     case OP_NOT_WORDCHAR: return (pcre_ctypes[c] & ctype_word) == 0;
2233     case OP_WORDCHAR: return (pcre_ctypes[c] & ctype_word) != 0;
2234     }
2235     return FALSE;
2236     }
2237    
2238    
2239    
2240     /*************************************************
2241     * Match a back-reference *
2242     *************************************************/
2243    
2244     /* If a back reference hasn't been set, the match fails.
2245    
2246     Arguments:
2247     number reference number
2248     eptr points into the subject
2249     length length to be matched
2250     md points to match data block
2251    
2252     Returns: TRUE if matched
2253     */
2254    
2255     static BOOL
2256 nigel 7 match_ref(int number, register const uschar *eptr, int length, match_data *md)
2257 nigel 3 {
2258 nigel 7 const uschar *p = md->start_subject + md->offset_vector[number];
2259 nigel 3
2260     #ifdef DEBUG
2261     if (eptr >= md->end_subject)
2262     printf("matching subject <null>");
2263     else
2264     {
2265     printf("matching subject ");
2266     pchars(eptr, length, TRUE, md);
2267     }
2268     printf(" against backref ");
2269     pchars(p, length, FALSE, md);
2270     printf("\n");
2271     #endif
2272    
2273     /* Always fail if not enough characters left */
2274    
2275     if (length > md->end_subject - p) return FALSE;
2276    
2277     /* Separate the caselesss case for speed */
2278    
2279     if (md->caseless)
2280     { while (length-- > 0) if (pcre_lcc[*p++] != pcre_lcc[*eptr++]) return FALSE; }
2281     else
2282     { while (length-- > 0) if (*p++ != *eptr++) return FALSE; }
2283    
2284     return TRUE;
2285     }
2286    
2287    
2288    
2289     /*************************************************
2290     * Match from current position *
2291     *************************************************/
2292    
2293     /* On entry ecode points to the first opcode, and eptr to the first character.
2294    
2295     Arguments:
2296     eptr pointer in subject
2297     ecode position in code
2298     offset_top current top pointer
2299     md pointer to "static" info for the match
2300    
2301     Returns: TRUE if matched
2302     */
2303    
2304     static BOOL
2305 nigel 7 match(register const uschar *eptr, register const uschar *ecode, int offset_top,
2306 nigel 3 match_data *md)
2307     {
2308     for (;;)
2309     {
2310     int min, max, ctype;
2311     register int i;
2312     register int c;
2313 nigel 7 BOOL minimize = FALSE;
2314 nigel 3
2315     /* Opening bracket. Check the alternative branches in turn, failing if none
2316     match. We have to set the start offset if required and there is space
2317     in the offset vector so that it is available for subsequent back references
2318     if the bracket matches. However, if the bracket fails, we must put back the
2319     previous value of both offsets in case they were set by a previous copy of
2320     the same bracket. Don't worry about setting the flag for the error case here;
2321     that is handled in the code for KET. */
2322    
2323     if ((int)*ecode >= OP_BRA)
2324     {
2325     int number = (*ecode - OP_BRA) << 1;
2326 nigel 7 int save_offset1 = 0, save_offset2 = 0;
2327 nigel 3
2328 nigel 9 DPRINTF(("start bracket %d\n", number/2));
2329 nigel 3
2330     if (number > 0 && number < md->offset_end)
2331     {
2332     save_offset1 = md->offset_vector[number];
2333     save_offset2 = md->offset_vector[number+1];
2334     md->offset_vector[number] = eptr - md->start_subject;
2335    
2336 nigel 9 DPRINTF(("saving %d %d\n", save_offset1, save_offset2));
2337 nigel 3 }
2338    
2339     /* Recurse for all the alternatives. */
2340    
2341     do
2342     {
2343     if (match(eptr, ecode+3, offset_top, md)) return TRUE;
2344     ecode += (ecode[1] << 8) + ecode[2];
2345     }
2346     while (*ecode == OP_ALT);
2347    
2348 nigel 9 DPRINTF(("bracket %d failed\n", number/2));
2349 nigel 3
2350     if (number > 0 && number < md->offset_end)
2351     {
2352     md->offset_vector[number] = save_offset1;
2353     md->offset_vector[number+1] = save_offset2;
2354     }
2355    
2356     return FALSE;
2357     }
2358    
2359     /* Other types of node can be handled by a switch */
2360    
2361     switch(*ecode)
2362     {
2363     case OP_END:
2364     md->end_match_ptr = eptr; /* Record where we ended */
2365     md->end_offset_top = offset_top; /* and how many extracts were taken */
2366     return TRUE;
2367    
2368     /* The equivalent of Prolog's "cut" - if the rest doesn't match, the
2369     whole thing doesn't match, so we have to get out via a longjmp(). */
2370    
2371     case OP_CUT:
2372     if (match(eptr, ecode+1, offset_top, md)) return TRUE;
2373     longjmp(md->fail_env, 1);
2374    
2375     /* Assertion brackets. Check the alternative branches in turn - the
2376     matching won't pass the KET for an assertion. If any one branch matches,
2377     the assertion is true. */
2378    
2379     case OP_ASSERT:
2380     do
2381     {
2382     if (match(eptr, ecode+3, offset_top, md)) break;
2383     ecode += (ecode[1] << 8) + ecode[2];
2384     }
2385     while (*ecode == OP_ALT);
2386     if (*ecode == OP_KET) return FALSE;
2387    
2388     /* Continue from after the assertion, updating the offsets high water
2389     mark, since extracts may have been taken during the assertion. */
2390    
2391     do ecode += (ecode[1] << 8) + ecode[2]; while (*ecode == OP_ALT);
2392     ecode += 3;
2393     offset_top = md->end_offset_top;
2394     continue;
2395    
2396     /* Negative assertion: all branches must fail to match */
2397    
2398     case OP_ASSERT_NOT:
2399     do
2400     {
2401     if (match(eptr, ecode+3, offset_top, md)) return FALSE;
2402     ecode += (ecode[1] << 8) + ecode[2];
2403     }
2404     while (*ecode == OP_ALT);
2405     ecode += 3;
2406     continue;
2407    
2408     /* "Once" brackets are like assertion brackets except that after a match,
2409     the point in the subject string is not moved back. Thus there can never be
2410 nigel 5 a move back into the brackets. Check the alternative branches in turn - the
2411 nigel 3 matching won't pass the KET for this kind of subpattern. If any one branch
2412     matches, we carry on, leaving the subject pointer. */
2413    
2414     case OP_ONCE:
2415     do
2416     {
2417     if (match(eptr, ecode+3, offset_top, md)) break;
2418     ecode += (ecode[1] << 8) + ecode[2];
2419     }
2420     while (*ecode == OP_ALT);
2421     if (*ecode == OP_KET) return FALSE;
2422    
2423     /* Continue as from after the assertion, updating the offsets high water
2424     mark, since extracts may have been taken. */
2425    
2426     do ecode += (ecode[1] << 8) + ecode[2]; while (*ecode == OP_ALT);
2427     ecode += 3;
2428     offset_top = md->end_offset_top;
2429     eptr = md->end_match_ptr;
2430     continue;
2431    
2432     /* An alternation is the end of a branch; scan along to find the end of the
2433     bracketed group and go to there. */
2434    
2435     case OP_ALT:
2436     do ecode += (ecode[1] << 8) + ecode[2]; while (*ecode == OP_ALT);
2437     break;
2438    
2439     /* BRAZERO and BRAMINZERO occur just before a bracket group, indicating
2440     that it may occur zero times. It may repeat infinitely, or not at all -
2441     i.e. it could be ()* or ()? in the pattern. Brackets with fixed upper
2442     repeat limits are compiled as a number of copies, with the optional ones
2443     preceded by BRAZERO or BRAMINZERO. */
2444    
2445     case OP_BRAZERO:
2446     {
2447 nigel 7 const uschar *next = ecode+1;
2448 nigel 3 if (match(eptr, next, offset_top, md)) return TRUE;
2449     do next += (next[1] << 8) + next[2]; while (*next == OP_ALT);
2450     ecode = next + 3;
2451     }
2452     break;
2453    
2454     case OP_BRAMINZERO:
2455     {
2456 nigel 7 const uschar *next = ecode+1;
2457 nigel 3 do next += (next[1] << 8) + next[2]; while (*next == OP_ALT);
2458     if (match(eptr, next+3, offset_top, md)) return TRUE;
2459     ecode++;
2460     }
2461     break;;
2462    
2463     /* End of a group, repeated or non-repeating. If we are at the end of
2464     an assertion "group", stop matching and return TRUE, but record the
2465     current high water mark for use by positive assertions. */
2466    
2467     case OP_KET:
2468     case OP_KETRMIN:
2469     case OP_KETRMAX:
2470     {
2471     int number;
2472 nigel 7 const uschar *prev = ecode - (ecode[1] << 8) - ecode[2];
2473 nigel 3
2474     if (*prev == OP_ASSERT || *prev == OP_ASSERT_NOT || *prev == OP_ONCE)
2475     {
2476     md->end_match_ptr = eptr; /* For ONCE */
2477     md->end_offset_top = offset_top;
2478     return TRUE;
2479     }
2480    
2481     /* In all other cases we have to check the group number back at the
2482     start and if necessary complete handling an extraction by setting the
2483     final offset and bumping the high water mark. */
2484    
2485     number = (*prev - OP_BRA) << 1;
2486    
2487 nigel 9 DPRINTF(("end bracket %d\n", number/2));
2488 nigel 3
2489     if (number > 0)
2490     {
2491     if (number >= md->offset_end) md->offset_overflow = TRUE; else
2492     {
2493     md->offset_vector[number+1] = eptr - md->start_subject;
2494     if (offset_top <= number) offset_top = number + 2;
2495     }
2496     }
2497    
2498     /* For a non-repeating ket, just advance to the next node and continue at
2499     this level. */
2500    
2501     if (*ecode == OP_KET)
2502     {
2503     ecode += 3;
2504     break;
2505     }
2506    
2507     /* The repeating kets try the rest of the pattern or restart from the
2508     preceding bracket, in the appropriate order. */
2509    
2510     if (*ecode == OP_KETRMIN)
2511     {
2512     if (match(eptr, ecode+3, offset_top, md) ||
2513     match(eptr, prev, offset_top, md)) return TRUE;
2514     }
2515     else /* OP_KETRMAX */
2516     {
2517     if (match(eptr, prev, offset_top, md) ||
2518     match(eptr, ecode+3, offset_top, md)) return TRUE;
2519     }
2520     }
2521     return FALSE;
2522    
2523     /* Start of subject unless notbol, or after internal newline if multiline */
2524    
2525     case OP_CIRC:
2526     if (md->notbol && eptr == md->start_subject) return FALSE;
2527     if (md->multiline)
2528     {
2529     if (eptr != md->start_subject && eptr[-1] != '\n') return FALSE;
2530     ecode++;
2531     break;
2532     }
2533     /* ... else fall through */
2534    
2535     /* Start of subject assertion */
2536    
2537     case OP_SOD:
2538     if (eptr != md->start_subject) return FALSE;
2539     ecode++;
2540     break;
2541    
2542     /* Assert before internal newline if multiline, or before
2543     a terminating newline unless endonly is set, else end of subject unless
2544     noteol is set. */
2545    
2546     case OP_DOLL:
2547     if (md->noteol && eptr >= md->end_subject) return FALSE;
2548     if (md->multiline)
2549     {
2550     if (eptr < md->end_subject && *eptr != '\n') return FALSE;
2551     ecode++;
2552     break;
2553     }
2554     else if (!md->endonly)
2555     {
2556     if (eptr < md->end_subject - 1 ||
2557     (eptr == md->end_subject - 1 && *eptr != '\n')) return FALSE;
2558     ecode++;
2559     break;
2560     }
2561     /* ... else fall through */
2562    
2563     /* End of subject assertion */
2564    
2565     case OP_EOD:
2566     if (eptr < md->end_subject) return FALSE;
2567     ecode++;
2568     break;
2569    
2570     /* Word boundary assertions */
2571    
2572     case OP_NOT_WORD_BOUNDARY:
2573     case OP_WORD_BOUNDARY:
2574     {
2575     BOOL prev_is_word = (eptr != md->start_subject) &&
2576     ((pcre_ctypes[eptr[-1]] & ctype_word) != 0);
2577     BOOL cur_is_word = (eptr < md->end_subject) &&
2578     ((pcre_ctypes[*eptr] & ctype_word) != 0);
2579     if ((*ecode++ == OP_WORD_BOUNDARY)?
2580     cur_is_word == prev_is_word : cur_is_word != prev_is_word)
2581     return FALSE;
2582     }
2583     break;
2584    
2585     /* Match a single character type; inline for speed */
2586    
2587     case OP_ANY:
2588     if (!md->dotall && eptr < md->end_subject && *eptr == '\n') return FALSE;
2589     if (eptr++ >= md->end_subject) return FALSE;
2590     ecode++;
2591     break;
2592    
2593     case OP_NOT_DIGIT:
2594     if (eptr >= md->end_subject || (pcre_ctypes[*eptr++] & ctype_digit) != 0)
2595     return FALSE;
2596     ecode++;
2597     break;
2598    
2599     case OP_DIGIT:
2600     if (eptr >= md->end_subject || (pcre_ctypes[*eptr++] & ctype_digit) == 0)
2601     return FALSE;
2602     ecode++;
2603     break;
2604    
2605     case OP_NOT_WHITESPACE:
2606     if (eptr >= md->end_subject || (pcre_ctypes[*eptr++] & ctype_space) != 0)
2607     return FALSE;
2608     ecode++;
2609     break;
2610    
2611     case OP_WHITESPACE:
2612     if (eptr >= md->end_subject || (pcre_ctypes[*eptr++] & ctype_space) == 0)
2613     return FALSE;
2614     ecode++;
2615     break;
2616    
2617     case OP_NOT_WORDCHAR:
2618     if (eptr >= md->end_subject || (pcre_ctypes[*eptr++] & ctype_word) != 0)
2619     return FALSE;
2620     ecode++;
2621     break;
2622    
2623     case OP_WORDCHAR:
2624     if (eptr >= md->end_subject || (pcre_ctypes[*eptr++] & ctype_word) == 0)
2625     return FALSE;
2626     ecode++;
2627     break;
2628    
2629     /* Match a back reference, possibly repeatedly. Look past the end of the
2630     item to see if there is repeat information following. The code is similar
2631     to that for character classes, but repeated for efficiency. Then obey
2632     similar code to character type repeats - written out again for speed.
2633     However, if the referenced string is the empty string, always treat
2634     it as matched, any number of times (otherwise there could be infinite
2635     loops). */
2636    
2637     case OP_REF:
2638     {
2639     int length;
2640     int number = ecode[1] << 1; /* Doubled reference number */
2641     ecode += 2; /* Advance past the item */
2642    
2643     if (number >= offset_top || md->offset_vector[number] < 0)
2644     {
2645     md->errorcode = PCRE_ERROR_BADREF;
2646     return FALSE;
2647     }
2648    
2649     length = md->offset_vector[number+1] - md->offset_vector[number];
2650    
2651     switch (*ecode)
2652     {
2653     case OP_CRSTAR:
2654     case OP_CRMINSTAR:
2655     case OP_CRPLUS:
2656     case OP_CRMINPLUS:
2657     case OP_CRQUERY:
2658     case OP_CRMINQUERY:
2659     c = *ecode++ - OP_CRSTAR;
2660     minimize = (c & 1) != 0;
2661     min = rep_min[c]; /* Pick up values from tables; */
2662     max = rep_max[c]; /* zero for max => infinity */
2663     if (max == 0) max = INT_MAX;
2664     break;
2665    
2666     case OP_CRRANGE:
2667     case OP_CRMINRANGE:
2668     minimize = (*ecode == OP_CRMINRANGE);
2669     min = (ecode[1] << 8) + ecode[2];
2670     max = (ecode[3] << 8) + ecode[4];
2671     if (max == 0) max = INT_MAX;
2672     ecode += 5;
2673     break;
2674    
2675     default: /* No repeat follows */
2676     if (!match_ref(number, eptr, length, md)) return FALSE;
2677     eptr += length;
2678     continue; /* With the main loop */
2679     }
2680    
2681     /* If the length of the reference is zero, just continue with the
2682     main loop. */
2683    
2684     if (length == 0) continue;
2685    
2686     /* First, ensure the minimum number of matches are present. We get back
2687     the length of the reference string explicitly rather than passing the
2688     address of eptr, so that eptr can be a register variable. */
2689    
2690     for (i = 1; i <= min; i++)
2691     {
2692     if (!match_ref(number, eptr, length, md)) return FALSE;
2693     eptr += length;
2694     }
2695    
2696     /* If min = max, continue at the same level without recursion.
2697     They are not both allowed to be zero. */
2698    
2699     if (min == max) continue;
2700    
2701     /* If minimizing, keep trying and advancing the pointer */
2702    
2703     if (minimize)
2704     {
2705     for (i = min;; i++)
2706     {
2707     if (match(eptr, ecode, offset_top, md)) return TRUE;
2708     if (i >= max || !match_ref(number, eptr, length, md))
2709     return FALSE;
2710     eptr += length;
2711     }
2712     /* Control never gets here */
2713     }
2714    
2715     /* If maximizing, find the longest string and work backwards */
2716    
2717     else
2718     {
2719 nigel 7 const uschar *pp = eptr;
2720 nigel 3 for (i = min; i < max; i++)
2721     {
2722     if (!match_ref(number, eptr, length, md)) break;
2723     eptr += length;
2724     }
2725     while (eptr >= pp)
2726     {
2727     if (match(eptr, ecode, offset_top, md)) return TRUE;
2728     eptr -= length;
2729     }
2730     return FALSE;
2731     }
2732     }
2733     /* Control never gets here */
2734    
2735     /* Match a character class, possibly repeatedly. Look past the end of the
2736     item to see if there is repeat information following. Then obey similar
2737     code to character type repeats - written out again for speed. If caseless
2738     matching was set at runtime but not at compile time, we have to check both
2739 nigel 13 versions of a character, and we have to behave differently for positive and
2740     negative classes. This is the only time where OP_CLASS and OP_NEGCLASS are
2741     treated differently. */
2742 nigel 3
2743     case OP_CLASS:
2744 nigel 13 case OP_NEGCLASS:
2745 nigel 3 {
2746 nigel 13 BOOL nasty_case = *ecode == OP_NEGCLASS && md->runtime_caseless;
2747 nigel 7 const uschar *data = ecode + 1; /* Save for matching */
2748     ecode += 33; /* Advance past the item */
2749 nigel 3
2750     switch (*ecode)
2751     {
2752     case OP_CRSTAR:
2753     case OP_CRMINSTAR:
2754     case OP_CRPLUS:
2755     case OP_CRMINPLUS:
2756     case OP_CRQUERY:
2757     case OP_CRMINQUERY:
2758     c = *ecode++ - OP_CRSTAR;
2759     minimize = (c & 1) != 0;
2760     min = rep_min[c]; /* Pick up values from tables; */
2761     max = rep_max[c]; /* zero for max => infinity */
2762     if (max == 0) max = INT_MAX;
2763     break;
2764    
2765     case OP_CRRANGE:
2766     case OP_CRMINRANGE:
2767     minimize = (*ecode == OP_CRMINRANGE);
2768     min = (ecode[1] << 8) + ecode[2];
2769     max = (ecode[3] << 8) + ecode[4];
2770     if (max == 0) max = INT_MAX;
2771     ecode += 5;
2772     break;
2773    
2774     default: /* No repeat follows */
2775 nigel 13 min = max = 1;
2776     break;
2777 nigel 3 }
2778    
2779     /* First, ensure the minimum number of matches are present. */
2780    
2781     for (i = 1; i <= min; i++)
2782     {
2783     if (eptr >= md->end_subject) return FALSE;
2784     c = *eptr++;
2785 nigel 13
2786     /* Either not runtime caseless, or it was a positive class. For
2787     runtime caseless, continue if either case is in the map. */
2788    
2789     if (!nasty_case)
2790 nigel 3 {
2791 nigel 13 if ((data[c/8] & (1 << (c&7))) != 0) continue;
2792     if (md->runtime_caseless)
2793     {
2794     c = pcre_fcc[c];
2795     if ((data[c/8] & (1 << (c&7))) != 0) continue;
2796     }
2797     }
2798    
2799     /* Runtime caseless and it was a negative class. Continue only if
2800     both cases are in the map. */
2801    
2802     else
2803     {
2804     if ((data[c/8] & (1 << (c&7))) == 0) return FALSE;
2805 nigel 3 c = pcre_fcc[c];
2806     if ((data[c/8] & (1 << (c&7))) != 0) continue;
2807     }
2808 nigel 13
2809 nigel 3 return FALSE;
2810     }
2811    
2812     /* If max == min we can continue with the main loop without the
2813     need to recurse. */
2814    
2815     if (min == max) continue;
2816    
2817     /* If minimizing, keep testing the rest of the expression and advancing
2818     the pointer while it matches the class. */
2819    
2820     if (minimize)
2821     {
2822     for (i = min;; i++)
2823     {
2824     if (match(eptr, ecode, offset_top, md)) return TRUE;
2825     if (i >= max || eptr >= md->end_subject) return FALSE;
2826     c = *eptr++;
2827 nigel 13
2828     /* Either not runtime caseless, or it was a positive class. For
2829     runtime caseless, continue if either case is in the map. */
2830    
2831     if (!nasty_case)
2832 nigel 3 {
2833 nigel 13 if ((data[c/8] & (1 << (c&7))) != 0) continue;
2834     if (md->runtime_caseless)
2835     {
2836     c = pcre_fcc[c];
2837     if ((data[c/8] & (1 << (c&7))) != 0) continue;
2838     }
2839     }
2840    
2841     /* Runtime caseless and it was a negative class. Continue only if
2842     both cases are in the map. */
2843    
2844     else
2845     {
2846     if ((data[c/8] & (1 << (c&7))) == 0) return FALSE;
2847 nigel 3 c = pcre_fcc[c];
2848     if ((data[c/8] & (1 << (c&7))) != 0) continue;
2849     }
2850 nigel 13
2851 nigel 3 return FALSE;
2852     }
2853     /* Control never gets here */
2854     }
2855    
2856     /* If maximizing, find the longest possible run, then work backwards. */
2857    
2858     else
2859     {
2860 nigel 7 const uschar *pp = eptr;
2861 nigel 3 for (i = min; i < max; eptr++, i++)
2862     {
2863     if (eptr >= md->end_subject) break;
2864     c = *eptr;
2865 nigel 13
2866     /* Either not runtime caseless, or it was a positive class. For
2867     runtime caseless, continue if either case is in the map. */
2868    
2869     if (!nasty_case)
2870 nigel 3 {
2871 nigel 13 if ((data[c/8] & (1 << (c&7))) != 0) continue;
2872     if (md->runtime_caseless)
2873     {
2874     c = pcre_fcc[c];
2875     if ((data[c/8] & (1 << (c&7))) != 0) continue;
2876     }
2877     }
2878    
2879     /* Runtime caseless and it was a negative class. Continue only if
2880     both cases are in the map. */
2881    
2882     else
2883     {
2884     if ((data[c/8] & (1 << (c&7))) == 0) break;
2885 nigel 3 c = pcre_fcc[c];
2886     if ((data[c/8] & (1 << (c&7))) != 0) continue;
2887     }
2888 nigel 13
2889 nigel 3 break;
2890     }
2891    
2892     while (eptr >= pp)
2893     if (match(eptr--, ecode, offset_top, md)) return TRUE;
2894     return FALSE;
2895     }
2896     }
2897     /* Control never gets here */
2898    
2899     /* Match a run of characters */
2900    
2901     case OP_CHARS:
2902     {
2903     register int length = ecode[1];
2904     ecode += 2;
2905    
2906 nigel 9 #ifdef DEBUG /* Sigh. Some compilers never learn. */
2907 nigel 3 if (eptr >= md->end_subject)
2908     printf("matching subject <null> against pattern ");
2909     else
2910     {
2911     printf("matching subject ");
2912     pchars(eptr, length, TRUE, md);
2913     printf(" against pattern ");
2914     }
2915     pchars(ecode, length, FALSE, md);
2916     printf("\n");
2917 nigel 9 #endif
2918 nigel 3
2919     if (length > md->end_subject - eptr) return FALSE;
2920     if (md->caseless)
2921     {
2922     while (length-- > 0) if (pcre_lcc[*ecode++] != pcre_lcc[*eptr++]) return FALSE;
2923     }
2924     else
2925     {
2926     while (length-- > 0) if (*ecode++ != *eptr++) return FALSE;
2927     }
2928     }
2929     break;
2930    
2931     /* Match a single character repeatedly; different opcodes share code. */
2932    
2933     case OP_EXACT:
2934     min = max = (ecode[1] << 8) + ecode[2];
2935     ecode += 3;
2936     goto REPEATCHAR;
2937    
2938     case OP_UPTO:
2939     case OP_MINUPTO:
2940     min = 0;
2941     max = (ecode[1] << 8) + ecode[2];
2942     minimize = *ecode == OP_MINUPTO;
2943     ecode += 3;
2944     goto REPEATCHAR;
2945    
2946     case OP_STAR:
2947     case OP_MINSTAR:
2948     case OP_PLUS:
2949     case OP_MINPLUS:
2950     case OP_QUERY:
2951     case OP_MINQUERY:
2952     c = *ecode++ - OP_STAR;
2953     minimize = (c & 1) != 0;
2954     min = rep_min[c]; /* Pick up values from tables; */
2955     max = rep_max[c]; /* zero for max => infinity */
2956     if (max == 0) max = INT_MAX;
2957    
2958     /* Common code for all repeated single-character matches. We can give
2959     up quickly if there are fewer than the minimum number of characters left in
2960     the subject. */
2961    
2962     REPEATCHAR:
2963     if (min > md->end_subject - eptr) return FALSE;
2964     c = *ecode++;
2965    
2966     /* The code is duplicated for the caseless and caseful cases, for speed,
2967     since matching characters is likely to be quite common. First, ensure the
2968     minimum number of matches are present. If min = max, continue at the same
2969     level without recursing. Otherwise, if minimizing, keep trying the rest of
2970     the expression and advancing one matching character if failing, up to the
2971     maximum. Alternatively, if maximizing, find the maximum number of
2972     characters and work backwards. */
2973    
2974 nigel 9 DPRINTF(("matching %c{%d,%d} against subject %.*s\n", c, min, max,
2975     max, eptr));
2976 nigel 3
2977     if (md->caseless)
2978     {
2979     c = pcre_lcc[c];
2980     for (i = 1; i <= min; i++) if (c != pcre_lcc[*eptr++]) return FALSE;
2981     if (min == max) continue;
2982     if (minimize)
2983     {
2984     for (i = min;; i++)
2985     {
2986     if (match(eptr, ecode, offset_top, md)) return TRUE;
2987     if (i >= max || eptr >= md->end_subject || c != pcre_lcc[*eptr++])
2988     return FALSE;
2989     }
2990     /* Control never gets here */
2991     }
2992     else
2993     {
2994 nigel 7 const uschar *pp = eptr;
2995 nigel 3 for (i = min; i < max; i++)
2996     {
2997     if (eptr >= md->end_subject || c != pcre_lcc[*eptr]) break;
2998     eptr++;
2999     }
3000     while (eptr >= pp)
3001     if (match(eptr--, ecode, offset_top, md)) return TRUE;
3002     return FALSE;
3003     }
3004     /* Control never gets here */
3005     }
3006    
3007     /* Caseful comparisons */
3008    
3009     else
3010     {
3011     for (i = 1; i <= min; i++) if (c != *eptr++) return FALSE;
3012     if (min == max) continue;
3013     if (minimize)
3014     {
3015     for (i = min;; i++)
3016     {
3017     if (match(eptr, ecode, offset_top, md)) return TRUE;
3018     if (i >= max || eptr >= md->end_subject || c != *eptr++) return FALSE;
3019     }
3020     /* Control never gets here */
3021     }
3022     else
3023     {
3024 nigel 7 const uschar *pp = eptr;
3025 nigel 3 for (i = min; i < max; i++)
3026     {
3027     if (eptr >= md->end_subject || c != *eptr) break;
3028     eptr++;
3029     }
3030     while (eptr >= pp)
3031     if (match(eptr--, ecode, offset_top, md)) return TRUE;
3032     return FALSE;
3033     }
3034     }
3035     /* Control never gets here */
3036    
3037     /* Match a negated single character */
3038    
3039     case OP_NOT:
3040 nigel 9 if (eptr >= md->end_subject) return FALSE;
3041 nigel 3 ecode++;
3042     if (md->caseless)
3043     {
3044     if (pcre_lcc[*ecode++] == pcre_lcc[*eptr++]) return FALSE;
3045     }
3046     else
3047     {
3048     if (*ecode++ == *eptr++) return FALSE;
3049     }
3050     break;
3051    
3052     /* Match a negated single character repeatedly. This is almost a repeat of
3053     the code for a repeated single character, but I haven't found a nice way of
3054     commoning these up that doesn't require a test of the positive/negative
3055     option for each character match. Maybe that wouldn't add very much to the
3056     time taken, but character matching *is* what this is all about... */
3057    
3058     case OP_NOTEXACT:
3059     min = max = (ecode[1] << 8) + ecode[2];
3060     ecode += 3;
3061     goto REPEATNOTCHAR;
3062    
3063     case OP_NOTUPTO:
3064     case OP_NOTMINUPTO:
3065     min = 0;
3066     max = (ecode[1] << 8) + ecode[2];
3067     minimize = *ecode == OP_NOTMINUPTO;
3068     ecode += 3;
3069     goto REPEATNOTCHAR;
3070    
3071     case OP_NOTSTAR:
3072     case OP_NOTMINSTAR:
3073     case OP_NOTPLUS:
3074     case OP_NOTMINPLUS:
3075     case OP_NOTQUERY:
3076     case OP_NOTMINQUERY:
3077     c = *ecode++ - OP_NOTSTAR;
3078     minimize = (c & 1) != 0;
3079     min = rep_min[c]; /* Pick up values from tables; */
3080     max = rep_max[c]; /* zero for max => infinity */
3081     if (max == 0) max = INT_MAX;
3082    
3083     /* Common code for all repeated single-character matches. We can give
3084     up quickly if there are fewer than the minimum number of characters left in
3085     the subject. */
3086    
3087     REPEATNOTCHAR:
3088     if (min > md->end_subject - eptr) return FALSE;
3089     c = *ecode++;
3090    
3091     /* The code is duplicated for the caseless and caseful cases, for speed,
3092     since matching characters is likely to be quite common. First, ensure the
3093     minimum number of matches are present. If min = max, continue at the same
3094     level without recursing. Otherwise, if minimizing, keep trying the rest of
3095     the expression and advancing one matching character if failing, up to the
3096     maximum. Alternatively, if maximizing, find the maximum number of
3097     characters and work backwards. */
3098    
3099 nigel 9 DPRINTF(("negative matching %c{%d,%d} against subject %.*s\n", c, min, max,
3100     max, eptr));
3101 nigel 3
3102     if (md->caseless)
3103     {
3104     c = pcre_lcc[c];
3105     for (i = 1; i <= min; i++) if (c == pcre_lcc[*eptr++]) return FALSE;
3106     if (min == max) continue;
3107     if (minimize)
3108     {
3109     for (i = min;; i++)
3110     {
3111     if (match(eptr, ecode, offset_top, md)) return TRUE;
3112     if (i >= max || eptr >= md->end_subject || c == pcre_lcc[*eptr++])
3113     return FALSE;
3114     }
3115     /* Control never gets here */
3116     }
3117     else
3118     {
3119 nigel 7 const uschar *pp = eptr;
3120 nigel 3 for (i = min; i < max; i++)
3121     {
3122     if (eptr >= md->end_subject || c == pcre_lcc[*eptr]) break;
3123     eptr++;
3124     }
3125     while (eptr >= pp)
3126     if (match(eptr--, ecode, offset_top, md)) return TRUE;
3127     return FALSE;
3128     }
3129     /* Control never gets here */
3130     }
3131    
3132     /* Caseful comparisons */
3133    
3134     else
3135     {
3136     for (i = 1; i <= min; i++) if (c == *eptr++) return FALSE;
3137     if (min == max) continue;
3138     if (minimize)
3139     {
3140     for (i = min;; i++)
3141     {
3142     if (match(eptr, ecode, offset_top, md)) return TRUE;
3143     if (i >= max || eptr >= md->end_subject || c == *eptr++) return FALSE;
3144     }
3145     /* Control never gets here */
3146     }
3147     else
3148     {
3149 nigel 7 const uschar *pp = eptr;
3150 nigel 3 for (i = min; i < max; i++)
3151     {
3152     if (eptr >= md->end_subject || c == *eptr) break;
3153     eptr++;
3154     }
3155     while (eptr >= pp)
3156     if (match(eptr--, ecode, offset_top, md)) return TRUE;
3157     return FALSE;
3158     }
3159     }
3160     /* Control never gets here */
3161    
3162     /* Match a single character type repeatedly; several different opcodes
3163     share code. This is very similar to the code for single characters, but we
3164     repeat it in the interests of efficiency. */
3165    
3166     case OP_TYPEEXACT:
3167     min = max = (ecode[1] << 8) + ecode[2];
3168     minimize = TRUE;
3169     ecode += 3;
3170     goto REPEATTYPE;
3171    
3172     case OP_TYPEUPTO:
3173     case OP_TYPEMINUPTO:
3174     min = 0;
3175     max = (ecode[1] << 8) + ecode[2];
3176     minimize = *ecode == OP_TYPEMINUPTO;
3177     ecode += 3;
3178     goto REPEATTYPE;
3179    
3180     case OP_TYPESTAR:
3181     case OP_TYPEMINSTAR:
3182     case OP_TYPEPLUS:
3183     case OP_TYPEMINPLUS:
3184     case OP_TYPEQUERY:
3185     case OP_TYPEMINQUERY:
3186     c = *ecode++ - OP_TYPESTAR;
3187     minimize = (c & 1) != 0;
3188     min = rep_min[c]; /* Pick up values from tables; */
3189     max = rep_max[c]; /* zero for max => infinity */
3190     if (max == 0) max = INT_MAX;
3191    
3192     /* Common code for all repeated single character type matches */
3193    
3194     REPEATTYPE:
3195     ctype = *ecode++; /* Code for the character type */
3196    
3197     /* First, ensure the minimum number of matches are present. Use inline
3198     code for maximizing the speed, and do the type test once at the start
3199     (i.e. keep it out of the loop). Also test that there are at least the
3200     minimum number of characters before we start. */
3201    
3202     if (min > md->end_subject - eptr) return FALSE;
3203     if (min > 0) switch(ctype)
3204     {
3205     case OP_ANY:
3206     if (!md->dotall)
3207     { for (i = 1; i <= min; i++) if (*eptr++ == '\n') return FALSE; }
3208     else eptr += min;
3209     break;
3210    
3211     case OP_NOT_DIGIT:
3212     for (i = 1; i <= min; i++)
3213     if ((pcre_ctypes[*eptr++] & ctype_digit) != 0) return FALSE;
3214     break;
3215    
3216     case OP_DIGIT:
3217     for (i = 1; i <= min; i++)
3218     if ((pcre_ctypes[*eptr++] & ctype_digit) == 0) return FALSE;
3219     break;
3220    
3221     case OP_NOT_WHITESPACE:
3222     for (i = 1; i <= min; i++)
3223     if ((pcre_ctypes[*eptr++] & ctype_space) != 0) return FALSE;
3224     break;
3225    
3226     case OP_WHITESPACE:
3227     for (i = 1; i <= min; i++)
3228     if ((pcre_ctypes[*eptr++] & ctype_space) == 0) return FALSE;
3229     break;
3230    
3231     case OP_NOT_WORDCHAR:
3232     for (i = 1; i <= min; i++) if ((pcre_ctypes[*eptr++] & ctype_word) != 0)
3233     return FALSE;
3234     break;
3235    
3236     case OP_WORDCHAR:
3237     for (i = 1; i <= min; i++) if ((pcre_ctypes[*eptr++] & ctype_word) == 0)
3238     return FALSE;
3239     break;
3240     }
3241    
3242     /* If min = max, continue at the same level without recursing */
3243    
3244     if (min == max) continue;
3245    
3246     /* If minimizing, we have to test the rest of the pattern before each
3247     subsequent match, so inlining isn't much help; just use the function. */
3248    
3249     if (minimize)
3250     {
3251     for (i = min;; i++)
3252     {
3253     if (match(eptr, ecode, offset_top, md)) return TRUE;
3254     if (i >= max || eptr >= md->end_subject ||
3255     !match_type(ctype, *eptr++, md->dotall))
3256     return FALSE;
3257     }
3258     /* Control never gets here */
3259     }
3260    
3261     /* If maximizing it is worth using inline code for speed, doing the type
3262     test once at the start (i.e. keep it out of the loop). */
3263    
3264     else
3265     {
3266 nigel 7 const uschar *pp = eptr;
3267 nigel 3 switch(ctype)
3268     {
3269     case OP_ANY:
3270     if (!md->dotall)
3271     {
3272     for (i = min; i < max; i++)
3273     {
3274     if (eptr >= md->end_subject || *eptr == '\n') break;
3275     eptr++;
3276     }
3277     }
3278     else
3279     {
3280     c = max - min;
3281     if (c > md->end_subject - eptr) c = md->end_subject - eptr;
3282     eptr += c;
3283     }
3284     break;
3285    
3286     case OP_NOT_DIGIT:
3287     for (i = min; i < max; i++)
3288     {
3289     if (eptr >= md->end_subject || (pcre_ctypes[*eptr] & ctype_digit) != 0)
3290     break;
3291     eptr++;
3292     }
3293     break;
3294    
3295     case OP_DIGIT:
3296     for (i = min; i < max; i++)
3297     {
3298     if (eptr >= md->end_subject || (pcre_ctypes[*eptr] & ctype_digit) == 0)
3299     break;
3300     eptr++;
3301     }
3302     break;
3303    
3304     case OP_NOT_WHITESPACE:
3305     for (i = min; i < max; i++)
3306     {
3307     if (eptr >= md->end_subject || (pcre_ctypes[*eptr] & ctype_space) != 0)
3308     break;
3309     eptr++;
3310     }
3311     break;
3312    
3313     case OP_WHITESPACE:
3314     for (i = min; i < max; i++)
3315     {
3316     if (eptr >= md->end_subject || (pcre_ctypes[*eptr] & ctype_space) == 0)
3317     break;
3318     eptr++;
3319     }
3320     break;
3321    
3322     case OP_NOT_WORDCHAR:
3323     for (i = min; i < max; i++)
3324     {
3325     if (eptr >= md->end_subject || (pcre_ctypes[*eptr] & ctype_word) != 0)
3326     break;
3327     eptr++;
3328     }
3329     break;
3330    
3331     case OP_WORDCHAR:
3332     for (i = min; i < max; i++)
3333     {
3334     if (eptr >= md->end_subject || (pcre_ctypes[*eptr] & ctype_word) == 0)
3335     break;
3336     eptr++;
3337     }
3338     break;
3339     }
3340    
3341     while (eptr >= pp)
3342     if (match(eptr--, ecode, offset_top, md)) return TRUE;
3343     return FALSE;
3344     }
3345     /* Control never gets here */
3346    
3347     /* There's been some horrible disaster. */
3348    
3349     default:
3350 nigel 9 DPRINTF(("Unknown opcode %d\n", *ecode));
3351 nigel 3 md->errorcode = PCRE_ERROR_UNKNOWN_NODE;
3352     return FALSE;
3353     }
3354    
3355     /* Do not stick any code in here without much thought; it is assumed
3356     that "continue" in the code above comes out to here to repeat the main
3357     loop. */
3358    
3359     } /* End of main loop */
3360     /* Control never reaches here */
3361     }
3362    
3363    
3364    
3365     /*************************************************
3366 nigel 9 * Segregate setjmp() *
3367     *************************************************/
3368    
3369     /* The -Wall option of gcc gives warnings for all local variables when setjmp()
3370     is used, even if the coding conforms to the rules of ANSI C. To avoid this, we
3371     hide it in a separate function. This is called only when PCRE_EXTRA is set,
3372     since it's needed only for the extension \X option, and with any luck, a good
3373     compiler will spot the tail recursion and compile it efficiently.
3374    
3375 nigel 11 Arguments:
3376     eptr pointer in subject
3377     ecode position in code
3378     offset_top current top pointer
3379     md pointer to "static" info for the match
3380    
3381     Returns: TRUE if matched
3382 nigel 9 */
3383    
3384 nigel 11 static BOOL
3385     match_with_setjmp(const uschar *eptr, const uschar *ecode, int offset_top,
3386     match_data *match_block)
3387 nigel 9 {
3388 nigel 11 return setjmp(match_block->fail_env) == 0 &&
3389     match(eptr, ecode, offset_top, match_block);
3390 nigel 9 }
3391    
3392    
3393    
3394     /*************************************************
3395 nigel 3 * Execute a Regular Expression *
3396     *************************************************/
3397    
3398     /* This function applies a compiled re to a subject string and picks out
3399     portions of the string if it matches. Two elements in the vector are set for
3400     each substring: the offsets to the start and end of the substring.
3401    
3402     Arguments:
3403     external_re points to the compiled expression
3404     external_extra points to "hints" from pcre_study() or is NULL
3405     subject points to the subject string
3406     length length of subject string (may contain binary zeros)
3407     options option bits
3408     offsets points to a vector of ints to be filled in with offsets
3409     offsetcount the number of elements in the vector
3410    
3411     Returns: > 0 => success; value is the number of elements filled in
3412     = 0 => success, but offsets is not big enough
3413     -1 => failed to match
3414     < -1 => some kind of unexpected problem
3415     */
3416    
3417     int
3418     pcre_exec(const pcre *external_re, const pcre_extra *external_extra,
3419     const char *subject, int length, int options, int *offsets, int offsetcount)
3420     {
3421 nigel 11 int resetcount, ocount;
3422 nigel 3 int first_char = -1;
3423     match_data match_block;
3424 nigel 7 const uschar *start_bits = NULL;
3425 nigel 9 const uschar *start_match = (const uschar *)subject;
3426 nigel 7 const uschar *end_subject;
3427     const real_pcre *re = (const real_pcre *)external_re;
3428     const real_pcre_extra *extra = (const real_pcre_extra *)external_extra;
3429 nigel 11 BOOL using_temporary_offsets = FALSE;
3430 nigel 3 BOOL anchored = ((re->options | options) & PCRE_ANCHORED) != 0;
3431     BOOL startline = (re->options & PCRE_STARTLINE) != 0;
3432    
3433     if ((options & ~PUBLIC_EXEC_OPTIONS) != 0) return PCRE_ERROR_BADOPTION;
3434    
3435     if (re == NULL || subject == NULL ||
3436     (offsets == NULL && offsetcount > 0)) return PCRE_ERROR_NULL;
3437     if (re->magic_number != MAGIC_NUMBER) return PCRE_ERROR_BADMAGIC;
3438    
3439 nigel 7 match_block.start_subject = (const uschar *)subject;
3440 nigel 3 match_block.end_subject = match_block.start_subject + length;
3441     end_subject = match_block.end_subject;
3442    
3443     match_block.caseless = ((re->options | options) & PCRE_CASELESS) != 0;
3444     match_block.runtime_caseless = match_block.caseless &&
3445     (re->options & PCRE_CASELESS) == 0;
3446    
3447     match_block.multiline = ((re->options | options) & PCRE_MULTILINE) != 0;
3448     match_block.dotall = ((re->options | options) & PCRE_DOTALL) != 0;
3449     match_block.endonly = ((re->options | options) & PCRE_DOLLAR_ENDONLY) != 0;
3450    
3451     match_block.notbol = (options & PCRE_NOTBOL) != 0;
3452     match_block.noteol = (options & PCRE_NOTEOL) != 0;
3453    
3454     match_block.errorcode = PCRE_ERROR_NOMATCH; /* Default error */
3455    
3456     /* If the expression has got more back references than the offsets supplied can
3457     hold, we get a temporary bit of working store to use during the matching.
3458 nigel 11 Otherwise, we can use the vector supplied, rounding down its size to a multiple
3459     of 2. */
3460 nigel 3
3461 nigel 11 ocount = offsetcount & (-2);
3462     if (re->top_backref > 0 && re->top_backref >= ocount/2)
3463 nigel 3 {
3464     ocount = re->top_backref * 2 + 2;
3465 nigel 15 match_block.offset_vector = (int *)(pcre_malloc)(ocount * sizeof(int));
3466 nigel 3 if (match_block.offset_vector == NULL) return PCRE_ERROR_NOMEMORY;
3467 nigel 11 using_temporary_offsets = TRUE;
3468 nigel 9 DPRINTF(("Got memory to hold back references\n"));
3469 nigel 3 }
3470     else match_block.offset_vector = offsets;
3471    
3472     match_block.offset_end = ocount;
3473     match_block.offset_overflow = FALSE;
3474    
3475     /* Compute the minimum number of offsets that we need to reset each time. Doing
3476     this makes a huge difference to execution time when there aren't many brackets
3477     in the pattern. */
3478    
3479     resetcount = 2 + re->top_bracket * 2;
3480     if (resetcount > offsetcount) resetcount = ocount;
3481    
3482     /* If MULTILINE is set at exec time but was not set at compile time, and the
3483     anchored flag is set, we must re-check because a setting provoked by ^ in the
3484     pattern is not right in multi-line mode. Calling is_anchored() again here does
3485     the right check, because multiline is now set. If it now yields FALSE, the
3486     expression must have had ^ starting some of its branches. Check to see if
3487     that is true for *all* branches, and if so, set the startline flag. */
3488    
3489     if (match_block. multiline && anchored && (re->options & PCRE_MULTILINE) == 0 &&
3490     !is_anchored(re->code, match_block.multiline))
3491     {
3492     anchored = FALSE;
3493     if (is_startline(re->code)) startline = TRUE;
3494     }
3495    
3496     /* Set up the first character to match, if available. The first_char value is
3497     never set for an anchored regular expression, but the anchoring may be forced
3498     at run time, so we have to test for anchoring. The first char may be unset for
3499     an unanchored pattern, of course. If there's no first char and the pattern was
3500     studied, the may be a bitmap of possible first characters. However, we can
3501     use this only if the caseless state of the studying was correct. */
3502    
3503     if (!anchored)
3504     {
3505     if ((re->options & PCRE_FIRSTSET) != 0)
3506     {
3507     first_char = re->first_char;
3508     if (match_block.caseless) first_char = pcre_lcc[first_char];
3509     }
3510     else
3511     if (!startline && extra != NULL &&
3512     (extra->options & PCRE_STUDY_MAPPED) != 0 &&
3513     ((extra->options & PCRE_STUDY_CASELESS) != 0) == match_block.caseless)
3514     start_bits = extra->start_bits;
3515     }
3516    
3517     /* Loop for unanchored matches; for anchored regexps the loop runs just once. */
3518    
3519     do
3520     {
3521 nigel 9 int rc;
3522 nigel 3 register int *iptr = match_block.offset_vector;
3523     register int *iend = iptr + resetcount;
3524    
3525     /* Reset the maximum number of extractions we might see. */
3526    
3527     while (iptr < iend) *iptr++ = -1;
3528    
3529     /* Advance to a unique first char if possible */
3530    
3531     if (first_char >= 0)
3532     {
3533     if (match_block.caseless)
3534     while (start_match < end_subject && pcre_lcc[*start_match] != first_char)
3535     start_match++;
3536     else
3537     while (start_match < end_subject && *start_match != first_char)
3538     start_match++;
3539     }
3540    
3541     /* Or to just after \n for a multiline match if possible */
3542    
3543     else if (startline)
3544     {
3545     if (start_match > match_block.start_subject)
3546     {
3547     while (start_match < end_subject && start_match[-1] != '\n')
3548     start_match++;
3549     }
3550     }
3551    
3552     /* Or to a non-unique first char */
3553    
3554     else if (start_bits != NULL)
3555     {
3556     while (start_match < end_subject)
3557     {
3558     register int c = *start_match;
3559     if ((start_bits[c/8] & (1 << (c&7))) == 0) start_match++; else break;
3560     }
3561     }
3562    
3563 nigel 9 #ifdef DEBUG /* Sigh. Some compilers never learn. */
3564 nigel 3 printf(">>>> Match against: ");
3565     pchars(start_match, end_subject - start_match, TRUE, &match_block);
3566     printf("\n");
3567 nigel 9 #endif
3568 nigel 3
3569     /* When a match occurs, substrings will be set for all internal extractions;
3570     we just need to set up the whole thing as substring 0 before returning. If
3571     there were too many extractions, set the return code to zero. In the case
3572     where we had to get some local store to hold offsets for backreferences, copy
3573     those back references that we can. In this case there need not be overflow
3574     if certain parts of the pattern were not used.
3575    
3576     Before starting the match, we have to set up a longjmp() target to enable
3577 nigel 9 the "cut" operation to fail a match completely without backtracking. This
3578     is done in a separate function to avoid compiler warnings. We need not do
3579     it unless PCRE_EXTRA is set, since only in that case is the "cut" operation
3580     enabled. */
3581 nigel 3
3582 nigel 11 if ((re->options & PCRE_EXTRA) != 0)
3583     {
3584     if (!match_with_setjmp(start_match, re->code, 2, &match_block))
3585     continue;
3586     }
3587     else if (!match(start_match, re->code, 2, &match_block)) continue;
3588 nigel 9
3589     /* Copy the offset information from temporary store if necessary */
3590    
3591 nigel 11 if (using_temporary_offsets)
3592 nigel 3 {
3593 nigel 9 if (offsetcount >= 4)
3594 nigel 3 {
3595 nigel 9 memcpy(offsets + 2, match_block.offset_vector + 2,
3596     (offsetcount - 2) * sizeof(int));
3597 nigel 11 DPRINTF(("Copied offsets from temporary memory\n"));
3598 nigel 3 }
3599 nigel 9 if (match_block.end_offset_top > offsetcount)
3600     match_block.offset_overflow = TRUE;
3601 nigel 3
3602 nigel 9 DPRINTF(("Freeing temporary memory\n"));
3603     (pcre_free)(match_block.offset_vector);
3604     }
3605 nigel 3
3606 nigel 9 rc = match_block.offset_overflow? 0 : match_block.end_offset_top/2;
3607 nigel 3
3608 nigel 9 if (match_block.offset_end < 2) rc = 0; else
3609     {
3610     offsets[0] = start_match - match_block.start_subject;
3611     offsets[1] = match_block.end_match_ptr - match_block.start_subject;
3612 nigel 3 }
3613 nigel 9
3614     DPRINTF((">>>> returning %d\n", rc));
3615     return rc;
3616 nigel 3 }
3617     while (!anchored &&
3618     match_block.errorcode == PCRE_ERROR_NOMATCH &&
3619     start_match++ < end_subject);
3620    
3621 nigel 11 if (using_temporary_offsets)
3622     {
3623     DPRINTF(("Freeing temporary memory\n"));
3624     (pcre_free)(match_block.offset_vector);
3625     }
3626    
3627 nigel 9 DPRINTF((">>>> returning %d\n", match_block.errorcode));
3628 nigel 3
3629     return match_block.errorcode;
3630     }
3631    
3632     /* End of pcre.c */

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12