/[pcre]/code/branches/pcre16/pcre_compile.c
ViewVC logotype

Diff of /code/branches/pcre16/pcre_compile.c

Parent Directory | Revision Log | View Patch

revision 180 by ph10, Wed Jun 13 10:59:18 2007 UTC | revision 412 by ph10, Sat Apr 11 10:34:37 2009 UTC
# Line 6  Line 6 
6  and semantics are as close as possible to those of the Perl 5 language.  and semantics are as close as possible to those of the Perl 5 language.
7    
8                         Written by Philip Hazel                         Written by Philip Hazel
9             Copyright (c) 1997-2007 University of Cambridge             Copyright (c) 1997-2009 University of Cambridge
10    
11  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
12  Redistribution and use in source and binary forms, with or without  Redistribution and use in source and binary forms, with or without
# Line 42  POSSIBILITY OF SUCH DAMAGE. Line 42  POSSIBILITY OF SUCH DAMAGE.
42  supporting internal functions that are not used by other modules. */  supporting internal functions that are not used by other modules. */
43    
44    
45    #ifdef HAVE_CONFIG_H
46    #include "config.h"
47    #endif
48    
49  #define NLBLOCK cd             /* Block containing newline information */  #define NLBLOCK cd             /* Block containing newline information */
50  #define PSSTART start_pattern  /* Field containing processed string start */  #define PSSTART start_pattern  /* Field containing processed string start */
51  #define PSEND   end_pattern    /* Field containing processed string end */  #define PSEND   end_pattern    /* Field containing processed string end */
52    
   
53  #include "pcre_internal.h"  #include "pcre_internal.h"
54    
55    
# Line 62  used by pcretest. DEBUG is not defined w Line 65  used by pcretest. DEBUG is not defined w
65    
66  #define SETBIT(a,b) a[b/8] |= (1 << (b%8))  #define SETBIT(a,b) a[b/8] |= (1 << (b%8))
67    
68    /* Maximum length value to check against when making sure that the integer that
69    holds the compiled pattern length does not overflow. We make it a bit less than
70    INT_MAX to allow for adding in group terminating bytes, so that we don't have
71    to check them every time. */
72    
73    #define OFLOW_MAX (INT_MAX - 20)
74    
75    
76  /*************************************************  /*************************************************
77  *      Code parameters and static tables         *  *      Code parameters and static tables         *
# Line 87  are simple data values; negative values Line 97  are simple data values; negative values
97  on. Zero means further processing is needed (for things like \x), or the escape  on. Zero means further processing is needed (for things like \x), or the escape
98  is invalid. */  is invalid. */
99    
100  #ifndef EBCDIC  /* This is the "normal" table for ASCII systems */  #ifndef EBCDIC
101    
102    /* This is the "normal" table for ASCII systems or for EBCDIC systems running
103    in UTF-8 mode. */
104    
105  static const short int escapes[] = {  static const short int escapes[] = {
106       0,      0,      0,      0,      0,      0,      0,      0,   /* 0 - 7 */       0,                       0,
107       0,      0,    ':',    ';',    '<',    '=',    '>',    '?',   /* 8 - ? */       0,                       0,
108     '@', -ESC_A, -ESC_B, -ESC_C, -ESC_D, -ESC_E,      0, -ESC_G,   /* @ - G */       0,                       0,
109  -ESC_H,      0,      0, -ESC_K,      0,      0,      0,      0,   /* H - O */       0,                       0,
110  -ESC_P, -ESC_Q, -ESC_R, -ESC_S,      0,      0, -ESC_V, -ESC_W,   /* P - W */       0,                       0,
111  -ESC_X,      0, -ESC_Z,    '[',   '\\',    ']',    '^',    '_',   /* X - _ */       CHAR_COLON,              CHAR_SEMICOLON,
112     '`',      7, -ESC_b,      0, -ESC_d,  ESC_e,  ESC_f,      0,   /* ` - g */       CHAR_LESS_THAN_SIGN,     CHAR_EQUALS_SIGN,
113  -ESC_h,      0,      0, -ESC_k,      0,      0,  ESC_n,      0,   /* h - o */       CHAR_GREATER_THAN_SIGN,  CHAR_QUESTION_MARK,
114  -ESC_p,      0,  ESC_r, -ESC_s,  ESC_tee,    0, -ESC_v, -ESC_w,   /* p - w */       CHAR_COMMERCIAL_AT,      -ESC_A,
115       0,      0, -ESC_z                                            /* x - z */       -ESC_B,                  -ESC_C,
116         -ESC_D,                  -ESC_E,
117         0,                       -ESC_G,
118         -ESC_H,                  0,
119         0,                       -ESC_K,
120         0,                       0,
121         0,                       0,
122         -ESC_P,                  -ESC_Q,
123         -ESC_R,                  -ESC_S,
124         0,                       0,
125         -ESC_V,                  -ESC_W,
126         -ESC_X,                  0,
127         -ESC_Z,                  CHAR_LEFT_SQUARE_BRACKET,
128         CHAR_BACKSLASH,          CHAR_RIGHT_SQUARE_BRACKET,
129         CHAR_CIRCUMFLEX_ACCENT,  CHAR_UNDERSCORE,
130         CHAR_GRAVE_ACCENT,       7,
131         -ESC_b,                  0,
132         -ESC_d,                  ESC_e,
133         ESC_f,                   0,
134         -ESC_h,                  0,
135         0,                       -ESC_k,
136         0,                       0,
137         ESC_n,                   0,
138         -ESC_p,                  0,
139         ESC_r,                   -ESC_s,
140         ESC_tee,                 0,
141         -ESC_v,                  -ESC_w,
142         0,                       0,
143         -ESC_z
144  };  };
145    
146  #else           /* This is the "abnormal" table for EBCDIC systems */  #else
147    
148    /* This is the "abnormal" table for EBCDIC systems without UTF-8 support. */
149    
150  static const short int escapes[] = {  static const short int escapes[] = {
151  /*  48 */     0,     0,      0,     '.',    '<',   '(',    '+',    '|',  /*  48 */     0,     0,      0,     '.',    '<',   '(',    '+',    '|',
152  /*  50 */   '&',     0,      0,       0,      0,     0,      0,      0,  /*  50 */   '&',     0,      0,       0,      0,     0,      0,      0,
# Line 120  static const short int escapes[] = { Line 165  static const short int escapes[] = {
165  /*  B8 */     0,     0,      0,       0,      0,   ']',    '=',    '-',  /*  B8 */     0,     0,      0,       0,      0,   ']',    '=',    '-',
166  /*  C0 */   '{',-ESC_A, -ESC_B,  -ESC_C, -ESC_D,-ESC_E,      0, -ESC_G,  /*  C0 */   '{',-ESC_A, -ESC_B,  -ESC_C, -ESC_D,-ESC_E,      0, -ESC_G,
167  /*  C8 */-ESC_H,     0,      0,       0,      0,     0,      0,      0,  /*  C8 */-ESC_H,     0,      0,       0,      0,     0,      0,      0,
168  /*  D0 */   '}',     0,      0,       0,      0,     0,      0, -ESC_P,  /*  D0 */   '}',     0, -ESC_K,       0,      0,     0,      0, -ESC_P,
169  /*  D8 */-ESC_Q,-ESC_R,      0,       0,      0,     0,      0,      0,  /*  D8 */-ESC_Q,-ESC_R,      0,       0,      0,     0,      0,      0,
170  /*  E0 */  '\\',     0, -ESC_S,       0,      0,-ESC_V, -ESC_W, -ESC_X,  /*  E0 */  '\\',     0, -ESC_S,       0,      0,-ESC_V, -ESC_W, -ESC_X,
171  /*  E8 */     0,-ESC_Z,      0,       0,      0,     0,      0,      0,  /*  E8 */     0,-ESC_Z,      0,       0,      0,     0,      0,      0,
# Line 130  static const short int escapes[] = { Line 175  static const short int escapes[] = {
175  #endif  #endif
176    
177    
178  /* Tables of names of POSIX character classes and their lengths. The list is  /* Table of special "verbs" like (*PRUNE). This is a short table, so it is
179  terminated by a zero length entry. The first three must be alpha, lower, upper,  searched linearly. Put all the names into a single string, in order to reduce
180  as this is assumed for handling case independence. */  the number of relocations when a shared library is dynamically linked. The
181    string is built from string macros so that it works in UTF-8 mode on EBCDIC
182  static const char *const posix_names[] = {  platforms. */
183    "alpha", "lower", "upper",  
184    "alnum", "ascii", "blank", "cntrl", "digit", "graph",  typedef struct verbitem {
185    "print", "punct", "space", "word",  "xdigit" };    int   len;
186      int   op;
187    } verbitem;
188    
189    static const char verbnames[] =
190      STRING_ACCEPT0
191      STRING_COMMIT0
192      STRING_F0
193      STRING_FAIL0
194      STRING_PRUNE0
195      STRING_SKIP0
196      STRING_THEN;
197    
198    static const verbitem verbs[] = {
199      { 6, OP_ACCEPT },
200      { 6, OP_COMMIT },
201      { 1, OP_FAIL },
202      { 4, OP_FAIL },
203      { 5, OP_PRUNE },
204      { 4, OP_SKIP  },
205      { 4, OP_THEN  }
206    };
207    
208    static const int verbcount = sizeof(verbs)/sizeof(verbitem);
209    
210    
211    /* Tables of names of POSIX character classes and their lengths. The names are
212    now all in a single string, to reduce the number of relocations when a shared
213    library is dynamically loaded. The list of lengths is terminated by a zero
214    length entry. The first three must be alpha, lower, upper, as this is assumed
215    for handling case independence. */
216    
217    static const char posix_names[] =
218      STRING_alpha0 STRING_lower0 STRING_upper0 STRING_alnum0
219      STRING_ascii0 STRING_blank0 STRING_cntrl0 STRING_digit0
220      STRING_graph0 STRING_print0 STRING_punct0 STRING_space0
221      STRING_word0  STRING_xdigit;
222    
223  static const uschar posix_name_lengths[] = {  static const uschar posix_name_lengths[] = {
224    5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 4, 6, 0 };    5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 4, 6, 0 };
# Line 176  static const int posix_class_maps[] = { Line 257  static const int posix_class_maps[] = {
257  /* The texts of compile-time error messages. These are "char *" because they  /* The texts of compile-time error messages. These are "char *" because they
258  are passed to the outside world. Do not ever re-use any error number, because  are passed to the outside world. Do not ever re-use any error number, because
259  they are documented. Always add a new error instead. Messages marked DEAD below  they are documented. Always add a new error instead. Messages marked DEAD below
260  are no longer used. */  are no longer used. This used to be a table of strings, but in order to reduce
261    the number of relocations needed when a shared library is loaded dynamically,
262  static const char *error_texts[] = {  it is now one long string. We cannot use a table of offsets, because the
263    "no error",  lengths of inserts such as XSTRING(MAX_NAME_SIZE) are not known. Instead, we
264    "\\ at end of pattern",  simply count through to the one we want - this isn't a performance issue
265    "\\c at end of pattern",  because these strings are used only when there is a compilation error. */
266    "unrecognized character follows \\",  
267    "numbers out of order in {} quantifier",  static const char error_texts[] =
268      "no error\0"
269      "\\ at end of pattern\0"
270      "\\c at end of pattern\0"
271      "unrecognized character follows \\\0"
272      "numbers out of order in {} quantifier\0"
273    /* 5 */    /* 5 */
274    "number too big in {} quantifier",    "number too big in {} quantifier\0"
275    "missing terminating ] for character class",    "missing terminating ] for character class\0"
276    "invalid escape sequence in character class",    "invalid escape sequence in character class\0"
277    "range out of order in character class",    "range out of order in character class\0"
278    "nothing to repeat",    "nothing to repeat\0"
279    /* 10 */    /* 10 */
280    "operand of unlimited repeat could match the empty string",  /** DEAD **/    "operand of unlimited repeat could match the empty string\0"  /** DEAD **/
281    "internal error: unexpected repeat",    "internal error: unexpected repeat\0"
282    "unrecognized character after (?",    "unrecognized character after (? or (?-\0"
283    "POSIX named classes are supported only within a class",    "POSIX named classes are supported only within a class\0"
284    "missing )",    "missing )\0"
285    /* 15 */    /* 15 */
286    "reference to non-existent subpattern",    "reference to non-existent subpattern\0"
287    "erroffset passed as NULL",    "erroffset passed as NULL\0"
288    "unknown option bit(s) set",    "unknown option bit(s) set\0"
289    "missing ) after comment",    "missing ) after comment\0"
290    "parentheses nested too deeply",  /** DEAD **/    "parentheses nested too deeply\0"  /** DEAD **/
291    /* 20 */    /* 20 */
292    "regular expression too large",    "regular expression is too large\0"
293    "failed to get memory",    "failed to get memory\0"
294    "unmatched parentheses",    "unmatched parentheses\0"
295    "internal error: code overflow",    "internal error: code overflow\0"
296    "unrecognized character after (?<",    "unrecognized character after (?<\0"
297    /* 25 */    /* 25 */
298    "lookbehind assertion is not fixed length",    "lookbehind assertion is not fixed length\0"
299    "malformed number or name after (?(",    "malformed number or name after (?(\0"
300    "conditional group contains more than two branches",    "conditional group contains more than two branches\0"
301    "assertion expected after (?(",    "assertion expected after (?(\0"
302    "(?R or (?[+-]digits must be followed by )",    "(?R or (?[+-]digits must be followed by )\0"
303    /* 30 */    /* 30 */
304    "unknown POSIX class name",    "unknown POSIX class name\0"
305    "POSIX collating elements are not supported",    "POSIX collating elements are not supported\0"
306    "this version of PCRE is not compiled with PCRE_UTF8 support",    "this version of PCRE is not compiled with PCRE_UTF8 support\0"
307    "spare error",  /** DEAD **/    "spare error\0"  /** DEAD **/
308    "character value in \\x{...} sequence is too large",    "character value in \\x{...} sequence is too large\0"
309    /* 35 */    /* 35 */
310    "invalid condition (?(0)",    "invalid condition (?(0)\0"
311    "\\C not allowed in lookbehind assertion",    "\\C not allowed in lookbehind assertion\0"
312    "PCRE does not support \\L, \\l, \\N, \\U, or \\u",    "PCRE does not support \\L, \\l, \\N, \\U, or \\u\0"
313    "number after (?C is > 255",    "number after (?C is > 255\0"
314    "closing ) for (?C expected",    "closing ) for (?C expected\0"
315    /* 40 */    /* 40 */
316    "recursive call could loop indefinitely",    "recursive call could loop indefinitely\0"
317    "unrecognized character after (?P",    "unrecognized character after (?P\0"
318    "syntax error in subpattern name (missing terminator)",    "syntax error in subpattern name (missing terminator)\0"
319    "two named subpatterns have the same name",    "two named subpatterns have the same name\0"
320    "invalid UTF-8 string",    "invalid UTF-8 string\0"
321    /* 45 */    /* 45 */
322    "support for \\P, \\p, and \\X has not been compiled",    "support for \\P, \\p, and \\X has not been compiled\0"
323    "malformed \\P or \\p sequence",    "malformed \\P or \\p sequence\0"
324    "unknown property name after \\P or \\p",    "unknown property name after \\P or \\p\0"
325    "subpattern name is too long (maximum " XSTRING(MAX_NAME_SIZE) " characters)",    "subpattern name is too long (maximum " XSTRING(MAX_NAME_SIZE) " characters)\0"
326    "too many named subpatterns (maximum " XSTRING(MAX_NAME_COUNT) ")",    "too many named subpatterns (maximum " XSTRING(MAX_NAME_COUNT) ")\0"
327    /* 50 */    /* 50 */
328    "repeated subpattern is too long",    "repeated subpattern is too long\0"    /** DEAD **/
329    "octal value is greater than \\377 (not in UTF-8 mode)",    "octal value is greater than \\377 (not in UTF-8 mode)\0"
330    "internal error: overran compiling workspace",    "internal error: overran compiling workspace\0"
331    "internal error: previously-checked referenced subpattern not found",    "internal error: previously-checked referenced subpattern not found\0"
332    "DEFINE group contains more than one branch",    "DEFINE group contains more than one branch\0"
333    /* 55 */    /* 55 */
334    "repeating a DEFINE group is not allowed",    "repeating a DEFINE group is not allowed\0"
335    "inconsistent NEWLINE options",    "inconsistent NEWLINE options\0"
336    "\\g is not followed by a braced name or an optionally braced non-zero number",    "\\g is not followed by a braced, angle-bracketed, or quoted name/number or by a plain number\0"
337    "(?+ or (?- or (?(+ or (?(- must be followed by a non-zero number"    "a numbered reference must not be zero\0"
338  };    "(*VERB) with an argument is not supported\0"
339      /* 60 */
340      "(*VERB) not recognized\0"
341      "number is too big\0"
342      "subpattern name expected\0"
343      "digit expected after (?+\0"
344      "] is an invalid data character in JavaScript compatibility mode";
345    
346    
347  /* Table to identify digits and hex digits. This is used when compiling  /* Table to identify digits and hex digits. This is used when compiling
# Line 268  For convenience, we use the same bit def Line 360  For convenience, we use the same bit def
360    
361  Then we can use ctype_digit and ctype_xdigit in the code. */  Then we can use ctype_digit and ctype_xdigit in the code. */
362    
363  #ifndef EBCDIC  /* This is the "normal" case, for ASCII systems */  #ifndef EBCDIC
364    
365    /* This is the "normal" case, for ASCII systems, and EBCDIC systems running in
366    UTF-8 mode. */
367    
368  static const unsigned char digitab[] =  static const unsigned char digitab[] =
369    {    {
370    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*   0-  7 */    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*   0-  7 */
# Line 304  static const unsigned char digitab[] = Line 400  static const unsigned char digitab[] =
400    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 240-247 */    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 240-247 */
401    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};/* 248-255 */    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};/* 248-255 */
402    
403  #else           /* This is the "abnormal" case, for EBCDIC systems */  #else
404    
405    /* This is the "abnormal" case, for EBCDIC systems not running in UTF-8 mode. */
406    
407  static const unsigned char digitab[] =  static const unsigned char digitab[] =
408    {    {
409    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*   0-  7  0 */    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*   0-  7  0 */
# Line 385  static BOOL Line 484  static BOOL
484    
485    
486  /*************************************************  /*************************************************
487    *            Find an error text                  *
488    *************************************************/
489    
490    /* The error texts are now all in one long string, to save on relocations. As
491    some of the text is of unknown length, we can't use a table of offsets.
492    Instead, just count through the strings. This is not a performance issue
493    because it happens only when there has been a compilation error.
494    
495    Argument:   the error number
496    Returns:    pointer to the error string
497    */
498    
499    static const char *
500    find_error_text(int n)
501    {
502    const char *s = error_texts;
503    for (; n > 0; n--) while (*s++ != 0) {};
504    return s;
505    }
506    
507    
508    /*************************************************
509  *            Handle escapes                      *  *            Handle escapes                      *
510  *************************************************/  *************************************************/
511    
# Line 405  Arguments: Line 526  Arguments:
526    
527  Returns:         zero or positive => a data character  Returns:         zero or positive => a data character
528                   negative => a special escape sequence                   negative => a special escape sequence
529                   on error, errorptr is set                   on error, errorcodeptr is set
530  */  */
531    
532  static int  static int
# Line 423  ptr--; /* Set Line 544  ptr--; /* Set
544    
545  if (c == 0) *errorcodeptr = ERR1;  if (c == 0) *errorcodeptr = ERR1;
546    
547  /* Non-alphamerics are literals. For digits or letters, do an initial lookup in  /* Non-alphanumerics are literals. For digits or letters, do an initial lookup
548  a table. A non-zero result is something that can be returned immediately.  in a table. A non-zero result is something that can be returned immediately.
549  Otherwise further processing may be required. */  Otherwise further processing may be required. */
550    
551  #ifndef EBCDIC  /* ASCII coding */  #ifndef EBCDIC  /* ASCII/UTF-8 coding */
552  else if (c < '0' || c > 'z') {}                           /* Not alphameric */  else if (c < CHAR_0 || c > CHAR_z) {}                     /* Not alphanumeric */
553  else if ((i = escapes[c - '0']) != 0) c = i;  else if ((i = escapes[c - CHAR_0]) != 0) c = i;
554    
555  #else           /* EBCDIC coding */  #else           /* EBCDIC coding */
556  else if (c < 'a' || (ebcdic_chartab[c] & 0x0E) == 0) {}   /* Not alphameric */  else if (c < 'a' || (ebcdic_chartab[c] & 0x0E) == 0) {}   /* Not alphanumeric */
557  else if ((i = escapes[c - 0x48]) != 0)  c = i;  else if ((i = escapes[c - 0x48]) != 0)  c = i;
558  #endif  #endif
559    
# Line 448  else Line 569  else
569      /* A number of Perl escapes are not handled by PCRE. We give an explicit      /* A number of Perl escapes are not handled by PCRE. We give an explicit
570      error. */      error. */
571    
572      case 'l':      case CHAR_l:
573      case 'L':      case CHAR_L:
574      case 'N':      case CHAR_N:
575      case 'u':      case CHAR_u:
576      case 'U':      case CHAR_U:
577      *errorcodeptr = ERR37;      *errorcodeptr = ERR37;
578      break;      break;
579    
580      /* \g must be followed by a number, either plain or braced. If positive, it      /* \g must be followed by one of a number of specific things:
581      is an absolute backreference. If negative, it is a relative backreference.  
582      This is a Perl 5.10 feature. Perl 5.10 also supports \g{name} as a      (1) A number, either plain or braced. If positive, it is an absolute
583      reference to a named group. This is part of Perl's movement towards a      backreference. If negative, it is a relative backreference. This is a Perl
584      unified syntax for back references. As this is synonymous with \k{name}, we      5.10 feature.
585      fudge it up by pretending it really was \k. */  
586        (2) Perl 5.10 also supports \g{name} as a reference to a named group. This
587        is part of Perl's movement towards a unified syntax for back references. As
588        this is synonymous with \k{name}, we fudge it up by pretending it really
589        was \k.
590    
591        (3) For Oniguruma compatibility we also support \g followed by a name or a
592        number either in angle brackets or in single quotes. However, these are
593        (possibly recursive) subroutine calls, _not_ backreferences. Just return
594        the -ESC_g code (cf \k). */
595    
596      case 'g':      case CHAR_g:
597      if (ptr[1] == '{')      if (ptr[1] == CHAR_LESS_THAN_SIGN || ptr[1] == CHAR_APOSTROPHE)
598          {
599          c = -ESC_g;
600          break;
601          }
602    
603        /* Handle the Perl-compatible cases */
604    
605        if (ptr[1] == CHAR_LEFT_CURLY_BRACKET)
606        {        {
607        const uschar *p;        const uschar *p;
608        for (p = ptr+2; *p != 0 && *p != '}'; p++)        for (p = ptr+2; *p != 0 && *p != CHAR_RIGHT_CURLY_BRACKET; p++)
609          if (*p != '-' && (digitab[*p] & ctype_digit) == 0) break;          if (*p != CHAR_MINUS && (digitab[*p] & ctype_digit) == 0) break;
610        if (*p != 0 && *p != '}')        if (*p != 0 && *p != CHAR_RIGHT_CURLY_BRACKET)
611          {          {
612          c = -ESC_k;          c = -ESC_k;
613          break;          break;
# Line 479  else Line 617  else
617        }        }
618      else braced = FALSE;      else braced = FALSE;
619    
620      if (ptr[1] == '-')      if (ptr[1] == CHAR_MINUS)
621        {        {
622        negated = TRUE;        negated = TRUE;
623        ptr++;        ptr++;
# Line 488  else Line 626  else
626    
627      c = 0;      c = 0;
628      while ((digitab[ptr[1]] & ctype_digit) != 0)      while ((digitab[ptr[1]] & ctype_digit) != 0)
629        c = c * 10 + *(++ptr) - '0';        c = c * 10 + *(++ptr) - CHAR_0;
630    
631      if (c == 0 || (braced && *(++ptr) != '}'))      if (c < 0)   /* Integer overflow */
632          {
633          *errorcodeptr = ERR61;
634          break;
635          }
636    
637        if (braced && *(++ptr) != CHAR_RIGHT_CURLY_BRACKET)
638        {        {
639        *errorcodeptr = ERR57;        *errorcodeptr = ERR57;
640        return 0;        break;
641          }
642    
643        if (c == 0)
644          {
645          *errorcodeptr = ERR58;
646          break;
647        }        }
648    
649      if (negated)      if (negated)
# Line 501  else Line 651  else
651        if (c > bracount)        if (c > bracount)
652          {          {
653          *errorcodeptr = ERR15;          *errorcodeptr = ERR15;
654          return 0;          break;
655          }          }
656        c = bracount - (c - 1);        c = bracount - (c - 1);
657        }        }
# Line 521  else Line 671  else
671      value is greater than 377, the least significant 8 bits are taken. Inside a      value is greater than 377, the least significant 8 bits are taken. Inside a
672      character class, \ followed by a digit is always an octal number. */      character class, \ followed by a digit is always an octal number. */
673    
674      case '1': case '2': case '3': case '4': case '5':      case CHAR_1: case CHAR_2: case CHAR_3: case CHAR_4: case CHAR_5:
675      case '6': case '7': case '8': case '9':      case CHAR_6: case CHAR_7: case CHAR_8: case CHAR_9:
676    
677      if (!isclass)      if (!isclass)
678        {        {
679        oldptr = ptr;        oldptr = ptr;
680        c -= '0';        c -= CHAR_0;
681        while ((digitab[ptr[1]] & ctype_digit) != 0)        while ((digitab[ptr[1]] & ctype_digit) != 0)
682          c = c * 10 + *(++ptr) - '0';          c = c * 10 + *(++ptr) - CHAR_0;
683          if (c < 0)    /* Integer overflow */
684            {
685            *errorcodeptr = ERR61;
686            break;
687            }
688        if (c < 10 || c <= bracount)        if (c < 10 || c <= bracount)
689          {          {
690          c = -(ESC_REF + c);          c = -(ESC_REF + c);
# Line 542  else Line 697  else
697      generates a binary zero byte and treats the digit as a following literal.      generates a binary zero byte and treats the digit as a following literal.
698      Thus we have to pull back the pointer by one. */      Thus we have to pull back the pointer by one. */
699    
700      if ((c = *ptr) >= '8')      if ((c = *ptr) >= CHAR_8)
701        {        {
702        ptr--;        ptr--;
703        c = 0;        c = 0;
# Line 555  else Line 710  else
710      to do). Nowadays we allow for larger numbers in UTF-8 mode, but no more      to do). Nowadays we allow for larger numbers in UTF-8 mode, but no more
711      than 3 octal digits. */      than 3 octal digits. */
712    
713      case '0':      case CHAR_0:
714      c -= '0';      c -= CHAR_0;
715      while(i++ < 2 && ptr[1] >= '0' && ptr[1] <= '7')      while(i++ < 2 && ptr[1] >= CHAR_0 && ptr[1] <= CHAR_7)
716          c = c * 8 + *(++ptr) - '0';          c = c * 8 + *(++ptr) - CHAR_0;
717      if (!utf8 && c > 255) *errorcodeptr = ERR51;      if (!utf8 && c > 255) *errorcodeptr = ERR51;
718      break;      break;
719    
# Line 566  else Line 721  else
721      than 0xff in utf8 mode, but only if the ddd are hex digits. If not, { is      than 0xff in utf8 mode, but only if the ddd are hex digits. If not, { is
722      treated as a data character. */      treated as a data character. */
723    
724      case 'x':      case CHAR_x:
725      if (ptr[1] == '{')      if (ptr[1] == CHAR_LEFT_CURLY_BRACKET)
726        {        {
727        const uschar *pt = ptr + 2;        const uschar *pt = ptr + 2;
728        int count = 0;        int count = 0;
# Line 576  else Line 731  else
731        while ((digitab[*pt] & ctype_xdigit) != 0)        while ((digitab[*pt] & ctype_xdigit) != 0)
732          {          {
733          register int cc = *pt++;          register int cc = *pt++;
734          if (c == 0 && cc == '0') continue;     /* Leading zeroes */          if (c == 0 && cc == CHAR_0) continue;     /* Leading zeroes */
735          count++;          count++;
736    
737  #ifndef EBCDIC  /* ASCII coding */  #ifndef EBCDIC  /* ASCII/UTF-8 coding */
738          if (cc >= 'a') cc -= 32;               /* Convert to upper case */          if (cc >= CHAR_a) cc -= 32;               /* Convert to upper case */
739          c = (c << 4) + cc - ((cc < 'A')? '0' : ('A' - 10));          c = (c << 4) + cc - ((cc < CHAR_A)? CHAR_0 : (CHAR_A - 10));
740  #else           /* EBCDIC coding */  #else           /* EBCDIC coding */
741          if (cc >= 'a' && cc <= 'z') cc += 64;  /* Convert to upper case */          if (cc >= CHAR_a && cc <= CHAR_z) cc += 64;  /* Convert to upper case */
742          c = (c << 4) + cc - ((cc >= '0')? '0' : ('A' - 10));          c = (c << 4) + cc - ((cc >= CHAR_0)? CHAR_0 : (CHAR_A - 10));
743  #endif  #endif
744          }          }
745    
746        if (*pt == '}')        if (*pt == CHAR_RIGHT_CURLY_BRACKET)
747          {          {
748          if (c < 0 || count > (utf8? 8 : 2)) *errorcodeptr = ERR34;          if (c < 0 || count > (utf8? 8 : 2)) *errorcodeptr = ERR34;
749          ptr = pt;          ptr = pt;
# Line 604  else Line 759  else
759      c = 0;      c = 0;
760      while (i++ < 2 && (digitab[ptr[1]] & ctype_xdigit) != 0)      while (i++ < 2 && (digitab[ptr[1]] & ctype_xdigit) != 0)
761        {        {
762        int cc;                               /* Some compilers don't like ++ */        int cc;                                  /* Some compilers don't like */
763        cc = *(++ptr);                        /* in initializers */        cc = *(++ptr);                           /* ++ in initializers */
764  #ifndef EBCDIC  /* ASCII coding */  #ifndef EBCDIC  /* ASCII/UTF-8 coding */
765        if (cc >= 'a') cc -= 32;              /* Convert to upper case */        if (cc >= CHAR_a) cc -= 32;              /* Convert to upper case */
766        c = c * 16 + cc - ((cc < 'A')? '0' : ('A' - 10));        c = c * 16 + cc - ((cc < CHAR_A)? CHAR_0 : (CHAR_A - 10));
767  #else           /* EBCDIC coding */  #else           /* EBCDIC coding */
768        if (cc <= 'z') cc += 64;              /* Convert to upper case */        if (cc <= CHAR_z) cc += 64;              /* Convert to upper case */
769        c = c * 16 + cc - ((cc >= '0')? '0' : ('A' - 10));        c = c * 16 + cc - ((cc >= CHAR_0)? CHAR_0 : (CHAR_A - 10));
770  #endif  #endif
771        }        }
772      break;      break;
# Line 620  else Line 775  else
775      This coding is ASCII-specific, but then the whole concept of \cx is      This coding is ASCII-specific, but then the whole concept of \cx is
776      ASCII-specific. (However, an EBCDIC equivalent has now been added.) */      ASCII-specific. (However, an EBCDIC equivalent has now been added.) */
777    
778      case 'c':      case CHAR_c:
779      c = *(++ptr);      c = *(++ptr);
780      if (c == 0)      if (c == 0)
781        {        {
782        *errorcodeptr = ERR2;        *errorcodeptr = ERR2;
783        return 0;        break;
784        }        }
785    
786  #ifndef EBCDIC  /* ASCII coding */  #ifndef EBCDIC  /* ASCII/UTF-8 coding */
787      if (c >= 'a' && c <= 'z') c -= 32;      if (c >= CHAR_a && c <= CHAR_z) c -= 32;
788      c ^= 0x40;      c ^= 0x40;
789  #else           /* EBCDIC coding */  #else           /* EBCDIC coding */
790      if (c >= 'a' && c <= 'z') c += 64;      if (c >= CHAR_a && c <= CHAR_z) c += 64;
791      c ^= 0xC0;      c ^= 0xC0;
792  #endif  #endif
793      break;      break;
794    
795      /* PCRE_EXTRA enables extensions to Perl in the matter of escapes. Any      /* PCRE_EXTRA enables extensions to Perl in the matter of escapes. Any
796      other alphameric following \ is an error if PCRE_EXTRA was set; otherwise,      other alphanumeric following \ is an error if PCRE_EXTRA was set;
797      for Perl compatibility, it is a literal. This code looks a bit odd, but      otherwise, for Perl compatibility, it is a literal. This code looks a bit
798      there used to be some cases other than the default, and there may be again      odd, but there used to be some cases other than the default, and there may
799      in future, so I haven't "optimized" it. */      be again in future, so I haven't "optimized" it. */
800    
801      default:      default:
802      if ((options & PCRE_EXTRA) != 0) switch(c)      if ((options & PCRE_EXTRA) != 0) switch(c)
# Line 694  if (c == 0) goto ERROR_RETURN; Line 849  if (c == 0) goto ERROR_RETURN;
849  /* \P or \p can be followed by a name in {}, optionally preceded by ^ for  /* \P or \p can be followed by a name in {}, optionally preceded by ^ for
850  negation. */  negation. */
851    
852  if (c == '{')  if (c == CHAR_LEFT_CURLY_BRACKET)
853    {    {
854    if (ptr[1] == '^')    if (ptr[1] == CHAR_CIRCUMFLEX_ACCENT)
855      {      {
856      *negptr = TRUE;      *negptr = TRUE;
857      ptr++;      ptr++;
858      }      }
859    for (i = 0; i < sizeof(name) - 1; i++)    for (i = 0; i < (int)sizeof(name) - 1; i++)
860      {      {
861      c = *(++ptr);      c = *(++ptr);
862      if (c == 0) goto ERROR_RETURN;      if (c == 0) goto ERROR_RETURN;
863      if (c == '}') break;      if (c == CHAR_RIGHT_CURLY_BRACKET) break;
864      name[i] = c;      name[i] = c;
865      }      }
866    if (c !='}') goto ERROR_RETURN;    if (c != CHAR_RIGHT_CURLY_BRACKET) goto ERROR_RETURN;
867    name[i] = 0;    name[i] = 0;
868    }    }
869    
# Line 730  top = _pcre_utt_size; Line 885  top = _pcre_utt_size;
885  while (bot < top)  while (bot < top)
886    {    {
887    i = (bot + top) >> 1;    i = (bot + top) >> 1;
888    c = strcmp(name, _pcre_utt[i].name);    c = strcmp(name, _pcre_utt_names + _pcre_utt[i].name_offset);
889    if (c == 0)    if (c == 0)
890      {      {
891      *dptr = _pcre_utt[i].value;      *dptr = _pcre_utt[i].value;
# Line 773  is_counted_repeat(const uschar *p) Line 928  is_counted_repeat(const uschar *p)
928  {  {
929  if ((digitab[*p++] & ctype_digit) == 0) return FALSE;  if ((digitab[*p++] & ctype_digit) == 0) return FALSE;
930  while ((digitab[*p] & ctype_digit) != 0) p++;  while ((digitab[*p] & ctype_digit) != 0) p++;
931  if (*p == '}') return TRUE;  if (*p == CHAR_RIGHT_CURLY_BRACKET) return TRUE;
932    
933  if (*p++ != ',') return FALSE;  if (*p++ != CHAR_COMMA) return FALSE;
934  if (*p == '}') return TRUE;  if (*p == CHAR_RIGHT_CURLY_BRACKET) return TRUE;
935    
936  if ((digitab[*p++] & ctype_digit) == 0) return FALSE;  if ((digitab[*p++] & ctype_digit) == 0) return FALSE;
937  while ((digitab[*p] & ctype_digit) != 0) p++;  while ((digitab[*p] & ctype_digit) != 0) p++;
938    
939  return (*p == '}');  return (*p == CHAR_RIGHT_CURLY_BRACKET);
940  }  }
941    
942    
# Line 814  int max = -1; Line 969  int max = -1;
969  /* Read the minimum value and do a paranoid check: a negative value indicates  /* Read the minimum value and do a paranoid check: a negative value indicates
970  an integer overflow. */  an integer overflow. */
971    
972  while ((digitab[*p] & ctype_digit) != 0) min = min * 10 + *p++ - '0';  while ((digitab[*p] & ctype_digit) != 0) min = min * 10 + *p++ - CHAR_0;
973  if (min < 0 || min > 65535)  if (min < 0 || min > 65535)
974    {    {
975    *errorcodeptr = ERR5;    *errorcodeptr = ERR5;
# Line 824  if (min < 0 || min > 65535) Line 979  if (min < 0 || min > 65535)
979  /* Read the maximum value if there is one, and again do a paranoid check on its size.  /* Read the maximum value if there is one, and again do a paranoid check on its size.
980  Also, max must not be less than min. */  Also, max must not be less than min. */
981    
982  if (*p == '}') max = min; else  if (*p == CHAR_RIGHT_CURLY_BRACKET) max = min; else
983    {    {
984    if (*(++p) != '}')    if (*(++p) != CHAR_RIGHT_CURLY_BRACKET)
985      {      {
986      max = 0;      max = 0;
987      while((digitab[*p] & ctype_digit) != 0) max = max * 10 + *p++ - '0';      while((digitab[*p] & ctype_digit) != 0) max = max * 10 + *p++ - CHAR_0;
988      if (max < 0 || max > 65535)      if (max < 0 || max > 65535)
989        {        {
990        *errorcodeptr = ERR5;        *errorcodeptr = ERR5;
# Line 854  return p; Line 1009  return p;
1009    
1010    
1011  /*************************************************  /*************************************************
1012  *       Find forward referenced subpattern       *  *  Subroutine for finding forward reference      *
1013  *************************************************/  *************************************************/
1014    
1015  /* This function scans along a pattern's text looking for capturing  /* This recursive function is called only from find_parens() below. The
1016    top-level call starts at the beginning of the pattern. All other calls must
1017    start at a parenthesis. It scans along a pattern's text looking for capturing
1018  subpatterns, and counting them. If it finds a named pattern that matches the  subpatterns, and counting them. If it finds a named pattern that matches the
1019  name it is given, it returns its number. Alternatively, if the name is NULL, it  name it is given, it returns its number. Alternatively, if the name is NULL, it
1020  returns when it reaches a given numbered subpattern. This is used for forward  returns when it reaches a given numbered subpattern. We know that if (?P< is
1021  references to subpatterns. We know that if (?P< is encountered, the name will  encountered, the name will be terminated by '>' because that is checked in the
1022  be terminated by '>' because that is checked in the first pass.  first pass. Recursion is used to keep track of subpatterns that reset the
1023    capturing group numbers - the (?| feature.
1024    
1025  Arguments:  Arguments:
1026    ptr          current position in the pattern    ptrptr       address of the current character pointer (updated)
1027    count        current count of capturing parens so far encountered    cd           compile background data
1028    name         name to seek, or NULL if seeking a numbered subpattern    name         name to seek, or NULL if seeking a numbered subpattern
1029    lorn         name length, or subpattern number if name is NULL    lorn         name length, or subpattern number if name is NULL
1030    xmode        TRUE if we are in /x mode    xmode        TRUE if we are in /x mode
1031      count        pointer to the current capturing subpattern number (updated)
1032    
1033  Returns:       the number of the named subpattern, or -1 if not found  Returns:       the number of the named subpattern, or -1 if not found
1034  */  */
1035    
1036  static int  static int
1037  find_parens(const uschar *ptr, int count, const uschar *name, int lorn,  find_parens_sub(uschar **ptrptr, compile_data *cd, const uschar *name, int lorn,
1038    BOOL xmode)    BOOL xmode, int *count)
1039  {  {
1040  const uschar *thisname;  uschar *ptr = *ptrptr;
1041    int start_count = *count;
1042    int hwm_count = start_count;
1043    BOOL dup_parens = FALSE;
1044    
1045  for (; *ptr != 0; ptr++)  /* If the first character is a parenthesis, check on the type of group we are
1046    dealing with. The very first call may not start with a parenthesis. */
1047    
1048    if (ptr[0] == CHAR_LEFT_PARENTHESIS)
1049    {    {
1050    int term;    if (ptr[1] == CHAR_QUESTION_MARK &&
1051          ptr[2] == CHAR_VERTICAL_LINE)
1052        {
1053        ptr += 3;
1054        dup_parens = TRUE;
1055        }
1056    
1057      /* Handle a normal, unnamed capturing parenthesis */
1058    
1059      else if (ptr[1] != CHAR_QUESTION_MARK && ptr[1] != CHAR_ASTERISK)
1060        {
1061        *count += 1;
1062        if (name == NULL && *count == lorn) return *count;
1063        ptr++;
1064        }
1065    
1066      /* Handle a condition. If it is an assertion, just carry on so that it
1067      is processed as normal. If not, skip to the closing parenthesis of the
1068      condition (there can't be any nested parens). */
1069    
1070      else if (ptr[2] == CHAR_LEFT_PARENTHESIS)
1071        {
1072        ptr += 2;
1073        if (ptr[1] != CHAR_QUESTION_MARK)
1074          {
1075          while (*ptr != 0 && *ptr != CHAR_RIGHT_PARENTHESIS) ptr++;
1076          if (*ptr != 0) ptr++;
1077          }
1078        }
1079    
1080      /* We have either (? or (* and not a condition */
1081    
1082      else
1083        {
1084        ptr += 2;
1085        if (*ptr == CHAR_P) ptr++;                      /* Allow optional P */
1086    
1087        /* We have to disambiguate (?<! and (?<= from (?<name> for named groups */
1088    
1089        if ((*ptr == CHAR_LESS_THAN_SIGN && ptr[1] != CHAR_EXCLAMATION_MARK &&
1090            ptr[1] != CHAR_EQUALS_SIGN) || *ptr == CHAR_APOSTROPHE)
1091          {
1092          int term;
1093          const uschar *thisname;
1094          *count += 1;
1095          if (name == NULL && *count == lorn) return *count;
1096          term = *ptr++;
1097          if (term == CHAR_LESS_THAN_SIGN) term = CHAR_GREATER_THAN_SIGN;
1098          thisname = ptr;
1099          while (*ptr != term) ptr++;
1100          if (name != NULL && lorn == ptr - thisname &&
1101              strncmp((const char *)name, (const char *)thisname, lorn) == 0)
1102            return *count;
1103          }
1104        }
1105      }
1106    
1107    /* Past any initial parenthesis handling, scan for parentheses or vertical
1108    bars. */
1109    
1110    for (; *ptr != 0; ptr++)
1111      {
1112    /* Skip over backslashed characters and also entire \Q...\E */    /* Skip over backslashed characters and also entire \Q...\E */
1113    
1114    if (*ptr == '\\')    if (*ptr == CHAR_BACKSLASH)
1115      {      {
1116      if (*(++ptr) == 0) return -1;      if (*(++ptr) == 0) goto FAIL_EXIT;
1117      if (*ptr == 'Q') for (;;)      if (*ptr == CHAR_Q) for (;;)
1118        {        {
1119        while (*(++ptr) != 0 && *ptr != '\\');        while (*(++ptr) != 0 && *ptr != CHAR_BACKSLASH) {};
1120        if (*ptr == 0) return -1;        if (*ptr == 0) goto FAIL_EXIT;
1121        if (*(++ptr) == 'E') break;        if (*(++ptr) == CHAR_E) break;
1122        }        }
1123      continue;      continue;
1124      }      }
1125    
1126    /* Skip over character classes */    /* Skip over character classes; this logic must be similar to the way they
1127      are handled for real. If the first character is '^', skip it. Also, if the
1128      first few characters (either before or after ^) are \Q\E or \E we skip them
1129      too. This makes for compatibility with Perl. Note the use of STR macros to
1130      encode "Q\\E" so that it works in UTF-8 on EBCDIC platforms. */
1131    
1132    if (*ptr == '[')    if (*ptr == CHAR_LEFT_SQUARE_BRACKET)
1133      {      {
1134      while (*(++ptr) != ']')      BOOL negate_class = FALSE;
1135        for (;;)
1136        {        {
1137        if (*ptr == '\\')        int c = *(++ptr);
1138          if (c == CHAR_BACKSLASH)
1139          {          {
1140          if (*(++ptr) == 0) return -1;          if (ptr[1] == CHAR_E)
1141          if (*ptr == 'Q') for (;;)            ptr++;
1142            else if (strncmp((const char *)ptr+1,
1143                     STR_Q STR_BACKSLASH STR_E, 3) == 0)
1144              ptr += 3;
1145            else
1146              break;
1147            }
1148          else if (!negate_class && c == CHAR_CIRCUMFLEX_ACCENT)
1149            negate_class = TRUE;
1150          else break;
1151          }
1152    
1153        /* If the next character is ']', it is a data character that must be
1154        skipped, except in JavaScript compatibility mode. */
1155    
1156        if (ptr[1] == CHAR_RIGHT_SQUARE_BRACKET &&
1157            (cd->external_options & PCRE_JAVASCRIPT_COMPAT) == 0)
1158          ptr++;
1159    
1160        while (*(++ptr) != CHAR_RIGHT_SQUARE_BRACKET)
1161          {
1162          if (*ptr == 0) return -1;
1163          if (*ptr == CHAR_BACKSLASH)
1164            {
1165            if (*(++ptr) == 0) goto FAIL_EXIT;
1166            if (*ptr == CHAR_Q) for (;;)
1167            {            {
1168            while (*(++ptr) != 0 && *ptr != '\\');            while (*(++ptr) != 0 && *ptr != CHAR_BACKSLASH) {};
1169            if (*ptr == 0) return -1;            if (*ptr == 0) goto FAIL_EXIT;
1170            if (*(++ptr) == 'E') break;            if (*(++ptr) == CHAR_E) break;
1171            }            }
1172          continue;          continue;
1173          }          }
# Line 921  for (; *ptr != 0; ptr++) Line 1177  for (; *ptr != 0; ptr++)
1177    
1178    /* Skip comments in /x mode */    /* Skip comments in /x mode */
1179    
1180    if (xmode && *ptr == '#')    if (xmode && *ptr == CHAR_NUMBER_SIGN)
1181      {      {
1182      while (*(++ptr) != 0 && *ptr != '\n');      while (*(++ptr) != 0 && *ptr != CHAR_NL) {};
1183      if (*ptr == 0) return -1;      if (*ptr == 0) goto FAIL_EXIT;
1184      continue;      continue;
1185      }      }
1186    
1187    /* An opening parens must now be a real metacharacter */    /* Check for the special metacharacters */
1188    
1189    if (*ptr != '(') continue;    if (*ptr == CHAR_LEFT_PARENTHESIS)
   if (ptr[1] != '?')  
1190      {      {
1191      count++;      int rc = find_parens_sub(&ptr, cd, name, lorn, xmode, count);
1192      if (name == NULL && count == lorn) return count;      if (rc > 0) return rc;
1193      continue;      if (*ptr == 0) goto FAIL_EXIT;
1194      }      }
1195    
1196    ptr += 2;    else if (*ptr == CHAR_RIGHT_PARENTHESIS)
1197    if (*ptr == 'P') ptr++;                      /* Allow optional P */      {
1198        if (dup_parens && *count < hwm_count) *count = hwm_count;
1199        *ptrptr = ptr;
1200        return -1;
1201        }
1202    
1203    /* We have to disambiguate (?<! and (?<= from (?<name> */    else if (*ptr == CHAR_VERTICAL_LINE && dup_parens)
1204        {
1205        if (*count > hwm_count) hwm_count = *count;
1206        *count = start_count;
1207        }
1208      }
1209    
1210    if ((*ptr != '<' || ptr[1] == '!' || ptr[1] == '=') &&  FAIL_EXIT:
1211         *ptr != '\'')  *ptrptr = ptr;
1212      continue;  return -1;
1213    }
1214    
1215    
1216    
1217    
1218    /*************************************************
1219    *       Find forward referenced subpattern       *
1220    *************************************************/
1221    
1222    /* This function scans along a pattern's text looking for capturing
1223    subpatterns, and counting them. If it finds a named pattern that matches the
1224    name it is given, it returns its number. Alternatively, if the name is NULL, it
1225    returns when it reaches a given numbered subpattern. This is used for forward
1226    references to subpatterns. We used to be able to start this scan from the
1227    current compiling point, using the current count value from cd->bracount, and
1228    do it all in a single loop, but the addition of the possibility of duplicate
1229    subpattern numbers means that we have to scan from the very start, in order to
1230    take account of such duplicates, and to use a recursive function to keep track
1231    of the different types of group.
1232    
1233    Arguments:
1234      cd           compile background data
1235      name         name to seek, or NULL if seeking a numbered subpattern
1236      lorn         name length, or subpattern number if name is NULL
1237      xmode        TRUE if we are in /x mode
1238    
1239    Returns:       the number of the found subpattern, or -1 if not found
1240    */
1241    
1242    count++;  static int
1243    find_parens(compile_data *cd, const uschar *name, int lorn, BOOL xmode)
1244    {
1245    uschar *ptr = (uschar *)cd->start_pattern;
1246    int count = 0;
1247    int rc;
1248    
1249    /* If the pattern does not start with an opening parenthesis, the first call
1250    to find_parens_sub() will scan right to the end (if necessary). However, if it
1251    does start with a parenthesis, find_parens_sub() will return when it hits the
1252    matching closing parens. That is why we have to have a loop. */
1253    
1254    if (name == NULL && count == lorn) return count;  for (;;)
1255    term = *ptr++;    {
1256    if (term == '<') term = '>';    rc = find_parens_sub(&ptr, cd, name, lorn, xmode, &count);
1257    thisname = ptr;    if (rc > 0 || *ptr++ == 0) break;
   while (*ptr != term) ptr++;  
   if (name != NULL && lorn == ptr - thisname &&  
       strncmp((const char *)name, (const char *)thisname, lorn) == 0)  
     return count;  
1258    }    }
1259    
1260  return -1;  return rc;
1261  }  }
1262    
1263    
1264    
1265    
1266  /*************************************************  /*************************************************
1267  *      Find first significant op code            *  *      Find first significant op code            *
1268  *************************************************/  *************************************************/
# Line 1059  for (;;) Line 1358  for (;;)
1358    {    {
1359    int d;    int d;
1360    register int op = *cc;    register int op = *cc;
   
1361    switch (op)    switch (op)
1362      {      {
1363      case OP_CBRA:      case OP_CBRA:
# Line 1148  for (;;) Line 1446  for (;;)
1446    
1447      case OP_TYPEEXACT:      case OP_TYPEEXACT:
1448      branchlength += GET2(cc,1);      branchlength += GET2(cc,1);
1449        if (cc[3] == OP_PROP || cc[3] == OP_NOTPROP) cc += 2;
1450      cc += 4;      cc += 4;
1451      break;      break;
1452    
# Line 1165  for (;;) Line 1464  for (;;)
1464      case OP_NOT_WORDCHAR:      case OP_NOT_WORDCHAR:
1465      case OP_WORDCHAR:      case OP_WORDCHAR:
1466      case OP_ANY:      case OP_ANY:
1467        case OP_ALLANY:
1468      branchlength++;      branchlength++;
1469      cc++;      cc++;
1470      break;      break;
# Line 1256  for (;;) Line 1556  for (;;)
1556      code += _pcre_OP_lengths[c];      code += _pcre_OP_lengths[c];
1557      }      }
1558    
1559    /* In UTF-8 mode, opcodes that are followed by a character may be followed by    /* Otherwise, we can get the item's length from the table, except that for
1560    a multi-byte character. The length in the table is a minimum, so we have to    repeated character types, we have to test for \p and \P, which have an extra
1561    arrange to skip the extra bytes. */    two bytes of parameters. */
1562    
1563    else    else
1564      {      {
1565        switch(c)
1566          {
1567          case OP_TYPESTAR:
1568          case OP_TYPEMINSTAR:
1569          case OP_TYPEPLUS:
1570          case OP_TYPEMINPLUS:
1571          case OP_TYPEQUERY:
1572          case OP_TYPEMINQUERY:
1573          case OP_TYPEPOSSTAR:
1574          case OP_TYPEPOSPLUS:
1575          case OP_TYPEPOSQUERY:
1576          if (code[1] == OP_PROP || code[1] == OP_NOTPROP) code += 2;
1577          break;
1578    
1579          case OP_TYPEUPTO:
1580          case OP_TYPEMINUPTO:
1581          case OP_TYPEEXACT:
1582          case OP_TYPEPOSUPTO:
1583          if (code[3] == OP_PROP || code[3] == OP_NOTPROP) code += 2;
1584          break;
1585          }
1586    
1587        /* Add in the fixed length from the table */
1588    
1589      code += _pcre_OP_lengths[c];      code += _pcre_OP_lengths[c];
1590    
1591      /* In UTF-8 mode, opcodes that are followed by a character may be followed by
1592      a multi-byte character. The length in the table is a minimum, so we have to
1593      arrange to skip the extra bytes. */
1594    
1595  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
1596      if (utf8) switch(c)      if (utf8) switch(c)
1597        {        {
# Line 1284  for (;;) Line 1613  for (;;)
1613        if (code[-1] >= 0xc0) code += _pcre_utf8_table4[code[-1] & 0x3f];        if (code[-1] >= 0xc0) code += _pcre_utf8_table4[code[-1] & 0x3f];
1614        break;        break;
1615        }        }
1616    #else
1617        (void)(utf8);  /* Keep compiler happy by referencing function argument */
1618  #endif  #endif
1619      }      }
1620    }    }
# Line 1320  for (;;) Line 1651  for (;;)
1651    
1652    if (c == OP_XCLASS) code += GET(code, 1);    if (c == OP_XCLASS) code += GET(code, 1);
1653    
1654    /* Otherwise, we get the item's length from the table. In UTF-8 mode, opcodes    /* Otherwise, we can get the item's length from the table, except that for
1655    that are followed by a character may be followed by a multi-byte character.    repeated character types, we have to test for \p and \P, which have an extra
1656    The length in the table is a minimum, so we have to arrange to skip the extra    two bytes of parameters. */
   bytes. */  
1657    
1658    else    else
1659      {      {
1660        switch(c)
1661          {
1662          case OP_TYPESTAR:
1663          case OP_TYPEMINSTAR:
1664          case OP_TYPEPLUS:
1665          case OP_TYPEMINPLUS:
1666          case OP_TYPEQUERY:
1667          case OP_TYPEMINQUERY:
1668          case OP_TYPEPOSSTAR:
1669          case OP_TYPEPOSPLUS:
1670          case OP_TYPEPOSQUERY:
1671          if (code[1] == OP_PROP || code[1] == OP_NOTPROP) code += 2;
1672          break;
1673    
1674          case OP_TYPEPOSUPTO:
1675          case OP_TYPEUPTO:
1676          case OP_TYPEMINUPTO:
1677          case OP_TYPEEXACT:
1678          if (code[3] == OP_PROP || code[3] == OP_NOTPROP) code += 2;
1679          break;
1680          }
1681    
1682        /* Add in the fixed length from the table */
1683    
1684      code += _pcre_OP_lengths[c];      code += _pcre_OP_lengths[c];
1685    
1686        /* In UTF-8 mode, opcodes that are followed by a character may be followed
1687        by a multi-byte character. The length in the table is a minimum, so we have
1688        to arrange to skip the extra bytes. */
1689    
1690  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
1691      if (utf8) switch(c)      if (utf8) switch(c)
1692        {        {
# Line 1349  for (;;) Line 1708  for (;;)
1708        if (code[-1] >= 0xc0) code += _pcre_utf8_table4[code[-1] & 0x3f];        if (code[-1] >= 0xc0) code += _pcre_utf8_table4[code[-1] & 0x3f];
1709        break;        break;
1710        }        }
1711    #else
1712        (void)(utf8);  /* Keep compiler happy by referencing function argument */
1713  #endif  #endif
1714      }      }
1715    }    }
# Line 1364  for (;;) Line 1725  for (;;)
1725  can match the empty string or not. It is called from could_be_empty()  can match the empty string or not. It is called from could_be_empty()
1726  below and from compile_branch() when checking for an unlimited repeat of a  below and from compile_branch() when checking for an unlimited repeat of a
1727  group that can match nothing. Note that first_significant_code() skips over  group that can match nothing. Note that first_significant_code() skips over
1728  assertions. If we hit an unclosed bracket, we return "empty" - this means we've  backward and negative forward assertions when its final argument is TRUE. If we
1729  struck an inner bracket whose current branch will already have been scanned.  hit an unclosed bracket, we return "empty" - this means we've struck an inner
1730    bracket whose current branch will already have been scanned.
1731    
1732  Arguments:  Arguments:
1733    code        points to start of search    code        points to start of search
# Line 1387  for (code = first_significant_code(code Line 1749  for (code = first_significant_code(code
1749    
1750    c = *code;    c = *code;
1751    
1752      /* Skip over forward assertions; the other assertions are skipped by
1753      first_significant_code() with a TRUE final argument. */
1754    
1755      if (c == OP_ASSERT)
1756        {
1757        do code += GET(code, 1); while (*code == OP_ALT);
1758        c = *code;
1759        continue;
1760        }
1761    
1762    /* Groups with zero repeats can of course be empty; skip them. */    /* Groups with zero repeats can of course be empty; skip them. */
1763    
1764    if (c == OP_BRAZERO || c == OP_BRAMINZERO)    if (c == OP_BRAZERO || c == OP_BRAMINZERO || c == OP_SKIPZERO)
1765      {      {
1766      code += _pcre_OP_lengths[c];      code += _pcre_OP_lengths[c];
1767      do code += GET(code, 1); while (*code == OP_ALT);      do code += GET(code, 1); while (*code == OP_ALT);
# Line 1399  for (code = first_significant_code(code Line 1771  for (code = first_significant_code(code
1771    
1772    /* For other groups, scan the branches. */    /* For other groups, scan the branches. */
1773    
1774    if (c == OP_BRA || c == OP_CBRA || c == OP_ONCE)    if (c == OP_BRA || c == OP_CBRA || c == OP_ONCE || c == OP_COND)
1775      {      {
1776      BOOL empty_branch;      BOOL empty_branch;
1777      if (GET(code, 1) == 0) return TRUE;    /* Hit unclosed bracket */      if (GET(code, 1) == 0) return TRUE;    /* Hit unclosed bracket */
1778    
1779      /* Scan a closed bracket */      /* If a conditional group has only one branch, there is a second, implied,
1780        empty branch, so just skip over the conditional, because it could be empty.
1781        Otherwise, scan the individual branches of the group. */
1782    
1783      empty_branch = FALSE;      if (c == OP_COND && code[GET(code, 1)] != OP_ALT)
     do  
       {  
       if (!empty_branch && could_be_empty_branch(code, endcode, utf8))  
         empty_branch = TRUE;  
1784        code += GET(code, 1);        code += GET(code, 1);
1785        else
1786          {
1787          empty_branch = FALSE;
1788          do
1789            {
1790            if (!empty_branch && could_be_empty_branch(code, endcode, utf8))
1791              empty_branch = TRUE;
1792            code += GET(code, 1);
1793            }
1794          while (*code == OP_ALT);
1795          if (!empty_branch) return FALSE;   /* All branches are non-empty */
1796        }        }
1797      while (*code == OP_ALT);  
     if (!empty_branch) return FALSE;   /* All branches are non-empty */  
1798      c = *code;      c = *code;
1799      continue;      continue;
1800      }      }
# Line 1423  for (code = first_significant_code(code Line 1803  for (code = first_significant_code(code
1803    
1804    switch (c)    switch (c)
1805      {      {
1806      /* Check for quantifiers after a class */      /* Check for quantifiers after a class. XCLASS is used for classes that
1807        cannot be represented just by a bit map. This includes negated single
1808        high-valued characters. The length in _pcre_OP_lengths[] is zero; the
1809        actual length is stored in the compiled code, so we must update "code"
1810        here. */
1811    
1812  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
1813      case OP_XCLASS:      case OP_XCLASS:
1814      ccode = code + GET(code, 1);      ccode = code += GET(code, 1);
1815      goto CHECK_CLASS_REPEAT;      goto CHECK_CLASS_REPEAT;
1816  #endif  #endif
1817    
# Line 1471  for (code = first_significant_code(code Line 1855  for (code = first_significant_code(code
1855      case OP_NOT_WORDCHAR:      case OP_NOT_WORDCHAR:
1856      case OP_WORDCHAR:      case OP_WORDCHAR:
1857      case OP_ANY:      case OP_ANY:
1858        case OP_ALLANY:
1859      case OP_ANYBYTE:      case OP_ANYBYTE:
1860      case OP_CHAR:      case OP_CHAR:
1861      case OP_CHARNC:      case OP_CHARNC:
# Line 1489  for (code = first_significant_code(code Line 1874  for (code = first_significant_code(code
1874      case OP_TYPEEXACT:      case OP_TYPEEXACT:
1875      return FALSE;      return FALSE;
1876    
1877        /* These are going to continue, as they may be empty, but we have to
1878        fudge the length for the \p and \P cases. */
1879    
1880        case OP_TYPESTAR:
1881        case OP_TYPEMINSTAR:
1882        case OP_TYPEPOSSTAR:
1883        case OP_TYPEQUERY:
1884        case OP_TYPEMINQUERY:
1885        case OP_TYPEPOSQUERY:
1886        if (code[1] == OP_PROP || code[1] == OP_NOTPROP) code += 2;
1887        break;
1888    
1889        /* Same for these */
1890    
1891        case OP_TYPEUPTO:
1892        case OP_TYPEMINUPTO:
1893        case OP_TYPEPOSUPTO:
1894        if (code[3] == OP_PROP || code[3] == OP_NOTPROP) code += 2;
1895        break;
1896    
1897      /* End of branch */      /* End of branch */
1898    
1899      case OP_KET:      case OP_KET:
# Line 1558  return TRUE; Line 1963  return TRUE;
1963  *************************************************/  *************************************************/
1964    
1965  /* This function is called when the sequence "[:" or "[." or "[=" is  /* This function is called when the sequence "[:" or "[." or "[=" is
1966  encountered in a character class. It checks whether this is followed by an  encountered in a character class. It checks whether this is followed by a
1967  optional ^ and then a sequence of letters, terminated by a matching ":]" or  sequence of characters terminated by a matching ":]" or ".]" or "=]". If we
1968  ".]" or "=]".  reach an unescaped ']' without the special preceding character, return FALSE.
1969    
1970    Originally, this function only recognized a sequence of letters between the
1971    terminators, but it seems that Perl recognizes any sequence of characters,
1972    though of course unknown POSIX names are subsequently rejected. Perl gives an
1973    "Unknown POSIX class" error for [:f\oo:] for example, where previously PCRE
1974    didn't consider this to be a POSIX class. Likewise for [:1234:].
1975    
1976    The problem in trying to be exactly like Perl is in the handling of escapes. We
1977    have to be sure that [abc[:x\]pqr] is *not* treated as containing a POSIX
1978    class, but [abc[:x\]pqr:]] is (so that an error can be generated). The code
1979    below handles the special case of \], but does not try to do any other escape
1980    processing. This makes it different from Perl for cases such as [:l\ower:]
1981    where Perl recognizes it as the POSIX class "lower" but PCRE does not recognize
1982    "l\ower". This is a lesser evil than not diagnosing bad classes when Perl does,
1983    I think.
1984    
1985  Argument:  Arguments:
1986    ptr      pointer to the initial [    ptr      pointer to the initial [
1987    endptr   where to return the end pointer    endptr   where to return the end pointer
   cd       pointer to compile data  
1988    
1989  Returns:   TRUE or FALSE  Returns:   TRUE or FALSE
1990  */  */
1991    
1992  static BOOL  static BOOL
1993  check_posix_syntax(const uschar *ptr, const uschar **endptr, compile_data *cd)  check_posix_syntax(const uschar *ptr, const uschar **endptr)
1994  {  {
1995  int terminator;          /* Don't combine these lines; the Solaris cc */  int terminator;          /* Don't combine these lines; the Solaris cc */
1996  terminator = *(++ptr);   /* compiler warns about "non-constant" initializer. */  terminator = *(++ptr);   /* compiler warns about "non-constant" initializer. */
1997  if (*(++ptr) == '^') ptr++;  for (++ptr; *ptr != 0; ptr++)
 while ((cd->ctypes[*ptr] & ctype_letter) != 0) ptr++;  
 if (*ptr == terminator && ptr[1] == ']')  
1998    {    {
1999    *endptr = ptr;    if (*ptr == CHAR_BACKSLASH && ptr[1] == CHAR_RIGHT_SQUARE_BRACKET) ptr++; else
2000    return TRUE;      {
2001        if (*ptr == CHAR_RIGHT_SQUARE_BRACKET) return FALSE;
2002        if (*ptr == terminator && ptr[1] == CHAR_RIGHT_SQUARE_BRACKET)
2003          {
2004          *endptr = ptr;
2005          return TRUE;
2006          }
2007        }
2008    }    }
2009  return FALSE;  return FALSE;
2010  }  }
# Line 1605  Returns: a value representing the na Line 2029  Returns: a value representing the na
2029  static int  static int
2030  check_posix_name(const uschar *ptr, int len)  check_posix_name(const uschar *ptr, int len)
2031  {  {
2032    const char *pn = posix_names;
2033  register int yield = 0;  register int yield = 0;
2034  while (posix_name_lengths[yield] != 0)  while (posix_name_lengths[yield] != 0)
2035    {    {
2036    if (len == posix_name_lengths[yield] &&    if (len == posix_name_lengths[yield] &&
2037      strncmp((const char *)ptr, posix_names[yield], len) == 0) return yield;      strncmp((const char *)ptr, pn, len) == 0) return yield;
2038      pn += posix_name_lengths[yield] + 1;
2039    yield++;    yield++;
2040    }    }
2041  return -1;  return -1;
# Line 1624  return -1; Line 2050  return -1;
2050  that is referenced. This means that groups can be replicated for fixed  that is referenced. This means that groups can be replicated for fixed
2051  repetition simply by copying (because the recursion is allowed to refer to  repetition simply by copying (because the recursion is allowed to refer to
2052  earlier groups that are outside the current group). However, when a group is  earlier groups that are outside the current group). However, when a group is
2053  optional (i.e. the minimum quantifier is zero), OP_BRAZERO is inserted before  optional (i.e. the minimum quantifier is zero), OP_BRAZERO or OP_SKIPZERO is
2054  it, after it has been compiled. This means that any OP_RECURSE items within it  inserted before it, after it has been compiled. This means that any OP_RECURSE
2055  that refer to the group itself or any contained groups have to have their  items within it that refer to the group itself or any contained groups have to
2056  offsets adjusted. That is one of the jobs of this function. Before it is called,  have their offsets adjusted. That is one of the jobs of this function. Before it
2057  the partially compiled regex must be temporarily terminated with OP_END.  is called, the partially compiled regex must be temporarily terminated with
2058    OP_END.
2059    
2060  This function has been extended with the possibility of forward references for  This function has been extended with the possibility of forward references for
2061  recursions and subroutine calls. It must also check the list of such references  recursions and subroutine calls. It must also check the list of such references
# Line 1651  adjust_recurse(uschar *group, int adjust Line 2078  adjust_recurse(uschar *group, int adjust
2078    uschar *save_hwm)    uschar *save_hwm)
2079  {  {
2080  uschar *ptr = group;  uschar *ptr = group;
2081    
2082  while ((ptr = (uschar *)find_recurse(ptr, utf8)) != NULL)  while ((ptr = (uschar *)find_recurse(ptr, utf8)) != NULL)
2083    {    {
2084    int offset;    int offset;
# Line 1762  get_othercase_range(unsigned int *cptr, Line 2190  get_othercase_range(unsigned int *cptr,
2190  unsigned int c, othercase, next;  unsigned int c, othercase, next;
2191    
2192  for (c = *cptr; c <= d; c++)  for (c = *cptr; c <= d; c++)
2193    { if ((othercase = _pcre_ucp_othercase(c)) != NOTACHAR) break; }    { if ((othercase = UCD_OTHERCASE(c)) != c) break; }
2194    
2195  if (c > d) return FALSE;  if (c > d) return FALSE;
2196    
# Line 1771  next = othercase + 1; Line 2199  next = othercase + 1;
2199    
2200  for (++c; c <= d; c++)  for (++c; c <= d; c++)
2201    {    {
2202    if (_pcre_ucp_othercase(c) != next) break;    if (UCD_OTHERCASE(c) != next) break;
2203    next++;    next++;
2204    }    }
2205    
# Line 1817  if ((options & PCRE_EXTENDED) != 0) Line 2245  if ((options & PCRE_EXTENDED) != 0)
2245    for (;;)    for (;;)
2246      {      {
2247      while ((cd->ctypes[*ptr] & ctype_space) != 0) ptr++;      while ((cd->ctypes[*ptr] & ctype_space) != 0) ptr++;
2248      if (*ptr == '#')      if (*ptr == CHAR_NUMBER_SIGN)
2249        {        {
2250        while (*(++ptr) != 0)        while (*(++ptr) != 0)
2251          if (IS_NEWLINE(ptr)) { ptr += cd->nllen; break; }          if (IS_NEWLINE(ptr)) { ptr += cd->nllen; break; }
# Line 1829  if ((options & PCRE_EXTENDED) != 0) Line 2257  if ((options & PCRE_EXTENDED) != 0)
2257  /* If the next item is one that we can handle, get its value. A non-negative  /* If the next item is one that we can handle, get its value. A non-negative
2258  value is a character, a negative value is an escape value. */  value is a character, a negative value is an escape value. */
2259    
2260  if (*ptr == '\\')  if (*ptr == CHAR_BACKSLASH)
2261    {    {
2262    int temperrorcode = 0;    int temperrorcode = 0;
2263    next = check_escape(&ptr, &temperrorcode, cd->bracount, options, FALSE);    next = check_escape(&ptr, &temperrorcode, cd->bracount, options, FALSE);
# Line 1854  if ((options & PCRE_EXTENDED) != 0) Line 2282  if ((options & PCRE_EXTENDED) != 0)
2282    for (;;)    for (;;)
2283      {      {
2284      while ((cd->ctypes[*ptr] & ctype_space) != 0) ptr++;      while ((cd->ctypes[*ptr] & ctype_space) != 0) ptr++;
2285      if (*ptr == '#')      if (*ptr == CHAR_NUMBER_SIGN)
2286        {        {
2287        while (*(++ptr) != 0)        while (*(++ptr) != 0)
2288          if (IS_NEWLINE(ptr)) { ptr += cd->nllen; break; }          if (IS_NEWLINE(ptr)) { ptr += cd->nllen; break; }
# Line 1865  if ((options & PCRE_EXTENDED) != 0) Line 2293  if ((options & PCRE_EXTENDED) != 0)
2293    
2294  /* If the next thing is itself optional, we have to give up. */  /* If the next thing is itself optional, we have to give up. */
2295    
2296  if (*ptr == '*' || *ptr == '?' || strncmp((char *)ptr, "{0,", 3) == 0)  if (*ptr == CHAR_ASTERISK || *ptr == CHAR_QUESTION_MARK ||
2297    return FALSE;    strncmp((char *)ptr, STR_LEFT_CURLY_BRACKET STR_0 STR_COMMA, 3) == 0)
2298        return FALSE;
2299    
2300  /* Now compare the next item with the previous opcode. If the previous is a  /* Now compare the next item with the previous opcode. If the previous is a
2301  positive single character match, "item" either contains the character or, if  positive single character match, "item" either contains the character or, if
# Line 1881  if (next >= 0) switch(op_code) Line 2310  if (next >= 0) switch(op_code)
2310    case OP_CHAR:    case OP_CHAR:
2311  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
2312    if (utf8 && item > 127) { GETCHAR(item, utf8_char); }    if (utf8 && item > 127) { GETCHAR(item, utf8_char); }
2313    #else
2314      (void)(utf8_char);  /* Keep compiler happy by referencing function argument */
2315  #endif  #endif
2316    return item != next;    return item != next;
2317    
# Line 1899  if (next >= 0) switch(op_code) Line 2330  if (next >= 0) switch(op_code)
2330      unsigned int othercase;      unsigned int othercase;
2331      if (next < 128) othercase = cd->fcc[next]; else      if (next < 128) othercase = cd->fcc[next]; else
2332  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
2333      othercase = _pcre_ucp_othercase((unsigned int)next);      othercase = UCD_OTHERCASE((unsigned int)next);
2334  #else  #else
2335      othercase = NOTACHAR;      othercase = NOTACHAR;
2336  #endif  #endif
# Line 1912  if (next >= 0) switch(op_code) Line 2343  if (next >= 0) switch(op_code)
2343    /* For OP_NOT, "item" must be a single-byte character. */    /* For OP_NOT, "item" must be a single-byte character. */
2344    
2345    case OP_NOT:    case OP_NOT:
   if (next < 0) return FALSE;  /* Not a character */  
2346    if (item == next) return TRUE;    if (item == next) return TRUE;
2347    if ((options & PCRE_CASELESS) == 0) return FALSE;    if ((options & PCRE_CASELESS) == 0) return FALSE;
2348  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
# Line 1921  if (next >= 0) switch(op_code) Line 2351  if (next >= 0) switch(op_code)
2351      unsigned int othercase;      unsigned int othercase;
2352      if (next < 128) othercase = cd->fcc[next]; else      if (next < 128) othercase = cd->fcc[next]; else
2353  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
2354      othercase = _pcre_ucp_othercase(next);      othercase = UCD_OTHERCASE(next);
2355  #else  #else
2356      othercase = NOTACHAR;      othercase = NOTACHAR;
2357  #endif  #endif
# Line 2026  switch(op_code) Line 2456  switch(op_code)
2456    
2457      case ESC_W:      case ESC_W:
2458      return item <= 127 && (cd->ctypes[item] & ctype_word) != 0;      return item <= 127 && (cd->ctypes[item] & ctype_word) != 0;
2459    
2460      case ESC_h:      case ESC_h:
2461      case ESC_H:      case ESC_H:
2462      switch(item)      switch(item)
# Line 2053  switch(op_code) Line 2483  switch(op_code)
2483        return -next != ESC_h;        return -next != ESC_h;
2484        default:        default:
2485        return -next == ESC_h;        return -next == ESC_h;
2486        }        }
2487    
2488      case ESC_v:      case ESC_v:
2489      case ESC_V:      case ESC_V:
2490      switch(item)      switch(item)
# Line 2069  switch(op_code) Line 2499  switch(op_code)
2499        return -next != ESC_v;        return -next != ESC_v;
2500        default:        default:
2501        return -next == ESC_v;        return -next == ESC_v;
2502        }        }
2503    
2504      default:      default:
2505      return FALSE;      return FALSE;
# Line 2093  switch(op_code) Line 2523  switch(op_code)
2523    
2524    case OP_NOT_HSPACE:    case OP_NOT_HSPACE:
2525    return next == -ESC_h;    return next == -ESC_h;
2526    
2527    /* Can't have \S in here because VT matches \S (Perl anomaly) */    /* Can't have \S in here because VT matches \S (Perl anomaly) */
2528    case OP_VSPACE:    case OP_VSPACE:
2529    return next == -ESC_V || next == -ESC_d || next == -ESC_w;    return next == -ESC_V || next == -ESC_d || next == -ESC_w;
2530    
2531    case OP_NOT_VSPACE:    case OP_NOT_VSPACE:
2532    return next == -ESC_v;    return next == -ESC_v;
2533    
2534    case OP_WORDCHAR:    case OP_WORDCHAR:
2535    return next == -ESC_W || next == -ESC_s || next == -ESC_h || next == -ESC_v;    return next == -ESC_W || next == -ESC_s || next == -ESC_h || next == -ESC_v;
2536    
2537    case OP_NOT_WORDCHAR:    case OP_NOT_WORDCHAR:
2538    return next == -ESC_w || next == -ESC_d;    return next == -ESC_w || next == -ESC_d;
2539    
2540    default:    default:
2541    return FALSE;    return FALSE;
2542    }    }
# Line 2175  uschar classbits[32]; Line 2605  uschar classbits[32];
2605  BOOL class_utf8;  BOOL class_utf8;
2606  BOOL utf8 = (options & PCRE_UTF8) != 0;  BOOL utf8 = (options & PCRE_UTF8) != 0;
2607  uschar *class_utf8data;  uschar *class_utf8data;
2608    uschar *class_utf8data_base;
2609  uschar utf8_char[6];  uschar utf8_char[6];
2610  #else  #else
2611  BOOL utf8 = FALSE;  BOOL utf8 = FALSE;
# Line 2214  req_caseopt = ((options & PCRE_CASELESS) Line 2645  req_caseopt = ((options & PCRE_CASELESS)
2645  for (;; ptr++)  for (;; ptr++)
2646    {    {
2647    BOOL negate_class;    BOOL negate_class;
2648      BOOL should_flip_negation;
2649    BOOL possessive_quantifier;    BOOL possessive_quantifier;
2650    BOOL is_quantifier;    BOOL is_quantifier;
2651    BOOL is_recurse;    BOOL is_recurse;
# Line 2255  for (;; ptr++) Line 2687  for (;; ptr++)
2687      */      */
2688    
2689      if (code < last_code) code = last_code;      if (code < last_code) code = last_code;
2690    
2691        /* Paranoid check for integer overflow */
2692    
2693        if (OFLOW_MAX - *lengthptr < code - last_code)
2694          {
2695          *errorcodeptr = ERR20;
2696          goto FAILED;
2697          }
2698    
2699      *lengthptr += code - last_code;      *lengthptr += code - last_code;
2700      DPRINTF(("length=%d added %d c=%c\n", *lengthptr, code - last_code, c));      DPRINTF(("length=%d added %d c=%c\n", *lengthptr, code - last_code, c));
2701    
# Line 2292  for (;; ptr++) Line 2733  for (;; ptr++)
2733    
2734    if (inescq && c != 0)    if (inescq && c != 0)
2735      {      {
2736      if (c == '\\' && ptr[1] == 'E')      if (c == CHAR_BACKSLASH && ptr[1] == CHAR_E)
2737        {        {
2738        inescq = FALSE;        inescq = FALSE;
2739        ptr++;        ptr++;
# Line 2318  for (;; ptr++) Line 2759  for (;; ptr++)
2759    /* Fill in length of a previous callout, except when the next thing is    /* Fill in length of a previous callout, except when the next thing is
2760    a quantifier. */    a quantifier. */
2761    
2762    is_quantifier = c == '*' || c == '+' || c == '?' ||    is_quantifier =
2763      (c == '{' && is_counted_repeat(ptr+1));      c == CHAR_ASTERISK || c == CHAR_PLUS || c == CHAR_QUESTION_MARK ||
2764        (c == CHAR_LEFT_CURLY_BRACKET && is_counted_repeat(ptr+1));
2765    
2766    if (!is_quantifier && previous_callout != NULL &&    if (!is_quantifier && previous_callout != NULL &&
2767         after_manual_callout-- <= 0)         after_manual_callout-- <= 0)
# Line 2334  for (;; ptr++) Line 2776  for (;; ptr++)
2776    if ((options & PCRE_EXTENDED) != 0)    if ((options & PCRE_EXTENDED) != 0)
2777      {      {
2778      if ((cd->ctypes[c] & ctype_space) != 0) continue;      if ((cd->ctypes[c] & ctype_space) != 0) continue;
2779      if (c == '#')      if (c == CHAR_NUMBER_SIGN)
2780        {        {
2781        while (*(++ptr) != 0)        while (*(++ptr) != 0)
2782          {          {
# Line 2359  for (;; ptr++) Line 2801  for (;; ptr++)
2801      {      {
2802      /* ===================================================================*/      /* ===================================================================*/
2803      case 0:                        /* The branch terminates at string end */      case 0:                        /* The branch terminates at string end */
2804      case '|':                      /* or | or ) */      case CHAR_VERTICAL_LINE:       /* or | or ) */
2805      case ')':      case CHAR_RIGHT_PARENTHESIS:
2806      *firstbyteptr = firstbyte;      *firstbyteptr = firstbyte;
2807      *reqbyteptr = reqbyte;      *reqbyteptr = reqbyte;
2808      *codeptr = code;      *codeptr = code;
2809      *ptrptr = ptr;      *ptrptr = ptr;
2810      if (lengthptr != NULL)      if (lengthptr != NULL)
2811        {        {
2812          if (OFLOW_MAX - *lengthptr < code - last_code)
2813            {
2814            *errorcodeptr = ERR20;
2815            goto FAILED;
2816            }
2817        *lengthptr += code - last_code;   /* To include callout length */        *lengthptr += code - last_code;   /* To include callout length */
2818        DPRINTF((">> end branch\n"));        DPRINTF((">> end branch\n"));
2819        }        }
# Line 2377  for (;; ptr++) Line 2824  for (;; ptr++)
2824      /* Handle single-character metacharacters. In multiline mode, ^ disables      /* Handle single-character metacharacters. In multiline mode, ^ disables
2825      the setting of any following char as a first character. */      the setting of any following char as a first character. */
2826    
2827      case '^':      case CHAR_CIRCUMFLEX_ACCENT:
2828      if ((options & PCRE_MULTILINE) != 0)      if ((options & PCRE_MULTILINE) != 0)
2829        {        {
2830        if (firstbyte == REQ_UNSET) firstbyte = REQ_NONE;        if (firstbyte == REQ_UNSET) firstbyte = REQ_NONE;
# Line 2386  for (;; ptr++) Line 2833  for (;; ptr++)
2833      *code++ = OP_CIRC;      *code++ = OP_CIRC;
2834      break;      break;
2835    
2836      case '$':      case CHAR_DOLLAR_SIGN:
2837      previous = NULL;      previous = NULL;
2838      *code++ = OP_DOLL;      *code++ = OP_DOLL;
2839      break;      break;
# Line 2394  for (;; ptr++) Line 2841  for (;; ptr++)
2841      /* There can never be a first char if '.' is first, whatever happens about      /* There can never be a first char if '.' is first, whatever happens about
2842      repeats. The value of reqbyte doesn't change either. */      repeats. The value of reqbyte doesn't change either. */
2843    
2844      case '.':      case CHAR_DOT:
2845      if (firstbyte == REQ_UNSET) firstbyte = REQ_NONE;      if (firstbyte == REQ_UNSET) firstbyte = REQ_NONE;
2846      zerofirstbyte = firstbyte;      zerofirstbyte = firstbyte;
2847      zeroreqbyte = reqbyte;      zeroreqbyte = reqbyte;
2848      previous = code;      previous = code;
2849      *code++ = OP_ANY;      *code++ = ((options & PCRE_DOTALL) != 0)? OP_ALLANY: OP_ANY;
2850      break;      break;
2851    
2852    
# Line 2414  for (;; ptr++) Line 2861  for (;; ptr++)
2861      opcode is compiled. It may optionally have a bit map for characters < 256,      opcode is compiled. It may optionally have a bit map for characters < 256,
2862      but those above are explicitly listed afterwards. A flag byte tells      but those above are explicitly listed afterwards. A flag byte tells
2863      whether the bitmap is present, and whether this is a negated class or not.      whether the bitmap is present, and whether this is a negated class or not.
     */  
2864    
2865      case '[':      In JavaScript compatibility mode, an isolated ']' causes an error. In
2866        default (Perl) mode, it is treated as a data character. */
2867    
2868        case CHAR_RIGHT_SQUARE_BRACKET:
2869        if ((cd->external_options & PCRE_JAVASCRIPT_COMPAT) != 0)
2870          {
2871          *errorcodeptr = ERR64;
2872          goto FAILED;
2873          }
2874        goto NORMAL_CHAR;
2875    
2876        case CHAR_LEFT_SQUARE_BRACKET:
2877      previous = code;      previous = code;
2878    
2879      /* PCRE supports POSIX class stuff inside a class. Perl gives an error if      /* PCRE supports POSIX class stuff inside a class. Perl gives an error if
2880      they are encountered at the top level, so we'll do that too. */      they are encountered at the top level, so we'll do that too. */
2881    
2882      if ((ptr[1] == ':' || ptr[1] == '.' || ptr[1] == '=') &&      if ((ptr[1] == CHAR_COLON || ptr[1] == CHAR_DOT ||
2883          check_posix_syntax(ptr, &tempptr, cd))           ptr[1] == CHAR_EQUALS_SIGN) &&
2884            check_posix_syntax(ptr, &tempptr))
2885        {        {
2886        *errorcodeptr = (ptr[1] == ':')? ERR13 : ERR31;        *errorcodeptr = (ptr[1] == CHAR_COLON)? ERR13 : ERR31;
2887        goto FAILED;        goto FAILED;
2888        }        }
2889    
2890      /* If the first character is '^', set the negation flag and skip it. */      /* If the first character is '^', set the negation flag and skip it. Also,
2891        if the first few characters (either before or after ^) are \Q\E or \E we
2892        skip them too. This makes for compatibility with Perl. */
2893    
2894      if ((c = *(++ptr)) == '^')      negate_class = FALSE;
2895        for (;;)
2896        {        {
       negate_class = TRUE;  
2897        c = *(++ptr);        c = *(++ptr);
2898          if (c == CHAR_BACKSLASH)
2899            {
2900            if (ptr[1] == CHAR_E)
2901              ptr++;
2902            else if (strncmp((const char *)ptr+1,
2903                              STR_Q STR_BACKSLASH STR_E, 3) == 0)
2904              ptr += 3;
2905            else
2906              break;
2907            }
2908          else if (!negate_class && c == CHAR_CIRCUMFLEX_ACCENT)
2909            negate_class = TRUE;
2910          else break;
2911        }        }
2912      else  
2913        /* Empty classes are allowed in JavaScript compatibility mode. Otherwise,
2914        an initial ']' is taken as a data character -- the code below handles
2915        that. In JS mode, [] must always fail, so generate OP_FAIL, whereas
2916        [^] must match any character, so generate OP_ALLANY. */
2917    
2918        if (c == CHAR_RIGHT_SQUARE_BRACKET &&
2919            (cd->external_options & PCRE_JAVASCRIPT_COMPAT) != 0)
2920        {        {
2921        negate_class = FALSE;        *code++ = negate_class? OP_ALLANY : OP_FAIL;
2922          if (firstbyte == REQ_UNSET) firstbyte = REQ_NONE;
2923          zerofirstbyte = firstbyte;
2924          break;
2925        }        }
2926    
2927        /* If a class contains a negative special such as \S, we need to flip the
2928        negation flag at the end, so that support for characters > 255 works
2929        correctly (they are all included in the class). */
2930    
2931        should_flip_negation = FALSE;
2932    
2933      /* Keep a count of chars with values < 256 so that we can optimize the case      /* Keep a count of chars with values < 256 so that we can optimize the case
2934      of just a single character (as long as it's < 256). However, for higher      of just a single character (as long as it's < 256). However, for higher
2935      valued UTF-8 characters, we don't yet do any optimization. */      valued UTF-8 characters, we don't yet do any optimization. */
# Line 2458  for (;; ptr++) Line 2947  for (;; ptr++)
2947  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
2948      class_utf8 = FALSE;                       /* No chars >= 256 */      class_utf8 = FALSE;                       /* No chars >= 256 */
2949      class_utf8data = code + LINK_SIZE + 2;    /* For UTF-8 items */      class_utf8data = code + LINK_SIZE + 2;    /* For UTF-8 items */
2950        class_utf8data_base = class_utf8data;     /* For resetting in pass 1 */
2951  #endif  #endif
2952    
2953      /* Process characters until ] is reached. By writing this as a "do" it      /* Process characters until ] is reached. By writing this as a "do" it
# Line 2473  for (;; ptr++) Line 2963  for (;; ptr++)
2963          {                           /* Braces are required because the */          {                           /* Braces are required because the */
2964          GETCHARLEN(c, ptr, ptr);    /* macro generates multiple statements */          GETCHARLEN(c, ptr, ptr);    /* macro generates multiple statements */
2965          }          }
2966    
2967          /* In the pre-compile phase, accumulate the length of any UTF-8 extra
2968          data and reset the pointer. This is so that very large classes that
2969          contain a zillion UTF-8 characters no longer overwrite the work space
2970          (which is on the stack). */
2971    
2972          if (lengthptr != NULL)
2973            {
2974            *lengthptr += class_utf8data - class_utf8data_base;
2975            class_utf8data = class_utf8data_base;
2976            }
2977    
2978  #endif  #endif
2979    
2980        /* Inside \Q...\E everything is literal except \E */        /* Inside \Q...\E everything is literal except \E */
2981    
2982        if (inescq)        if (inescq)
2983          {          {
2984          if (c == '\\' && ptr[1] == 'E')     /* If we are at \E */          if (c == CHAR_BACKSLASH && ptr[1] == CHAR_E)  /* If we are at \E */
2985            {            {
2986            inescq = FALSE;                   /* Reset literal state */            inescq = FALSE;                   /* Reset literal state */
2987            ptr++;                            /* Skip the 'E' */            ptr++;                            /* Skip the 'E' */
# Line 2494  for (;; ptr++) Line 2996  for (;; ptr++)
2996        [.ch.] and [=ch=] ("collating elements") and fault them, as Perl        [.ch.] and [=ch=] ("collating elements") and fault them, as Perl
2997        5.6 and 5.8 do. */        5.6 and 5.8 do. */
2998    
2999        if (c == '[' &&        if (c == CHAR_LEFT_SQUARE_BRACKET &&
3000            (ptr[1] == ':' || ptr[1] == '.' || ptr[1] == '=') &&            (ptr[1] == CHAR_COLON || ptr[1] == CHAR_DOT ||
3001            check_posix_syntax(ptr, &tempptr, cd))             ptr[1] == CHAR_EQUALS_SIGN) && check_posix_syntax(ptr, &tempptr))
3002          {          {
3003          BOOL local_negate = FALSE;          BOOL local_negate = FALSE;
3004          int posix_class, taboffset, tabopt;          int posix_class, taboffset, tabopt;
3005          register const uschar *cbits = cd->cbits;          register const uschar *cbits = cd->cbits;
3006          uschar pbits[32];          uschar pbits[32];
3007    
3008          if (ptr[1] != ':')          if (ptr[1] != CHAR_COLON)
3009            {            {
3010            *errorcodeptr = ERR31;            *errorcodeptr = ERR31;
3011            goto FAILED;            goto FAILED;
3012            }            }
3013    
3014          ptr += 2;          ptr += 2;
3015          if (*ptr == '^')          if (*ptr == CHAR_CIRCUMFLEX_ACCENT)
3016            {            {
3017            local_negate = TRUE;            local_negate = TRUE;
3018              should_flip_negation = TRUE;  /* Note negative special */
3019            ptr++;            ptr++;
3020            }            }
3021    
# Line 2579  for (;; ptr++) Line 3082  for (;; ptr++)
3082        of the specials, which just set a flag. The sequence \b is a special        of the specials, which just set a flag. The sequence \b is a special
3083        case. Inside a class (and only there) it is treated as backspace.        case. Inside a class (and only there) it is treated as backspace.
3084        Elsewhere it marks a word boundary. Other escapes have preset maps ready        Elsewhere it marks a word boundary. Other escapes have preset maps ready
3085        to or into the one we are building. We assume they have more than one        to 'or' into the one we are building. We assume they have more than one
3086        character in them, so set class_charcount bigger than one. */        character in them, so set class_charcount bigger than one. */
3087    
3088        if (c == '\\')        if (c == CHAR_BACKSLASH)
3089          {          {
3090          c = check_escape(&ptr, errorcodeptr, cd->bracount, options, TRUE);          c = check_escape(&ptr, errorcodeptr, cd->bracount, options, TRUE);
3091          if (*errorcodeptr != 0) goto FAILED;          if (*errorcodeptr != 0) goto FAILED;
3092    
3093          if (-c == ESC_b) c = '\b';       /* \b is backslash in a class */          if (-c == ESC_b) c = CHAR_BS;       /* \b is backspace in a class */
3094          else if (-c == ESC_X) c = 'X';   /* \X is literal X in a class */          else if (-c == ESC_X) c = CHAR_X;   /* \X is literal X in a class */
3095          else if (-c == ESC_R) c = 'R';   /* \R is literal R in a class */          else if (-c == ESC_R) c = CHAR_R;   /* \R is literal R in a class */
3096          else if (-c == ESC_Q)            /* Handle start of quoted string */          else if (-c == ESC_Q)            /* Handle start of quoted string */
3097            {            {
3098            if (ptr[1] == '\\' && ptr[2] == 'E')            if (ptr[1] == CHAR_BACKSLASH && ptr[2] == CHAR_E)
3099              {              {
3100              ptr += 2; /* avoid empty string */              ptr += 2; /* avoid empty string */
3101              }              }
3102            else inescq = TRUE;            else inescq = TRUE;
3103            continue;            continue;
3104            }            }
3105            else if (-c == ESC_E) continue;  /* Ignore orphan \E */
3106    
3107          if (c < 0)          if (c < 0)
3108            {            {
# Line 2614  for (;; ptr++) Line 3118  for (;; ptr++)
3118              continue;              continue;
3119    
3120              case ESC_D:              case ESC_D:
3121                should_flip_negation = TRUE;
3122              for (c = 0; c < 32; c++) classbits[c] |= ~cbits[c+cbit_digit];              for (c = 0; c < 32; c++) classbits[c] |= ~cbits[c+cbit_digit];
3123              continue;              continue;
3124    
# Line 2622  for (;; ptr++) Line 3127  for (;; ptr++)
3127              continue;              continue;
3128    
3129              case ESC_W:              case ESC_W:
3130                should_flip_negation = TRUE;
3131              for (c = 0; c < 32; c++) classbits[c] |= ~cbits[c+cbit_word];              for (c = 0; c < 32; c++) classbits[c] |= ~cbits[c+cbit_word];
3132              continue;              continue;
3133    
# Line 2631  for (;; ptr++) Line 3137  for (;; ptr++)
3137              continue;              continue;
3138    
3139              case ESC_S:              case ESC_S:
3140                should_flip_negation = TRUE;
3141              for (c = 0; c < 32; c++) classbits[c] |= ~cbits[c+cbit_space];              for (c = 0; c < 32; c++) classbits[c] |= ~cbits[c+cbit_space];
3142              classbits[1] |= 0x08;    /* Perl 5.004 onwards omits VT from \s */              classbits[1] |= 0x08;    /* Perl 5.004 onwards omits VT from \s */
3143              continue;              continue;
3144    
             case ESC_E: /* Perl ignores an orphan \E */  
             continue;  
   
3145              default:    /* Not recognized; fall through */              default:    /* Not recognized; fall through */
3146              break;      /* Need "default" setting to stop compiler warning. */              break;      /* Need "default" setting to stop compiler warning. */
3147              }              }
# Line 2817  for (;; ptr++) Line 3321  for (;; ptr++)
3321        entirely. The code for handling \Q and \E is messy. */        entirely. The code for handling \Q and \E is messy. */
3322    
3323        CHECK_RANGE:        CHECK_RANGE:
3324        while (ptr[1] == '\\' && ptr[2] == 'E')        while (ptr[1] == CHAR_BACKSLASH && ptr[2] == CHAR_E)
3325          {          {
3326          inescq = FALSE;          inescq = FALSE;
3327          ptr += 2;          ptr += 2;
# Line 2825  for (;; ptr++) Line 3329  for (;; ptr++)
3329    
3330        oldptr = ptr;        oldptr = ptr;
3331    
3332        if (!inescq && ptr[1] == '-')        /* Remember \r or \n */
3333    
3334          if (c == CHAR_CR || c == CHAR_NL) cd->external_flags |= PCRE_HASCRORLF;
3335    
3336          /* Check for range */
3337    
3338          if (!inescq && ptr[1] == CHAR_MINUS)
3339          {          {
3340          int d;          int d;
3341          ptr += 2;          ptr += 2;
3342          while (*ptr == '\\' && ptr[1] == 'E') ptr += 2;          while (*ptr == CHAR_BACKSLASH && ptr[1] == CHAR_E) ptr += 2;
3343    
3344          /* If we hit \Q (not followed by \E) at this point, go into escaped          /* If we hit \Q (not followed by \E) at this point, go into escaped
3345          mode. */          mode. */
3346    
3347          while (*ptr == '\\' && ptr[1] == 'Q')          while (*ptr == CHAR_BACKSLASH && ptr[1] == CHAR_Q)
3348            {            {
3349            ptr += 2;            ptr += 2;
3350            if (*ptr == '\\' && ptr[1] == 'E') { ptr += 2; continue; }            if (*ptr == CHAR_BACKSLASH && ptr[1] == CHAR_E)
3351                { ptr += 2; continue; }
3352            inescq = TRUE;            inescq = TRUE;
3353            break;            break;
3354            }            }
3355    
3356          if (*ptr == 0 || (!inescq && *ptr == ']'))          if (*ptr == 0 || (!inescq && *ptr == CHAR_RIGHT_SQUARE_BRACKET))
3357            {            {
3358            ptr = oldptr;            ptr = oldptr;
3359            goto LONE_SINGLE_CHARACTER;            goto LONE_SINGLE_CHARACTER;
# Line 2861  for (;; ptr++) Line 3372  for (;; ptr++)
3372          not any of the other escapes. Perl 5.6 treats a hyphen as a literal          not any of the other escapes. Perl 5.6 treats a hyphen as a literal
3373          in such circumstances. */          in such circumstances. */
3374    
3375          if (!inescq && d == '\\')          if (!inescq && d == CHAR_BACKSLASH)
3376            {            {
3377            d = check_escape(&ptr, errorcodeptr, cd->bracount, options, TRUE);            d = check_escape(&ptr, errorcodeptr, cd->bracount, options, TRUE);
3378            if (*errorcodeptr != 0) goto FAILED;            if (*errorcodeptr != 0) goto FAILED;
3379    
3380            /* \b is backslash; \X is literal X; \R is literal R; any other            /* \b is backspace; \X is literal X; \R is literal R; any other
3381            special means the '-' was literal */            special means the '-' was literal */
3382    
3383            if (d < 0)            if (d < 0)
3384              {              {
3385              if (d == -ESC_b) d = '\b';              if (d == -ESC_b) d = CHAR_BS;
3386              else if (d == -ESC_X) d = 'X';              else if (d == -ESC_X) d = CHAR_X;
3387              else if (d == -ESC_R) d = 'R'; else              else if (d == -ESC_R) d = CHAR_R; else
3388                {                {
3389                ptr = oldptr;                ptr = oldptr;
3390                goto LONE_SINGLE_CHARACTER;  /* A few lines below */                goto LONE_SINGLE_CHARACTER;  /* A few lines below */
# Line 2892  for (;; ptr++) Line 3403  for (;; ptr++)
3403    
3404          if (d == c) goto LONE_SINGLE_CHARACTER;  /* A few lines below */          if (d == c) goto LONE_SINGLE_CHARACTER;  /* A few lines below */
3405    
3406            /* Remember \r or \n */
3407    
3408            if (d == CHAR_CR || d == CHAR_NL) cd->external_flags |= PCRE_HASCRORLF;
3409    
3410          /* In UTF-8 mode, if the upper limit is > 255, or > 127 for caseless          /* In UTF-8 mode, if the upper limit is > 255, or > 127 for caseless
3411          matching, we have to use an XCLASS with extra data items. Caseless          matching, we have to use an XCLASS with extra data items. Caseless
3412          matching for characters > 127 is available only if UCP support is          matching for characters > 127 is available only if UCP support is
# Line 3010  for (;; ptr++) Line 3525  for (;; ptr++)
3525          if ((options & PCRE_CASELESS) != 0)          if ((options & PCRE_CASELESS) != 0)
3526            {            {
3527            unsigned int othercase;            unsigned int othercase;
3528            if ((othercase = _pcre_ucp_othercase(c)) != NOTACHAR)            if ((othercase = UCD_OTHERCASE(c)) != c)
3529              {              {
3530              *class_utf8data++ = XCL_SINGLE;              *class_utf8data++ = XCL_SINGLE;
3531              class_utf8data += _pcre_ord2utf8(othercase, class_utf8data);              class_utf8data += _pcre_ord2utf8(othercase, class_utf8data);
# Line 3037  for (;; ptr++) Line 3552  for (;; ptr++)
3552    
3553      /* Loop until ']' reached. This "while" is the end of the "do" above. */      /* Loop until ']' reached. This "while" is the end of the "do" above. */
3554    
3555      while ((c = *(++ptr)) != 0 && (c != ']' || inescq));      while ((c = *(++ptr)) != 0 && (c != CHAR_RIGHT_SQUARE_BRACKET || inescq));
3556    
3557      if (c == 0)                          /* Missing terminating ']' */      if (c == 0)                          /* Missing terminating ']' */
3558        {        {
# Line 3045  for (;; ptr++) Line 3560  for (;; ptr++)
3560        goto FAILED;        goto FAILED;
3561        }        }
3562    
3563    
3564    /* This code has been disabled because it would mean that \s counts as
3565    an explicit \r or \n reference, and that's not really what is wanted. Now
3566    we set the flag only if there is a literal "\r" or "\n" in the class. */
3567    
3568    #if 0
3569        /* Remember whether \r or \n are in this class */
3570    
3571        if (negate_class)
3572          {
3573          if ((classbits[1] & 0x24) != 0x24) cd->external_flags |= PCRE_HASCRORLF;
3574          }
3575        else
3576          {
3577          if ((classbits[1] & 0x24) != 0) cd->external_flags |= PCRE_HASCRORLF;
3578          }
3579    #endif
3580    
3581    
3582      /* If class_charcount is 1, we saw precisely one character whose value is      /* If class_charcount is 1, we saw precisely one character whose value is
3583      less than 256. In non-UTF-8 mode we can always optimize. In UTF-8 mode, we      less than 256. As long as there were no characters >= 128 and there was no
3584      can optimize the negative case only if there were no characters >= 128      use of \p or \P, in other words, no use of any XCLASS features, we can
3585      because OP_NOT and the related opcodes like OP_NOTSTAR operate on      optimize.
3586      single-bytes only. This is an historical hangover. Maybe one day we can  
3587      tidy these opcodes to handle multi-byte characters.      In UTF-8 mode, we can optimize the negative case only if there were no
3588        characters >= 128 because OP_NOT and the related opcodes like OP_NOTSTAR
3589        operate on single-bytes only. This is an historical hangover. Maybe one day
3590        we can tidy these opcodes to handle multi-byte characters.
3591    
3592      The optimization throws away the bit map. We turn the item into a      The optimization throws away the bit map. We turn the item into a
3593      1-character OP_CHAR[NC] if it's positive, or OP_NOT if it's negative. Note      1-character OP_CHAR[NC] if it's positive, or OP_NOT if it's negative. Note
# Line 3060  for (;; ptr++) Line 3597  for (;; ptr++)
3597      reqbyte, save the previous value for reinstating. */      reqbyte, save the previous value for reinstating. */
3598    
3599  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
3600      if (class_charcount == 1 &&      if (class_charcount == 1 && !class_utf8 &&
3601            (!utf8 ||        (!utf8 || !negate_class || class_lastchar < 128))
           (!class_utf8 && (!negate_class || class_lastchar < 128))))  
   
3602  #else  #else
3603      if (class_charcount == 1)      if (class_charcount == 1)
3604  #endif  #endif
# Line 3106  for (;; ptr++) Line 3641  for (;; ptr++)
3641      zeroreqbyte = reqbyte;      zeroreqbyte = reqbyte;
3642    
3643      /* If there are characters with values > 255, we have to compile an      /* If there are characters with values > 255, we have to compile an
3644      extended class, with its own opcode. If there are no characters < 256,      extended class, with its own opcode, unless there was a negated special
3645      we can omit the bitmap in the actual compiled code. */      such as \S in the class, because in that case all characters > 255 are in
3646        the class, so any that were explicitly given as well can be ignored. If
3647        (when there are explicit characters > 255 that must be listed) there are no
3648        characters < 256, we can omit the bitmap in the actual compiled code. */
3649    
3650  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
3651      if (class_utf8)      if (class_utf8 && !should_flip_negation)
3652        {        {
3653        *class_utf8data++ = XCL_END;    /* Marks the end of extra data */        *class_utf8data++ = XCL_END;    /* Marks the end of extra data */
3654        *code++ = OP_XCLASS;        *code++ = OP_XCLASS;
# Line 3136  for (;; ptr++) Line 3674  for (;; ptr++)
3674        }        }
3675  #endif  #endif
3676    
3677      /* If there are no characters > 255, negate the 32-byte map if necessary,      /* If there are no characters > 255, set the opcode to OP_CLASS or
3678      and copy it into the code vector. If this is the first thing in the branch,      OP_NCLASS, depending on whether the whole class was negated and whether
3679      there can be no first char setting, whatever the repeat count. Any reqbyte      there were negative specials such as \S in the class. Then copy the 32-byte
3680      setting must remain unchanged after any kind of repeat. */      map into the code vector, negating it if necessary. */
3681    
3682        *code++ = (negate_class == should_flip_negation) ? OP_CLASS : OP_NCLASS;
3683      if (negate_class)      if (negate_class)
3684        {        {
       *code++ = OP_NCLASS;  
3685        if (lengthptr == NULL)    /* Save time in the pre-compile phase */        if (lengthptr == NULL)    /* Save time in the pre-compile phase */
3686          for (c = 0; c < 32; c++) code[c] = ~classbits[c];          for (c = 0; c < 32; c++) code[c] = ~classbits[c];
3687        }        }
3688      else      else
3689        {        {
       *code++ = OP_CLASS;  
3690        memcpy(code, classbits, 32);        memcpy(code, classbits, 32);
3691        }        }
3692      code += 32;      code += 32;
# Line 3160  for (;; ptr++) Line 3697  for (;; ptr++)
3697      /* Various kinds of repeat; '{' is not necessarily a quantifier, but this      /* Various kinds of repeat; '{' is not necessarily a quantifier, but this
3698      has been tested above. */      has been tested above. */
3699    
3700      case '{':      case CHAR_LEFT_CURLY_BRACKET:
3701      if (!is_quantifier) goto NORMAL_CHAR;      if (!is_quantifier) goto NORMAL_CHAR;
3702      ptr = read_repeat_counts(ptr+1, &repeat_min, &repeat_max, errorcodeptr);      ptr = read_repeat_counts(ptr+1, &repeat_min, &repeat_max, errorcodeptr);
3703      if (*errorcodeptr != 0) goto FAILED;      if (*errorcodeptr != 0) goto FAILED;
3704      goto REPEAT;      goto REPEAT;
3705    
3706      case '*':      case CHAR_ASTERISK:
3707      repeat_min = 0;      repeat_min = 0;
3708      repeat_max = -1;      repeat_max = -1;
3709      goto REPEAT;      goto REPEAT;
3710    
3711      case '+':      case CHAR_PLUS:
3712      repeat_min = 1;      repeat_min = 1;
3713      repeat_max = -1;      repeat_max = -1;
3714      goto REPEAT;      goto REPEAT;
3715    
3716      case '?':      case CHAR_QUESTION_MARK:
3717      repeat_min = 0;      repeat_min = 0;
3718      repeat_max = 1;      repeat_max = 1;
3719    
# Line 3211  for (;; ptr++) Line 3748  for (;; ptr++)
3748      but if PCRE_UNGREEDY is set, it works the other way round. We change the      but if PCRE_UNGREEDY is set, it works the other way round. We change the
3749      repeat type to the non-default. */      repeat type to the non-default. */
3750    
3751      if (ptr[1] == '+')      if (ptr[1] == CHAR_PLUS)
3752        {        {
3753        repeat_type = 0;                  /* Force greedy */        repeat_type = 0;                  /* Force greedy */
3754        possessive_quantifier = TRUE;        possessive_quantifier = TRUE;
3755        ptr++;        ptr++;
3756        }        }
3757      else if (ptr[1] == '?')      else if (ptr[1] == CHAR_QUESTION_MARK)
3758        {        {
3759        repeat_type = greedy_non_default;        repeat_type = greedy_non_default;
3760        ptr++;        ptr++;
# Line 3335  for (;; ptr++) Line 3872  for (;; ptr++)
3872        /* All real repeats make it impossible to handle partial matching (maybe        /* All real repeats make it impossible to handle partial matching (maybe
3873        one day we will be able to remove this restriction). */        one day we will be able to remove this restriction). */
3874    
3875        if (repeat_max != 1) cd->nopartial = TRUE;        if (repeat_max != 1) cd->external_flags |= PCRE_NOPARTIAL;
3876    
3877        /* Combine the op_type with the repeat_type */        /* Combine the op_type with the repeat_type */
3878    
# Line 3485  for (;; ptr++) Line 4022  for (;; ptr++)
4022        /* All real repeats make it impossible to handle partial matching (maybe        /* All real repeats make it impossible to handle partial matching (maybe
4023        one day we will be able to remove this restriction). */        one day we will be able to remove this restriction). */
4024    
4025        if (repeat_max != 1) cd->nopartial = TRUE;        if (repeat_max != 1) cd->external_flags |= PCRE_NOPARTIAL;
4026    
4027        if (repeat_min == 0 && repeat_max == -1)        if (repeat_min == 0 && repeat_max == -1)
4028          *code++ = OP_CRSTAR + repeat_type;          *code++ = OP_CRSTAR + repeat_type;
# Line 3521  for (;; ptr++) Line 4058  for (;; ptr++)
4058          goto FAILED;          goto FAILED;
4059          }          }
4060    
       /* This is a paranoid check to stop integer overflow later on */  
   
       if (len > MAX_DUPLENGTH)  
         {  
         *errorcodeptr = ERR50;  
         goto FAILED;  
         }  
   
4061        /* If the maximum repeat count is unlimited, find the end of the bracket        /* If the maximum repeat count is unlimited, find the end of the bracket
4062        by scanning through from the start, and compute the offset back to it        by scanning through from the start, and compute the offset back to it
4063        from the current code pointer. There may be an OP_OPT setting following        from the current code pointer. There may be an OP_OPT setting following
# Line 3551  for (;; ptr++) Line 4080  for (;; ptr++)
4080    
4081        if (repeat_min == 0)        if (repeat_min == 0)
4082          {          {
4083          /* If the maximum is also zero, we just omit the group from the output          /* If the maximum is also zero, we used to just omit the group from the
4084          altogether. */          output altogether, like this:
4085    
4086          if (repeat_max == 0)          ** if (repeat_max == 0)
4087            {          **   {
4088            code = previous;          **   code = previous;
4089            goto END_REPEAT;          **   goto END_REPEAT;
4090            }          **   }
4091    
4092          /* If the maximum is 1 or unlimited, we just have to stick in the          However, that fails when a group is referenced as a subroutine from
4093          BRAZERO and do no more at this point. However, we do need to adjust          elsewhere in the pattern, so now we stick in OP_SKIPZERO in front of it
4094          any OP_RECURSE calls inside the group that refer to the group itself or          so that it is skipped on execution. As we don't have a list of which
4095          any internal or forward referenced group, because the offset is from          groups are referenced, we cannot do this selectively.
4096          the start of the whole regex. Temporarily terminate the pattern while  
4097          doing this. */          If the maximum is 1 or unlimited, we just have to stick in the BRAZERO
4098            and do no more at this point. However, we do need to adjust any
4099            OP_RECURSE calls inside the group that refer to the group itself or any
4100            internal or forward referenced group, because the offset is from the
4101            start of the whole regex. Temporarily terminate the pattern while doing
4102            this. */
4103    
4104          if (repeat_max <= 1)          if (repeat_max <= 1)    /* Covers 0, 1, and unlimited */
4105            {            {
4106            *code = OP_END;            *code = OP_END;
4107            adjust_recurse(previous, 1, utf8, cd, save_hwm);            adjust_recurse(previous, 1, utf8, cd, save_hwm);
4108            memmove(previous+1, previous, len);            memmove(previous+1, previous, len);
4109            code++;            code++;
4110              if (repeat_max == 0)
4111                {
4112                *previous++ = OP_SKIPZERO;
4113                goto END_REPEAT;
4114                }
4115            *previous++ = OP_BRAZERO + repeat_type;            *previous++ = OP_BRAZERO + repeat_type;
4116            }            }
4117    
# Line 3617  for (;; ptr++) Line 4156  for (;; ptr++)
4156          if (repeat_min > 1)          if (repeat_min > 1)
4157            {            {
4158            /* In the pre-compile phase, we don't actually do the replication. We            /* In the pre-compile phase, we don't actually do the replication. We
4159            just adjust the length as if we had. */            just adjust the length as if we had. Do some paranoid checks for
4160              potential integer overflow. */
4161    
4162            if (lengthptr != NULL)            if (lengthptr != NULL)
4163              *lengthptr += (repeat_min - 1)*length_prevgroup;              {
4164                int delta = (repeat_min - 1)*length_prevgroup;
4165                if ((double)(repeat_min - 1)*(double)length_prevgroup >
4166                                                                (double)INT_MAX ||
4167                    OFLOW_MAX - *lengthptr < delta)
4168                  {
4169                  *errorcodeptr = ERR20;
4170                  goto FAILED;
4171                  }
4172                *lengthptr += delta;
4173                }
4174    
4175            /* This is compiling for real */            /* This is compiling for real */
4176    
# Line 3658  for (;; ptr++) Line 4208  for (;; ptr++)
4208          /* In the pre-compile phase, we don't actually do the replication. We          /* In the pre-compile phase, we don't actually do the replication. We
4209          just adjust the length as if we had. For each repetition we must add 1          just adjust the length as if we had. For each repetition we must add 1
4210          to the length for BRAZERO and for all but the last repetition we must          to the length for BRAZERO and for all but the last repetition we must
4211          add 2 + 2*LINKSIZE to allow for the nesting that occurs. */          add 2 + 2*LINKSIZE to allow for the nesting that occurs. Do some
4212            paranoid checks to avoid integer overflow. */
4213    
4214          if (lengthptr != NULL && repeat_max > 0)          if (lengthptr != NULL && repeat_max > 0)
4215            *lengthptr += repeat_max * (length_prevgroup + 1 + 2 + 2*LINK_SIZE) -            {
4216              2 - 2*LINK_SIZE;  /* Last one doesn't nest */            int delta = repeat_max * (length_prevgroup + 1 + 2 + 2*LINK_SIZE) -
4217                          2 - 2*LINK_SIZE;   /* Last one doesn't nest */
4218              if ((double)repeat_max *
4219                    (double)(length_prevgroup + 1 + 2 + 2*LINK_SIZE)
4220                      > (double)INT_MAX ||
4221                  OFLOW_MAX - *lengthptr < delta)
4222                {
4223                *errorcodeptr = ERR20;
4224                goto FAILED;
4225                }
4226              *lengthptr += delta;
4227              }
4228    
4229          /* This is compiling for real */          /* This is compiling for real */
4230    
# Line 3744  for (;; ptr++) Line 4306  for (;; ptr++)
4306          }          }
4307        }        }
4308    
4309        /* If previous is OP_FAIL, it was generated by an empty class [] in
4310        JavaScript mode. The other ways in which OP_FAIL can be generated, that is
4311        by (*FAIL) or (?!) set previous to NULL, which gives a "nothing to repeat"
4312        error above. We can just ignore the repeat in JS case. */
4313    
4314        else if (*previous == OP_FAIL) goto END_REPEAT;
4315    
4316      /* Else there's some kind of shambles */      /* Else there's some kind of shambles */
4317    
4318      else      else
# Line 3770  for (;; ptr++) Line 4339  for (;; ptr++)
4339        int len;        int len;
4340        if (*tempcode == OP_EXACT || *tempcode == OP_TYPEEXACT ||        if (*tempcode == OP_EXACT || *tempcode == OP_TYPEEXACT ||
4341            *tempcode == OP_NOTEXACT)            *tempcode == OP_NOTEXACT)
4342          tempcode += _pcre_OP_lengths[*tempcode];          tempcode += _pcre_OP_lengths[*tempcode] +
4343              ((*tempcode == OP_TYPEEXACT &&
4344                 (tempcode[3] == OP_PROP || tempcode[3] == OP_NOTPROP))? 2:0);
4345        len = code - tempcode;        len = code - tempcode;
4346        if (len > 0) switch (*tempcode)        if (len > 0) switch (*tempcode)
4347          {          {
# Line 3814  for (;; ptr++) Line 4385  for (;; ptr++)
4385      /* ===================================================================*/      /* ===================================================================*/
4386      /* Start of nested parenthesized sub-expression, or comment or lookahead or      /* Start of nested parenthesized sub-expression, or comment or lookahead or
4387      lookbehind or option setting or condition or all the other extended      lookbehind or option setting or condition or all the other extended
4388      parenthesis forms. First deal with the specials; all are introduced by ?,      parenthesis forms.  */
     and the appearance of any of them means that this is not a capturing  
     group. */  
4389    
4390      case '(':      case CHAR_LEFT_PARENTHESIS:
4391      newoptions = options;      newoptions = options;
4392      skipbytes = 0;      skipbytes = 0;
4393      bravalue = OP_CBRA;      bravalue = OP_CBRA;
4394      save_hwm = cd->hwm;      save_hwm = cd->hwm;
4395      reset_bracount = FALSE;      reset_bracount = FALSE;
4396    
4397      if (*(++ptr) == '?')      /* First deal with various "verbs" that can be introduced by '*'. */
4398    
4399        if (*(++ptr) == CHAR_ASTERISK && (cd->ctypes[ptr[1]] & ctype_letter) != 0)
4400          {
4401          int i, namelen;
4402          const char *vn = verbnames;
4403          const uschar *name = ++ptr;
4404          previous = NULL;
4405          while ((cd->ctypes[*++ptr] & ctype_letter) != 0) {};
4406          if (*ptr == CHAR_COLON)
4407            {
4408            *errorcodeptr = ERR59;   /* Not supported */
4409            goto FAILED;
4410            }
4411          if (*ptr != CHAR_RIGHT_PARENTHESIS)
4412            {
4413            *errorcodeptr = ERR60;
4414            goto FAILED;
4415            }
4416          namelen = ptr - name;
4417          for (i = 0; i < verbcount; i++)
4418            {
4419            if (namelen == verbs[i].len &&
4420                strncmp((char *)name, vn, namelen) == 0)
4421              {
4422              *code = verbs[i].op;
4423              if (*code++ == OP_ACCEPT) cd->had_accept = TRUE;
4424              break;
4425              }
4426            vn += verbs[i].len + 1;
4427            }
4428          if (i < verbcount) continue;
4429          *errorcodeptr = ERR60;
4430          goto FAILED;
4431          }
4432    
4433        /* Deal with the extended parentheses; all are introduced by '?', and the
4434        appearance of any of them means that this is not a capturing group. */
4435    
4436        else if (*ptr == CHAR_QUESTION_MARK)
4437        {        {
4438        int i, set, unset, namelen;        int i, set, unset, namelen;
4439        int *optset;        int *optset;
# Line 3834  for (;; ptr++) Line 4442  for (;; ptr++)
4442    
4443        switch (*(++ptr))        switch (*(++ptr))
4444          {          {
4445          case '#':                 /* Comment; skip to ket */          case CHAR_NUMBER_SIGN:                 /* Comment; skip to ket */
4446          ptr++;          ptr++;
4447          while (*ptr != 0 && *ptr != ')') ptr++;          while (*ptr != 0 && *ptr != CHAR_RIGHT_PARENTHESIS) ptr++;
4448          if (*ptr == 0)          if (*ptr == 0)
4449            {            {
4450            *errorcodeptr = ERR18;            *errorcodeptr = ERR18;
# Line 3846  for (;; ptr++) Line 4454  for (;; ptr++)
4454    
4455    
4456          /* ------------------------------------------------------------ */          /* ------------------------------------------------------------ */
4457          case '|':                 /* Reset capture count for each branch */          case CHAR_VERTICAL_LINE:  /* Reset capture count for each branch */
4458          reset_bracount = TRUE;          reset_bracount = TRUE;
4459          /* Fall through */          /* Fall through */
4460    
4461          /* ------------------------------------------------------------ */          /* ------------------------------------------------------------ */
4462          case ':':                 /* Non-capturing bracket */          case CHAR_COLON:          /* Non-capturing bracket */
4463          bravalue = OP_BRA;          bravalue = OP_BRA;
4464          ptr++;          ptr++;
4465          break;          break;
4466    
4467    
4468          /* ------------------------------------------------------------ */          /* ------------------------------------------------------------ */
4469          case '(':          case CHAR_LEFT_PARENTHESIS:
4470          bravalue = OP_COND;       /* Conditional group */          bravalue = OP_COND;       /* Conditional group */
4471    
4472          /* A condition can be an assertion, a number (referring to a numbered          /* A condition can be an assertion, a number (referring to a numbered
# Line 3878  for (;; ptr++) Line 4486  for (;; ptr++)
4486          the switch. This will take control down to where bracketed groups,          the switch. This will take control down to where bracketed groups,
4487          including assertions, are processed. */          including assertions, are processed. */
4488    
4489          if (ptr[1] == '?' && (ptr[2] == '=' || ptr[2] == '!' || ptr[2] == '<'))          if (ptr[1] == CHAR_QUESTION_MARK && (ptr[2] == CHAR_EQUALS_SIGN ||
4490                ptr[2] == CHAR_EXCLAMATION_MARK || ptr[2] == CHAR_LESS_THAN_SIGN))
4491            break;            break;
4492    
4493          /* Most other conditions use OP_CREF (a couple change to OP_RREF          /* Most other conditions use OP_CREF (a couple change to OP_RREF
# Line 3890  for (;; ptr++) Line 4499  for (;; ptr++)
4499    
4500          /* Check for a test for recursion in a named group. */          /* Check for a test for recursion in a named group. */
4501    
4502          if (ptr[1] == 'R' && ptr[2] == '&')          if (ptr[1] == CHAR_R && ptr[2] == CHAR_AMPERSAND)
4503            {            {
4504            terminator = -1;            terminator = -1;
4505            ptr += 2;            ptr += 2;
# Line 3900  for (;; ptr++) Line 4509  for (;; ptr++)
4509          /* Check for a test for a named group's having been set, using the Perl          /* Check for a test for a named group's having been set, using the Perl
4510          syntax (?(<name>) or (?('name') */          syntax (?(<name>) or (?('name') */
4511    
4512          else if (ptr[1] == '<')          else if (ptr[1] == CHAR_LESS_THAN_SIGN)
4513            {            {
4514            terminator = '>';            terminator = CHAR_GREATER_THAN_SIGN;
4515            ptr++;            ptr++;
4516            }            }
4517          else if (ptr[1] == '\'')          else if (ptr[1] == CHAR_APOSTROPHE)
4518            {            {
4519            terminator = '\'';            terminator = CHAR_APOSTROPHE;
4520            ptr++;            ptr++;
4521            }            }
4522          else          else
4523            {            {
4524            terminator = 0;            terminator = 0;
4525            if (ptr[1] == '-' || ptr[1] == '+') refsign = *(++ptr);            if (ptr[1] == CHAR_MINUS || ptr[1] == CHAR_PLUS) refsign = *(++ptr);
4526            }            }
4527    
4528          /* We now expect to read a name; any thing else is an error */          /* We now expect to read a name; any thing else is an error */
# Line 3933  for (;; ptr++) Line 4542  for (;; ptr++)
4542            {            {
4543            if (recno >= 0)            if (recno >= 0)
4544              recno = ((digitab[*ptr] & ctype_digit) != 0)?              recno = ((digitab[*ptr] & ctype_digit) != 0)?
4545                recno * 10 + *ptr - '0' : -1;                recno * 10 + *ptr - CHAR_0 : -1;
4546            ptr++;            ptr++;
4547            }            }
4548          namelen = ptr - name;          namelen = ptr - name;
4549    
4550          if ((terminator > 0 && *ptr++ != terminator) || *ptr++ != ')')          if ((terminator > 0 && *ptr++ != terminator) ||
4551                *ptr++ != CHAR_RIGHT_PARENTHESIS)
4552            {            {
4553            ptr--;      /* Error offset */            ptr--;      /* Error offset */
4554            *errorcodeptr = ERR26;            *errorcodeptr = ERR26;
# Line 3960  for (;; ptr++) Line 4570  for (;; ptr++)
4570              *errorcodeptr = ERR58;              *errorcodeptr = ERR58;
4571              goto FAILED;              goto FAILED;
4572              }              }
4573            if (refsign == '-')            recno = (refsign == CHAR_MINUS)?
4574                cd->bracount - recno + 1 : recno +cd->bracount;
4575              if (recno <= 0 || recno > cd->final_bracount)
4576              {              {
4577              recno = cd->bracount - recno + 1;              *errorcodeptr = ERR15;
4578              if (recno <= 0)              goto FAILED;
               {  
               *errorcodeptr = ERR15;  
               goto FAILED;  
               }  
4579              }              }
           else recno += cd->bracount;  
4580            PUT2(code, 2+LINK_SIZE, recno);            PUT2(code, 2+LINK_SIZE, recno);
4581            break;            break;
4582            }            }
# Line 3994  for (;; ptr++) Line 4601  for (;; ptr++)
4601    
4602          /* Search the pattern for a forward reference */          /* Search the pattern for a forward reference */
4603    
4604          else if ((i = find_parens(ptr, cd->bracount, name, namelen,          else if ((i = find_parens(cd, name, namelen,
4605                          (options & PCRE_EXTENDED) != 0)) > 0)                          (options & PCRE_EXTENDED) != 0)) > 0)
4606            {            {
4607            PUT2(code, 2+LINK_SIZE, i);            PUT2(code, 2+LINK_SIZE, i);
# Line 4015  for (;; ptr++) Line 4622  for (;; ptr++)
4622          /* Check for (?(R) for recursion. Allow digits after R to specify a          /* Check for (?(R) for recursion. Allow digits after R to specify a
4623          specific group number. */          specific group number. */
4624    
4625          else if (*name == 'R')          else if (*name == CHAR_R)
4626            {            {
4627            recno = 0;            recno = 0;
4628            for (i = 1; i < namelen; i++)            for (i = 1; i < namelen; i++)
# Line 4025  for (;; ptr++) Line 4632  for (;; ptr++)
4632                *errorcodeptr = ERR15;                *errorcodeptr = ERR15;
4633                goto FAILED;                goto FAILED;
4634                }                }
4635              recno = recno * 10 + name[i] - '0';              recno = recno * 10 + name[i] - CHAR_0;
4636              }              }
4637            if (recno == 0) recno = RREF_ANY;            if (recno == 0) recno = RREF_ANY;
4638            code[1+LINK_SIZE] = OP_RREF;      /* Change test type */            code[1+LINK_SIZE] = OP_RREF;      /* Change test type */
# Line 4035  for (;; ptr++) Line 4642  for (;; ptr++)
4642          /* Similarly, check for the (?(DEFINE) "condition", which is always          /* Similarly, check for the (?(DEFINE) "condition", which is always
4643          false. */          false. */
4644    
4645          else if (namelen == 6 && strncmp((char *)name, "DEFINE", 6) == 0)          else if (namelen == 6 && strncmp((char *)name, STRING_DEFINE, 6) == 0)
4646            {            {
4647            code[1+LINK_SIZE] = OP_DEF;            code[1+LINK_SIZE] = OP_DEF;
4648            skipbytes = 1;            skipbytes = 1;
4649            }            }
4650    
4651          /* Check for the "name" actually being a subpattern number. */          /* Check for the "name" actually being a subpattern number. We are
4652            in the second pass here, so final_bracount is set. */
4653    
4654          else if (recno > 0)          else if (recno > 0 && recno <= cd->final_bracount)
4655            {            {
4656            PUT2(code, 2+LINK_SIZE, recno);            PUT2(code, 2+LINK_SIZE, recno);
4657            }            }
# Line 4059  for (;; ptr++) Line 4667  for (;; ptr++)
4667    
4668    
4669          /* ------------------------------------------------------------ */          /* ------------------------------------------------------------ */
4670          case '=':                 /* Positive lookahead */          case CHAR_EQUALS_SIGN:                 /* Positive lookahead */
4671          bravalue = OP_ASSERT;          bravalue = OP_ASSERT;
4672          ptr++;          ptr++;
4673          break;          break;
4674    
4675    
4676          /* ------------------------------------------------------------ */          /* ------------------------------------------------------------ */
4677          case '!':                 /* Negative lookahead */          case CHAR_EXCLAMATION_MARK:            /* Negative lookahead */
         bravalue = OP_ASSERT_NOT;  
4678          ptr++;          ptr++;
4679            if (*ptr == CHAR_RIGHT_PARENTHESIS)    /* Optimize (?!) */
4680              {
4681              *code++ = OP_FAIL;
4682              previous = NULL;
4683              continue;
4684              }
4685            bravalue = OP_ASSERT_NOT;
4686          break;          break;
4687    
4688    
4689          /* ------------------------------------------------------------ */          /* ------------------------------------------------------------ */
4690          case '<':                 /* Lookbehind or named define */          case CHAR_LESS_THAN_SIGN:              /* Lookbehind or named define */
4691          switch (ptr[1])          switch (ptr[1])
4692            {            {
4693            case '=':               /* Positive lookbehind */            case CHAR_EQUALS_SIGN:               /* Positive lookbehind */
4694            bravalue = OP_ASSERTBACK;            bravalue = OP_ASSERTBACK;
4695            ptr += 2;            ptr += 2;
4696            break;            break;
4697    
4698            case '!':               /* Negative lookbehind */            case CHAR_EXCLAMATION_MARK:          /* Negative lookbehind */
4699            bravalue = OP_ASSERTBACK_NOT;            bravalue = OP_ASSERTBACK_NOT;
4700            ptr += 2;            ptr += 2;
4701            break;            break;
# Line 4096  for (;; ptr++) Line 4710  for (;; ptr++)
4710    
4711    
4712          /* ------------------------------------------------------------ */          /* ------------------------------------------------------------ */
4713          case '>':                 /* One-time brackets */          case CHAR_GREATER_THAN_SIGN:           /* One-time brackets */
4714          bravalue = OP_ONCE;          bravalue = OP_ONCE;
4715          ptr++;          ptr++;
4716          break;          break;
4717    
4718    
4719          /* ------------------------------------------------------------ */          /* ------------------------------------------------------------ */
4720          case 'C':                 /* Callout - may be followed by digits; */          case CHAR_C:                 /* Callout - may be followed by digits; */
4721          previous_callout = code;  /* Save for later completion */          previous_callout = code;  /* Save for later completion */
4722          after_manual_callout = 1; /* Skip one item before completing */          after_manual_callout = 1; /* Skip one item before completing */
4723          *code++ = OP_CALLOUT;          *code++ = OP_CALLOUT;
4724            {            {
4725            int n = 0;            int n = 0;
4726            while ((digitab[*(++ptr)] & ctype_digit) != 0)            while ((digitab[*(++ptr)] & ctype_digit) != 0)
4727              n = n * 10 + *ptr - '0';              n = n * 10 + *ptr - CHAR_0;
4728            if (*ptr != ')')            if (*ptr != CHAR_RIGHT_PARENTHESIS)
4729              {              {
4730              *errorcodeptr = ERR39;              *errorcodeptr = ERR39;
4731              goto FAILED;              goto FAILED;
# Line 4131  for (;; ptr++) Line 4745  for (;; ptr++)
4745    
4746    
4747          /* ------------------------------------------------------------ */          /* ------------------------------------------------------------ */
4748          case 'P':                 /* Python-style named subpattern handling */          case CHAR_P:              /* Python-style named subpattern handling */
4749          if (*(++ptr) == '=' || *ptr == '>')  /* Reference or recursion */          if (*(++ptr) == CHAR_EQUALS_SIGN ||
4750                *ptr == CHAR_GREATER_THAN_SIGN)  /* Reference or recursion */
4751            {            {
4752            is_recurse = *ptr == '>';            is_recurse = *ptr == CHAR_GREATER_THAN_SIGN;
4753            terminator = ')';            terminator = CHAR_RIGHT_PARENTHESIS;
4754            goto NAMED_REF_OR_RECURSE;            goto NAMED_REF_OR_RECURSE;
4755            }            }
4756          else if (*ptr != '<')    /* Test for Python-style definition */          else if (*ptr != CHAR_LESS_THAN_SIGN)  /* Test for Python-style defn */
4757            {            {
4758            *errorcodeptr = ERR41;            *errorcodeptr = ERR41;
4759            goto FAILED;            goto FAILED;
# Line 4148  for (;; ptr++) Line 4763  for (;; ptr++)
4763    
4764          /* ------------------------------------------------------------ */          /* ------------------------------------------------------------ */
4765          DEFINE_NAME:    /* Come here from (?< handling */          DEFINE_NAME:    /* Come here from (?< handling */
4766          case '\'':          case CHAR_APOSTROPHE:
4767            {            {
4768            terminator = (*ptr == '<')? '>' : '\'';            terminator = (*ptr == CHAR_LESS_THAN_SIGN)?
4769                CHAR_GREATER_THAN_SIGN : CHAR_APOSTROPHE;
4770            name = ++ptr;            name = ++ptr;
4771    
4772            while ((cd->ctypes[*ptr] & ctype_word) != 0) ptr++;            while ((cd->ctypes[*ptr] & ctype_word) != 0) ptr++;
# Line 4224  for (;; ptr++) Line 4840  for (;; ptr++)
4840    
4841    
4842          /* ------------------------------------------------------------ */          /* ------------------------------------------------------------ */
4843          case '&':                 /* Perl recursion/subroutine syntax */          case CHAR_AMPERSAND:            /* Perl recursion/subroutine syntax */
4844          terminator = ')';          terminator = CHAR_RIGHT_PARENTHESIS;
4845          is_recurse = TRUE;          is_recurse = TRUE;
4846          /* Fall through */          /* Fall through */
4847    
4848          /* We come here from the Python syntax above that handles both          /* We come here from the Python syntax above that handles both
4849          references (?P=name) and recursion (?P>name), as well as falling          references (?P=name) and recursion (?P>name), as well as falling
4850          through from the Perl recursion syntax (?&name). */          through from the Perl recursion syntax (?&name). We also come here from
4851            the Perl \k<name> or \k'name' back reference syntax and the \k{name}
4852            .NET syntax, and the Oniguruma \g<...> and \g'...' subroutine syntax. */
4853    
4854          NAMED_REF_OR_RECURSE:          NAMED_REF_OR_RECURSE:
4855          name = ++ptr;          name = ++ptr;
# Line 4243  for (;; ptr++) Line 4861  for (;; ptr++)
4861    
4862          if (lengthptr != NULL)          if (lengthptr != NULL)
4863            {            {
4864              if (namelen == 0)
4865                {
4866                *errorcodeptr = ERR62;
4867                goto FAILED;
4868                }
4869            if (*ptr != terminator)            if (*ptr != terminator)
4870              {              {
4871              *errorcodeptr = ERR42;              *errorcodeptr = ERR42;
# Line 4256  for (;; ptr++) Line 4879  for (;; ptr++)
4879            recno = 0;            recno = 0;
4880            }            }
4881    
4882          /* In the real compile, seek the name in the table */          /* In the real compile, seek the name in the table. We check the name
4883            first, and then check that we have reached the end of the name in the
4884            table. That way, if the name is longer than any in the table,
4885            the comparison will fail without reading beyond the table entry. */
4886    
4887          else          else
4888            {            {
4889            slot = cd->name_table;            slot = cd->name_table;
4890            for (i = 0; i < cd->names_found; i++)            for (i = 0; i < cd->names_found; i++)
4891              {              {
4892              if (strncmp((char *)name, (char *)slot+2, namelen) == 0) break;              if (strncmp((char *)name, (char *)slot+2, namelen) == 0 &&
4893                    slot[2+namelen] == 0)
4894                  break;
4895              slot += cd->name_entry_size;              slot += cd->name_entry_size;
4896              }              }
4897    
# Line 4272  for (;; ptr++) Line 4900  for (;; ptr++)
4900              recno = GET2(slot, 0);              recno = GET2(slot, 0);
4901              }              }
4902            else if ((recno =                /* Forward back reference */            else if ((recno =                /* Forward back reference */
4903                      find_parens(ptr, cd->bracount, name, namelen,                      find_parens(cd, name, namelen,
4904                        (options & PCRE_EXTENDED) != 0)) <= 0)                        (options & PCRE_EXTENDED) != 0)) <= 0)
4905              {              {
4906              *errorcodeptr = ERR15;              *errorcodeptr = ERR15;
# Line 4288  for (;; ptr++) Line 4916  for (;; ptr++)
4916    
4917    
4918          /* ------------------------------------------------------------ */          /* ------------------------------------------------------------ */
4919          case 'R':                 /* Recursion */          case CHAR_R:              /* Recursion */
4920          ptr++;                    /* Same as (?0)      */          ptr++;                    /* Same as (?0)      */
4921          /* Fall through */          /* Fall through */
4922    
4923    
4924          /* ------------------------------------------------------------ */          /* ------------------------------------------------------------ */
4925          case '-': case '+':          case CHAR_MINUS: case CHAR_PLUS:  /* Recursion or subroutine */
4926          case '0': case '1': case '2': case '3': case '4':   /* Recursion or */          case CHAR_0: case CHAR_1: case CHAR_2: case CHAR_3: case CHAR_4:
4927          case '5': case '6': case '7': case '8': case '9':   /* subroutine */          case CHAR_5: case CHAR_6: case CHAR_7: case CHAR_8: case CHAR_9:
4928            {            {
4929            const uschar *called;            const uschar *called;
4930              terminator = CHAR_RIGHT_PARENTHESIS;
4931    
4932            if ((refsign = *ptr) == '+') ptr++;            /* Come here from the \g<...> and \g'...' code (Oniguruma
4933            else if (refsign == '-')            compatibility). However, the syntax has been checked to ensure that
4934              the ... are a (signed) number, so that neither ERR63 nor ERR29 will
4935              be called on this path, nor will the jump to OTHER_CHAR_AFTER_QUERY
4936              ever be taken. */
4937    
4938              HANDLE_NUMERICAL_RECURSION:
4939    
4940              if ((refsign = *ptr) == CHAR_PLUS)
4941                {
4942                ptr++;
4943                if ((digitab[*ptr] & ctype_digit) == 0)
4944                  {
4945                  *errorcodeptr = ERR63;
4946                  goto FAILED;
4947                  }
4948                }
4949              else if (refsign == CHAR_MINUS)
4950              {              {
4951              if ((digitab[ptr[1]] & ctype_digit) == 0)              if ((digitab[ptr[1]] & ctype_digit) == 0)
4952                goto OTHER_CHAR_AFTER_QUERY;                goto OTHER_CHAR_AFTER_QUERY;
# Line 4310  for (;; ptr++) Line 4955  for (;; ptr++)
4955    
4956            recno = 0;            recno = 0;
4957            while((digitab[*ptr] & ctype_digit) != 0)            while((digitab[*ptr] & ctype_digit) != 0)
4958              recno = recno * 10 + *ptr++ - '0';              recno = recno * 10 + *ptr++ - CHAR_0;
4959    
4960            if (*ptr != ')')            if (*ptr != terminator)
4961              {              {
4962              *errorcodeptr = ERR29;              *errorcodeptr = ERR29;
4963              goto FAILED;              goto FAILED;
4964              }              }
4965    
4966            if (refsign == '-')            if (refsign == CHAR_MINUS)
4967              {              {
4968              if (recno == 0)              if (recno == 0)
4969                {                {
# Line 4332  for (;; ptr++) Line 4977  for (;; ptr++)
4977                goto FAILED;                goto FAILED;
4978                }                }
4979              }              }
4980            else if (refsign == '+')            else if (refsign == CHAR_PLUS)
4981              {              {
4982              if (recno == 0)              if (recno == 0)
4983                {                {
# Line 4365  for (;; ptr++) Line 5010  for (;; ptr++)
5010    
5011              if (called == NULL)              if (called == NULL)
5012                {                {
5013                if (find_parens(ptr, cd->bracount, NULL, recno,                if (find_parens(cd, NULL, recno,
5014                     (options & PCRE_EXTENDED) != 0) < 0)                      (options & PCRE_EXTENDED) != 0) < 0)
5015                  {                  {
5016                  *errorcodeptr = ERR15;                  *errorcodeptr = ERR15;
5017                  goto FAILED;                  goto FAILED;
# Line 4418  for (;; ptr++) Line 5063  for (;; ptr++)
5063          set = unset = 0;          set = unset = 0;
5064          optset = &set;          optset = &set;
5065    
5066          while (*ptr != ')' && *ptr != ':')          while (*ptr != CHAR_RIGHT_PARENTHESIS && *ptr != CHAR_COLON)
5067            {            {
5068            switch (*ptr++)            switch (*ptr++)
5069              {              {
5070              case '-': optset = &unset; break;              case CHAR_MINUS: optset = &unset; break;
5071    
5072              case 'J':    /* Record that it changed in the external options */              case CHAR_J:    /* Record that it changed in the external options */
5073              *optset |= PCRE_DUPNAMES;              *optset |= PCRE_DUPNAMES;
5074              cd->external_options |= PCRE_JCHANGED;              cd->external_flags |= PCRE_JCHANGED;
5075              break;              break;
5076    
5077              case 'i': *optset |= PCRE_CASELESS; break;              case CHAR_i: *optset |= PCRE_CASELESS; break;
5078              case 'm': *optset |= PCRE_MULTILINE; break;              case CHAR_m: *optset |= PCRE_MULTILINE; break;
5079              case 's': *optset |= PCRE_DOTALL; break;              case CHAR_s: *optset |= PCRE_DOTALL; break;
5080              case 'x': *optset |= PCRE_EXTENDED; break;              case CHAR_x: *optset |= PCRE_EXTENDED; break;
5081              case 'U': *optset |= PCRE_UNGREEDY; break;              case CHAR_U: *optset |= PCRE_UNGREEDY; break;
5082              case 'X': *optset |= PCRE_EXTRA; break;              case CHAR_X: *optset |= PCRE_EXTRA; break;
5083    
5084              default:  *errorcodeptr = ERR12;              default:  *errorcodeptr = ERR12;
5085                        ptr--;    /* Correct the offset */                        ptr--;    /* Correct the offset */
# Line 4465  for (;; ptr++) Line 5110  for (;; ptr++)
5110          both phases.          both phases.
5111    
5112          If we are not at the pattern start, compile code to change the ims          If we are not at the pattern start, compile code to change the ims
5113          options if this setting actually changes any of them. We also pass the          options if this setting actually changes any of them, and reset the
5114          new setting back so that it can be put at the start of any following          greedy defaults and the case value for firstbyte and reqbyte. */
         branches, and when this group ends (if we are in a group), a resetting  
         item can be compiled. */  
5115    
5116          if (*ptr == ')')          if (*ptr == CHAR_RIGHT_PARENTHESIS)
5117            {            {
5118            if (code == cd->start_code + 1 + LINK_SIZE &&            if (code == cd->start_code + 1 + LINK_SIZE &&
5119                 (lengthptr == NULL || *lengthptr == 2 + 2*LINK_SIZE))                 (lengthptr == NULL || *lengthptr == 2 + 2*LINK_SIZE))
5120              {              {
5121              cd->external_options = newoptions;              cd->external_options = newoptions;
             options = newoptions;  
5122              }              }
5123           else           else
5124              {              {
# Line 4485  for (;; ptr++) Line 5127  for (;; ptr++)
5127                *code++ = OP_OPT;                *code++ = OP_OPT;
5128                *code++ = newoptions & PCRE_IMS;                *code++ = newoptions & PCRE_IMS;
5129                }                }
   
             /* Change options at this level, and pass them back for use  
             in subsequent branches. Reset the greedy defaults and the case  
             value for firstbyte and reqbyte. */  
   
             *optionsptr = options = newoptions;  
5130              greedy_default = ((newoptions & PCRE_UNGREEDY) != 0);              greedy_default = ((newoptions & PCRE_UNGREEDY) != 0);
5131              greedy_non_default = greedy_default ^ 1;              greedy_non_default = greedy_default ^ 1;
5132              req_caseopt = ((options & PCRE_CASELESS) != 0)? REQ_CASELESS : 0;              req_caseopt = ((newoptions & PCRE_CASELESS) != 0)? REQ_CASELESS : 0;
5133              }              }
5134    
5135              /* Change options at this level, and pass them back for use
5136              in subsequent branches. When not at the start of the pattern, this
5137              information is also necessary so that a resetting item can be
5138              compiled at the end of a group (if we are in a group). */
5139    
5140              *optionsptr = options = newoptions;
5141            previous = NULL;       /* This item can't be repeated */            previous = NULL;       /* This item can't be repeated */
5142            continue;              /* It is complete */            continue;              /* It is complete */
5143            }            }
# Line 4611  for (;; ptr++) Line 5253  for (;; ptr++)
5253    
5254      /* Error if hit end of pattern */      /* Error if hit end of pattern */
5255    
5256      if (*ptr != ')')      if (*ptr != CHAR_RIGHT_PARENTHESIS)
5257        {        {
5258        *errorcodeptr = ERR14;        *errorcodeptr = ERR14;
5259        goto FAILED;        goto FAILED;
5260        }        }
5261    
5262      /* In the pre-compile phase, update the length by the length of the nested      /* In the pre-compile phase, update the length by the length of the group,
5263      group, less the brackets at either end. Then reduce the compiled code to      less the brackets at either end. Then reduce the compiled code to just a
5264      just the brackets so that it doesn't use much memory if it is duplicated by      set of non-capturing brackets so that it doesn't use much memory if it is
5265      a quantifier. */      duplicated by a quantifier.*/
5266    
5267      if (lengthptr != NULL)      if (lengthptr != NULL)
5268        {        {
5269          if (OFLOW_MAX - *lengthptr < length_prevgroup - 2 - 2*LINK_SIZE)
5270            {
5271            *errorcodeptr = ERR20;
5272            goto FAILED;
5273            }
5274        *lengthptr += length_prevgroup - 2 - 2*LINK_SIZE;        *lengthptr += length_prevgroup - 2 - 2*LINK_SIZE;
5275        code++;        *code++ = OP_BRA;
5276        PUTINC(code, 0, 1 + LINK_SIZE);        PUTINC(code, 0, 1 + LINK_SIZE);
5277        *code++ = OP_KET;        *code++ = OP_KET;
5278        PUTINC(code, 0, 1 + LINK_SIZE);        PUTINC(code, 0, 1 + LINK_SIZE);
5279          break;    /* No need to waste time with special character handling */
5280        }        }
5281    
5282      /* Otherwise update the main code pointer to the end of the group. */      /* Otherwise update the main code pointer to the end of the group. */
5283    
5284      else code = tempcode;      code = tempcode;
5285    
5286      /* For a DEFINE group, required and first character settings are not      /* For a DEFINE group, required and first character settings are not
5287      relevant. */      relevant. */
# Line 4703  for (;; ptr++) Line 5351  for (;; ptr++)
5351      We can test for values between ESC_b and ESC_Z for the latter; this may      We can test for values between ESC_b and ESC_Z for the latter; this may
5352      have to change if any new ones are ever created. */      have to change if any new ones are ever created. */
5353    
5354      case '\\':      case CHAR_BACKSLASH:
5355      tempptr = ptr;      tempptr = ptr;
5356      c = check_escape(&ptr, errorcodeptr, cd->bracount, options, FALSE);      c = check_escape(&ptr, errorcodeptr, cd->bracount, options, FALSE);
5357      if (*errorcodeptr != 0) goto FAILED;      if (*errorcodeptr != 0) goto FAILED;
# Line 4712  for (;; ptr++) Line 5360  for (;; ptr++)
5360        {        {
5361        if (-c == ESC_Q)            /* Handle start of quoted string */        if (-c == ESC_Q)            /* Handle start of quoted string */
5362          {          {
5363          if (ptr[1] == '\\' && ptr[2] == 'E') ptr += 2; /* avoid empty string */          if (ptr[1] == CHAR_BACKSLASH && ptr[2] == CHAR_E)
5364            else inescq = TRUE;            ptr += 2;               /* avoid empty string */
5365                else inescq = TRUE;
5366          continue;          continue;
5367          }          }
5368    
# Line 4730  for (;; ptr++) Line 5379  for (;; ptr++)
5379        zerofirstbyte = firstbyte;        zerofirstbyte = firstbyte;
5380        zeroreqbyte = reqbyte;        zeroreqbyte = reqbyte;
5381    
5382          /* \g<name> or \g'name' is a subroutine call by name and \g<n> or \g'n'
5383          is a subroutine call by number (Oniguruma syntax). In fact, the value
5384          -ESC_g is returned only for these cases. So we don't need to check for <
5385          or ' if the value is -ESC_g. For the Perl syntax \g{n} the value is
5386          -ESC_REF+n, and for the Perl syntax \g{name} the result is -ESC_k (as
5387          that is a synonym for a named back reference). */
5388    
5389          if (-c == ESC_g)
5390            {
5391            const uschar *p;
5392            save_hwm = cd->hwm;   /* Normally this is set when '(' is read */
5393            terminator = (*(++ptr) == CHAR_LESS_THAN_SIGN)?
5394              CHAR_GREATER_THAN_SIGN : CHAR_APOSTROPHE;
5395    
5396            /* These two statements stop the compiler from warning about possibly
5397            unset variables caused by the jump to HANDLE_NUMERICAL_RECURSION. In
5398            fact, because we actually check for a number below, the paths that
5399            would actually be in error are never taken. */
5400    
5401            skipbytes = 0;
5402            reset_bracount = FALSE;
5403    
5404            /* Test for a name */
5405    
5406            if (ptr[1] != CHAR_PLUS && ptr[1] != CHAR_MINUS)
5407              {
5408              BOOL isnumber = TRUE;
5409              for (p = ptr + 1; *p != 0 && *p != terminator; p++)
5410                {
5411                if ((cd->ctypes[*p] & ctype_digit) == 0) isnumber = FALSE;
5412                if ((cd->ctypes[*p] & ctype_word) == 0) break;
5413                }
5414              if (*p != terminator)
5415                {
5416                *errorcodeptr = ERR57;
5417                break;
5418                }
5419              if (isnumber)
5420                {
5421                ptr++;
5422                goto HANDLE_NUMERICAL_RECURSION;
5423                }
5424              is_recurse = TRUE;
5425              goto NAMED_REF_OR_RECURSE;
5426              }
5427    
5428            /* Test a signed number in angle brackets or quotes. */
5429    
5430            p = ptr + 2;
5431            while ((digitab[*p] & ctype_digit) != 0) p++;
5432            if (*p != terminator)
5433              {
5434              *errorcodeptr = ERR57;
5435              break;
5436              }
5437            ptr++;
5438            goto HANDLE_NUMERICAL_RECURSION;
5439            }
5440    
5441        /* \k<name> or \k'name' is a back reference by name (Perl syntax).        /* \k<name> or \k'name' is a back reference by name (Perl syntax).
5442        We also support \k{name} (.NET syntax) */        We also support \k{name} (.NET syntax) */
5443    
5444        if (-c == ESC_k && (ptr[1] == '<' || ptr[1] == '\'' || ptr[1] == '{'))        if (-c == ESC_k && (ptr[1] == CHAR_LESS_THAN_SIGN ||
5445              ptr[1] == CHAR_APOSTROPHE || ptr[1] == CHAR_LEFT_CURLY_BRACKET))
5446          {          {
5447          is_recurse = FALSE;          is_recurse = FALSE;
5448          terminator = (*(++ptr) == '<')? '>' : (*ptr == '\'')? '\'' : '}';          terminator = (*(++ptr) == CHAR_LESS_THAN_SIGN)?
5449              CHAR_GREATER_THAN_SIGN : (*ptr == CHAR_APOSTROPHE)?
5450              CHAR_APOSTROPHE : CHAR_RIGHT_CURLY_BRACKET;
5451          goto NAMED_REF_OR_RECURSE;          goto NAMED_REF_OR_RECURSE;
5452          }          }
5453    
# Line 4837  for (;; ptr++) Line 5548  for (;; ptr++)
5548      *code++ = ((options & PCRE_CASELESS) != 0)? OP_CHARNC : OP_CHAR;      *code++ = ((options & PCRE_CASELESS) != 0)? OP_CHARNC : OP_CHAR;
5549      for (c = 0; c < mclength; c++) *code++ = mcbuffer[c];      for (c = 0; c < mclength; c++) *code++ = mcbuffer[c];
5550    
5551        /* Remember if \r or \n were seen */
5552    
5553        if (mcbuffer[0] == CHAR_CR || mcbuffer[0] == CHAR_NL)
5554          cd->external_flags |= PCRE_HASCRORLF;
5555    
5556      /* Set the first and required bytes appropriately. If no previous first      /* Set the first and required bytes appropriately. If no previous first
5557      byte, set it from this character, but revert to none on a zero repeat.      byte, set it from this character, but revert to none on a zero repeat.
5558      Otherwise, leave the firstbyte value alone, and don't change it on a zero      Otherwise, leave the firstbyte value alone, and don't change it on a zero
# Line 5079  for (;;) Line 5795  for (;;)
5795    compile a resetting op-code following, except at the very end of the pattern.    compile a resetting op-code following, except at the very end of the pattern.
5796    Return leaving the pointer at the terminating char. */    Return leaving the pointer at the terminating char. */
5797    
5798    if (*ptr != '|')    if (*ptr != CHAR_VERTICAL_LINE)
5799      {      {
5800      if (lengthptr == NULL)      if (lengthptr == NULL)
5801        {        {
# Line 5102  for (;;) Line 5818  for (;;)
5818    
5819      /* Resetting option if needed */      /* Resetting option if needed */
5820    
5821      if ((options & PCRE_IMS) != oldims && *ptr == ')')      if ((options & PCRE_IMS) != oldims && *ptr == CHAR_RIGHT_PARENTHESIS)
5822        {        {
5823        *code++ = OP_OPT;        *code++ = OP_OPT;
5824        *code++ = oldims;        *code++ = oldims;
# Line 5119  for (;;) Line 5835  for (;;)
5835      *ptrptr = ptr;      *ptrptr = ptr;
5836      *firstbyteptr = firstbyte;      *firstbyteptr = firstbyte;
5837      *reqbyteptr = reqbyte;      *reqbyteptr = reqbyte;
5838      if (lengthptr != NULL) *lengthptr += length;      if (lengthptr != NULL)
5839          {
5840          if (OFLOW_MAX - *lengthptr < length)
5841            {
5842            *errorcodeptr = ERR20;
5843            return FALSE;
5844            }
5845          *lengthptr += length;
5846          }
5847      return TRUE;      return TRUE;
5848      }      }
5849    
# Line 5223  do { Line 5947  do {
5947       if (!is_anchored(scode, options, bracket_map, backref_map)) return FALSE;       if (!is_anchored(scode, options, bracket_map, backref_map)) return FALSE;
5948       }       }
5949    
5950     /* .* is not anchored unless DOTALL is set and it isn't in brackets that     /* .* is not anchored unless DOTALL is set (which generates OP_ALLANY) and
5951     are or may be referenced. */     it isn't in brackets that are or may be referenced. */
5952    
5953     else if ((op == OP_TYPESTAR || op == OP_TYPEMINSTAR ||     else if ((op == OP_TYPESTAR || op == OP_TYPEMINSTAR ||
5954               op == OP_TYPEPOSSTAR) &&               op == OP_TYPEPOSSTAR))
             (*options & PCRE_DOTALL) != 0)  
5955       {       {
5956       if (scode[1] != OP_ANY || (bracket_map & backref_map) != 0) return FALSE;       if (scode[1] != OP_ALLANY || (bracket_map & backref_map) != 0)
5957           return FALSE;
5958       }       }
5959    
5960     /* Check for explicit anchoring */     /* Check for explicit anchoring */
# Line 5276  do { Line 6000  do {
6000       NULL, 0, FALSE);       NULL, 0, FALSE);
6001     register int op = *scode;     register int op = *scode;
6002    
6003       /* If we are at the start of a conditional assertion group, *both* the
6004       conditional assertion *and* what follows the condition must satisfy the test
6005       for start of line. Other kinds of condition fail. Note that there may be an
6006       auto-callout at the start of a condition. */
6007    
6008       if (op == OP_COND)
6009         {
6010         scode += 1 + LINK_SIZE;
6011         if (*scode == OP_CALLOUT) scode += _pcre_OP_lengths[OP_CALLOUT];
6012         switch (*scode)
6013           {
6014           case OP_CREF:
6015           case OP_RREF:
6016           case OP_DEF:
6017           return FALSE;
6018    
6019           default:     /* Assertion */
6020           if (!is_startline(scode, bracket_map, backref_map)) return FALSE;
6021           do scode += GET(scode, 1); while (*scode == OP_ALT);
6022           scode += 1 + LINK_SIZE;
6023           break;
6024           }
6025         scode = first_significant_code(scode, NULL, 0, FALSE);
6026         op = *scode;
6027         }
6028    
6029     /* Non-capturing brackets */     /* Non-capturing brackets */
6030    
6031     if (op == OP_BRA)     if (op == OP_BRA)
# Line 5294  do { Line 6044  do {
6044    
6045     /* Other brackets */     /* Other brackets */
6046    
6047     else if (op == OP_ASSERT || op == OP_ONCE || op == OP_COND)     else if (op == OP_ASSERT || op == OP_ONCE)
6048       { if (!is_startline(scode, bracket_map, backref_map)) return FALSE; }       {
6049         if (!is_startline(scode, bracket_map, backref_map)) return FALSE;
6050         }
6051    
6052     /* .* means "start at start or after \n" if it isn't in brackets that     /* .* means "start at start or after \n" if it isn't in brackets that
6053     may be referenced. */     may be referenced. */
# Line 5412  Returns: pointer to compiled data Line 6164  Returns: pointer to compiled data
6164                  with errorptr and erroroffset set                  with errorptr and erroroffset set
6165  */  */
6166    
6167  PCRE_EXP_DEFN pcre *  PCRE_EXP_DEFN pcre * PCRE_CALL_CONVENTION
6168  pcre_compile(const char *pattern, int options, const char **errorptr,  pcre_compile(const char *pattern, int options, const char **errorptr,
6169    int *erroroffset, const unsigned char *tables)    int *erroroffset, const unsigned char *tables)
6170  {  {
# Line 5420  return pcre_compile2(pattern, options, N Line 6172  return pcre_compile2(pattern, options, N
6172  }  }
6173    
6174    
6175  PCRE_EXP_DEFN pcre *  PCRE_EXP_DEFN pcre * PCRE_CALL_CONVENTION
6176  pcre_compile2(const char *pattern, int options, int *errorcodeptr,  pcre_compile2(const char *pattern, int options, int *errorcodeptr,
6177    const char **errorptr, int *erroroffset, const unsigned char *tables)    const char **errorptr, int *erroroffset, const unsigned char *tables)
6178  {  {
# Line 5428  real_pcre *re; Line 6180  real_pcre *re;
6180  int length = 1;  /* For final END opcode */  int length = 1;  /* For final END opcode */
6181  int firstbyte, reqbyte, newline;  int firstbyte, reqbyte, newline;
6182  int errorcode = 0;  int errorcode = 0;
6183    int skipatstart = 0;
6184  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
6185  BOOL utf8;  BOOL utf8;
6186  #endif  #endif
# Line 5446  to fill in forward references to subpatt Line 6199  to fill in forward references to subpatt
6199    
6200  uschar cworkspace[COMPILE_WORK_SIZE];  uschar cworkspace[COMPILE_WORK_SIZE];
6201    
   
6202  /* Set this early so that early errors get offset 0. */  /* Set this early so that early errors get offset 0. */
6203    
6204  ptr = (const uschar *)pattern;  ptr = (const uschar *)pattern;
# Line 5474  if (erroroffset == NULL) Line 6226  if (erroroffset == NULL)
6226    
6227  *erroroffset = 0;  *erroroffset = 0;
6228    
6229    /* Set up pointers to the individual character tables */
6230    
6231    if (tables == NULL) tables = _pcre_default_tables;
6232    cd->lcc = tables + lcc_offset;
6233    cd->fcc = tables + fcc_offset;
6234    cd->cbits = tables + cbits_offset;
6235    cd->ctypes = tables + ctypes_offset;
6236    
6237    /* Check that all undefined public option bits are zero */
6238    
6239    if ((options & ~PUBLIC_COMPILE_OPTIONS) != 0)
6240      {
6241      errorcode = ERR17;
6242      goto PCRE_EARLY_ERROR_RETURN;
6243      }
6244    
6245    /* Check for global one-time settings at the start of the pattern, and remember
6246    the offset for later. */
6247    
6248    while (ptr[skipatstart] == CHAR_LEFT_PARENTHESIS &&
6249           ptr[skipatstart+1] == CHAR_ASTERISK)
6250      {
6251      int newnl = 0;
6252      int newbsr = 0;
6253    
6254      if (strncmp((char *)(ptr+skipatstart+2), STRING_UTF8_RIGHTPAR, 5) == 0)
6255        { skipatstart += 7; options |= PCRE_UTF8; continue; }
6256    
6257      if (strncmp((char *)(ptr+skipatstart+2), STRING_CR_RIGHTPAR, 3) == 0)
6258        { skipatstart += 5; newnl = PCRE_NEWLINE_CR; }
6259      else if (strncmp((char *)(ptr+skipatstart+2), STRING_LF_RIGHTPAR, 3)  == 0)
6260        { skipatstart += 5; newnl = PCRE_NEWLINE_LF; }
6261      else if (strncmp((char *)(ptr+skipatstart+2), STRING_CRLF_RIGHTPAR, 5)  == 0)
6262        { skipatstart += 7; newnl = PCRE_NEWLINE_CR + PCRE_NEWLINE_LF; }
6263      else if (strncmp((char *)(ptr+skipatstart+2), STRING_ANY_RIGHTPAR, 4) == 0)
6264        { skipatstart += 6; newnl = PCRE_NEWLINE_ANY; }
6265      else if (strncmp((char *)(ptr+skipatstart+2), STRING_ANYCRLF_RIGHTPAR, 8) == 0)
6266        { skipatstart += 10; newnl = PCRE_NEWLINE_ANYCRLF; }
6267    
6268      else if (strncmp((char *)(ptr+skipatstart+2), STRING_BSR_ANYCRLF_RIGHTPAR, 12) == 0)
6269        { skipatstart += 14; newbsr = PCRE_BSR_ANYCRLF; }
6270      else if (strncmp((char *)(ptr+skipatstart+2), STRING_BSR_UNICODE_RIGHTPAR, 12) == 0)
6271        { skipatstart += 14; newbsr = PCRE_BSR_UNICODE; }
6272    
6273      if (newnl != 0)
6274        options = (options & ~PCRE_NEWLINE_BITS) | newnl;
6275      else if (newbsr != 0)
6276        options = (options & ~(PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE)) | newbsr;
6277      else break;
6278      }
6279    
6280  /* Can't support UTF8 unless PCRE has been compiled to include the code. */  /* Can't support UTF8 unless PCRE has been compiled to include the code. */
6281    
6282  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
# Line 5492  if ((options & PCRE_UTF8) != 0) Line 6295  if ((options & PCRE_UTF8) != 0)
6295    }    }
6296  #endif  #endif
6297    
6298  if ((options & ~PUBLIC_OPTIONS) != 0)  /* Check validity of \R options. */
6299    
6300    switch (options & (PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE))
6301    {    {
6302    errorcode = ERR17;    case 0:
6303    goto PCRE_EARLY_ERROR_RETURN;    case PCRE_BSR_ANYCRLF:
6304      case PCRE_BSR_UNICODE:
6305      break;
6306      default: errorcode = ERR56; goto PCRE_EARLY_ERROR_RETURN;
6307    }    }
6308    
 /* Set up pointers to the individual character tables */  
   
 if (tables == NULL) tables = _pcre_default_tables;  
 cd->lcc = tables + lcc_offset;  
 cd->fcc = tables + fcc_offset;  
 cd->cbits = tables + cbits_offset;  
 cd->ctypes = tables + ctypes_offset;  
   
6309  /* Handle different types of newline. The three bits give seven cases. The  /* Handle different types of newline. The three bits give seven cases. The
6310  current code allows for fixed one- or two-byte sequences, plus "any" and  current code allows for fixed one- or two-byte sequences, plus "any" and
6311  "anycrlf". */  "anycrlf". */
6312    
6313  switch (options & (PCRE_NEWLINE_CRLF | PCRE_NEWLINE_ANY))  switch (options & PCRE_NEWLINE_BITS)
6314    {    {
6315    case 0: newline = NEWLINE; break;   /* Compile-time default */    case 0: newline = NEWLINE; break;   /* Build-time default */
6316    case PCRE_NEWLINE_CR: newline = '\r'; break;    case PCRE_NEWLINE_CR: newline = CHAR_CR; break;
6317    case PCRE_NEWLINE_LF: newline = '\n'; break;    case PCRE_NEWLINE_LF: newline = CHAR_NL; break;
6318    case PCRE_NEWLINE_CR+    case PCRE_NEWLINE_CR+
6319         PCRE_NEWLINE_LF: newline = ('\r' << 8) | '\n'; break;         PCRE_NEWLINE_LF: newline = (CHAR_CR << 8) | CHAR_NL; break;
6320    case PCRE_NEWLINE_ANY: newline = -1; break;    case PCRE_NEWLINE_ANY: newline = -1; break;
6321    case PCRE_NEWLINE_ANYCRLF: newline = -2; break;    case PCRE_NEWLINE_ANYCRLF: newline = -2; break;
6322    default: errorcode = ERR56; goto PCRE_EARLY_ERROR_RETURN;    default: errorcode = ERR56; goto PCRE_EARLY_ERROR_RETURN;
# Line 5565  to compile parts of the pattern into; th Line 6365  to compile parts of the pattern into; th
6365  no longer needed, so hopefully this workspace will never overflow, though there  no longer needed, so hopefully this workspace will never overflow, though there
6366  is a test for its doing so. */  is a test for its doing so. */
6367    
6368  cd->bracount = 0;  cd->bracount = cd->final_bracount = 0;
6369  cd->names_found = 0;  cd->names_found = 0;
6370  cd->name_entry_size = 0;  cd->name_entry_size = 0;
6371  cd->name_table = NULL;  cd->name_table = NULL;
# Line 5575  cd->hwm = cworkspace; Line 6375  cd->hwm = cworkspace;
6375  cd->start_pattern = (const uschar *)pattern;  cd->start_pattern = (const uschar *)pattern;
6376  cd->end_pattern = (const uschar *)(pattern + strlen(pattern));  cd->end_pattern = (const uschar *)(pattern + strlen(pattern));
6377  cd->req_varyopt = 0;  cd->req_varyopt = 0;
 cd->nopartial = FALSE;  
6378  cd->external_options = options;  cd->external_options = options;
6379    cd->external_flags = 0;
6380    
6381  /* Now do the pre-compile. On error, errorcode will be set non-zero, so we  /* Now do the pre-compile. On error, errorcode will be set non-zero, so we
6382  don't need to look at the result of the function here. The initial options have  don't need to look at the result of the function here. The initial options have
# Line 5584  been put into the cd block so that they Line 6384  been put into the cd block so that they
6384  found within the regex right at the beginning. Bringing initial option settings  found within the regex right at the beginning. Bringing initial option settings
6385  outside can help speed up starting point checks. */  outside can help speed up starting point checks. */
6386    
6387    ptr += skipatstart;
6388  code = cworkspace;  code = cworkspace;
6389  *code = OP_BRA;  *code = OP_BRA;
6390  (void)compile_regex(cd->external_options, cd->external_options & PCRE_IMS,  (void)compile_regex(cd->external_options, cd->external_options & PCRE_IMS,
# Line 5614  if (re == NULL) Line 6415  if (re == NULL)
6415    goto PCRE_EARLY_ERROR_RETURN;    goto PCRE_EARLY_ERROR_RETURN;
6416    }    }
6417    
6418  /* Put in the magic number, and save the sizes, initial options, and character  /* Put in the magic number, and save the sizes, initial options, internal
6419  table pointer. NULL is used for the default character tables. The nullpad field  flags, and character table pointer. NULL is used for the default character
6420  is at the end; it's there to help in the case when a regex compiled on a system  tables. The nullpad field is at the end; it's there to help in the case when a
6421  with 4-byte pointers is run on another with 8-byte pointers. */  regex compiled on a system with 4-byte pointers is run on another with 8-byte
6422    pointers. */
6423    
6424  re->magic_number = MAGIC_NUMBER;  re->magic_number = MAGIC_NUMBER;
6425  re->size = size;  re->size = size;
6426  re->options = cd->external_options;  re->options = cd->external_options;
6427    re->flags = cd->external_flags;
6428  re->dummy1 = 0;  re->dummy1 = 0;
6429  re->first_byte = 0;  re->first_byte = 0;
6430  re->req_byte = 0;  re->req_byte = 0;
# Line 5639  field. Reset the bracket count and the n Line 6442  field. Reset the bracket count and the n
6442  field; this time it's used for remembering forward references to subpatterns.  field; this time it's used for remembering forward references to subpatterns.
6443  */  */
6444    
6445    cd->final_bracount = cd->bracount;  /* Save for checking forward references */
6446  cd->bracount = 0;  cd->bracount = 0;
6447  cd->names_found = 0;  cd->names_found = 0;
6448  cd->name_table = (uschar *)re + re->name_table_offset;  cd->name_table = (uschar *)re + re->name_table_offset;
# Line 5646  codestart = cd->name_table + re->name_en Line 6450  codestart = cd->name_table + re->name_en
6450  cd->start_code = codestart;  cd->start_code = codestart;
6451  cd->hwm = cworkspace;  cd->hwm = cworkspace;
6452  cd->req_varyopt = 0;  cd->req_varyopt = 0;
6453  cd->nopartial = FALSE;  cd->had_accept = FALSE;
6454    
6455  /* Set up a starting, non-extracting bracket, then compile the expression. On  /* Set up a starting, non-extracting bracket, then compile the expression. On
6456  error, errorcode will be set non-zero, so we don't need to look at the result  error, errorcode will be set non-zero, so we don't need to look at the result
6457  of the function here. */  of the function here. */
6458    
6459  ptr = (const uschar *)pattern;  ptr = (const uschar *)pattern + skipatstart;
6460  code = (uschar *)codestart;  code = (uschar *)codestart;
6461  *code = OP_BRA;  *code = OP_BRA;
6462  (void)compile_regex(re->options, re->options & PCRE_IMS, &code, &ptr,  (void)compile_regex(re->options, re->options & PCRE_IMS, &code, &ptr,
6463    &errorcode, FALSE, FALSE, 0, &firstbyte, &reqbyte, NULL, cd, NULL);    &errorcode, FALSE, FALSE, 0, &firstbyte, &reqbyte, NULL, cd, NULL);
6464  re->top_bracket = cd->bracount;  re->top_bracket = cd->bracount;
6465  re->top_backref = cd->top_backref;  re->top_backref = cd->top_backref;
6466    re->flags = cd->external_flags;
6467    
6468  if (cd->nopartial) re->options |= PCRE_NOPARTIAL;  if (cd->had_accept) reqbyte = -1;   /* Must disable after (*ACCEPT) */
6469    
6470  /* If not reached end of pattern on success, there's an excess bracket. */  /* If not reached end of pattern on success, there's an excess bracket. */
6471    
# Line 5702  if (errorcode != 0) Line 6507  if (errorcode != 0)
6507    PCRE_EARLY_ERROR_RETURN:    PCRE_EARLY_ERROR_RETURN:
6508    *erroroffset = ptr - (const uschar *)pattern;    *erroroffset = ptr - (const uschar *)pattern;
6509    PCRE_EARLY_ERROR_RETURN2:    PCRE_EARLY_ERROR_RETURN2:
6510    *errorptr = error_texts[errorcode];    *errorptr = find_error_text(errorcode);
6511    if (errorcodeptr != NULL) *errorcodeptr = errorcode;    if (errorcodeptr != NULL) *errorcodeptr = errorcode;
6512    return NULL;    return NULL;
6513    }    }
# Line 5731  if ((re->options & PCRE_ANCHORED) == 0) Line 6536  if ((re->options & PCRE_ANCHORED) == 0)
6536        int ch = firstbyte & 255;        int ch = firstbyte & 255;
6537        re->first_byte = ((firstbyte & REQ_CASELESS) != 0 &&        re->first_byte = ((firstbyte & REQ_CASELESS) != 0 &&
6538           cd->fcc[ch] == ch)? ch : firstbyte;           cd->fcc[ch] == ch)? ch : firstbyte;
6539        re->options |= PCRE_FIRSTSET;        re->flags |= PCRE_FIRSTSET;
6540        }        }
6541      else if (is_startline(codestart, 0, cd->backref_map))      else if (is_startline(codestart, 0, cd->backref_map))
6542        re->options |= PCRE_STARTLINE;        re->flags |= PCRE_STARTLINE;
6543      }      }
6544    }    }
6545    
# Line 5748  if (reqbyte >= 0 && Line 6553  if (reqbyte >= 0 &&
6553    int ch = reqbyte & 255;    int ch = reqbyte & 255;
6554    re->req_byte = ((reqbyte & REQ_CASELESS) != 0 &&    re->req_byte = ((reqbyte & REQ_CASELESS) != 0 &&
6555      cd->fcc[ch] == ch)? (reqbyte & ~REQ_CASELESS) : reqbyte;      cd->fcc[ch] == ch)? (reqbyte & ~REQ_CASELESS) : reqbyte;
6556    re->options |= PCRE_REQCHSET;    re->flags |= PCRE_REQCHSET;
6557    }    }
6558    
6559  /* Print out the compiled data if debugging is enabled. This is never the  /* Print out the compiled data if debugging is enabled. This is never the
# Line 5759  case when building a production library. Line 6564  case when building a production library.
6564  printf("Length = %d top_bracket = %d top_backref = %d\n",  printf("Length = %d top_bracket = %d top_backref = %d\n",
6565    length, re->top_bracket, re->top_backref);    length, re->top_bracket, re->top_backref);
6566    
6567  if (re->options != 0)  printf("Options=%08x\n", re->options);
   {  
   printf("%s%s%s%s%s%s%s%s%s\n",  
     ((re->options & PCRE_NOPARTIAL) != 0)? "nopartial " : "",  
     ((re->options & PCRE_ANCHORED) != 0)? "anchored " : "",  
     ((re->options & PCRE_CASELESS) != 0)? "caseless " : "",  
     ((re->options & PCRE_EXTENDED) != 0)? "extended " : "",  
     ((re->options & PCRE_MULTILINE) != 0)? "multiline " : "",  
     ((re->options & PCRE_DOTALL) != 0)? "dotall " : "",  
     ((re->options & PCRE_DOLLAR_ENDONLY) != 0)? "endonly " : "",  
     ((re->options & PCRE_EXTRA) != 0)? "extra " : "",  
     ((re->options & PCRE_UNGREEDY) != 0)? "ungreedy " : "");  
   }  
6568    
6569  if ((re->options & PCRE_FIRSTSET) != 0)  if ((re->flags & PCRE_FIRSTSET) != 0)
6570    {    {
6571    int ch = re->first_byte & 255;    int ch = re->first_byte & 255;
6572    const char *caseless = ((re->first_byte & REQ_CASELESS) == 0)?    const char *caseless = ((re->first_byte & REQ_CASELESS) == 0)?
# Line 5782  if ((re->options & PCRE_FIRSTSET) != 0) Line 6575  if ((re->options & PCRE_FIRSTSET) != 0)
6575      else printf("First char = \\x%02x%s\n", ch, caseless);      else printf("First char = \\x%02x%s\n", ch, caseless);
6576    }    }
6577    
6578  if ((re->options & PCRE_REQCHSET) != 0)  if ((re->flags & PCRE_REQCHSET) != 0)
6579    {    {
6580    int ch = re->req_byte & 255;    int ch = re->req_byte & 255;
6581    const char *caseless = ((re->req_byte & REQ_CASELESS) == 0)?    const char *caseless = ((re->req_byte & REQ_CASELESS) == 0)?
# Line 5799  was compiled can be seen. */ Line 6592  was compiled can be seen. */
6592  if (code - codestart > length)  if (code - codestart > length)
6593    {    {
6594    (pcre_free)(re);    (pcre_free)(re);
6595    *errorptr = error_texts[ERR23];    *errorptr = find_error_text(ERR23);
6596    *erroroffset = ptr - (uschar *)pattern;    *erroroffset = ptr - (uschar *)pattern;
6597    if (errorcodeptr != NULL) *errorcodeptr = ERR23;    if (errorcodeptr != NULL) *errorcodeptr = ERR23;
6598    return NULL;    return NULL;

Legend:
Removed from v.180  
changed lines
  Added in v.412

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12