/[pcre]/code/trunk/pcre_compile.c
ViewVC logotype

Diff of /code/trunk/pcre_compile.c

Parent Directory | Revision Log | View Patch

revision 274 by ph10, Tue Nov 20 10:05:23 2007 UTC revision 411 by ph10, Fri Apr 10 15:40:21 2009 UTC
# Line 6  Line 6 
6  and semantics are as close as possible to those of the Perl 5 language.  and semantics are as close as possible to those of the Perl 5 language.
7    
8                         Written by Philip Hazel                         Written by Philip Hazel
9             Copyright (c) 1997-2007 University of Cambridge             Copyright (c) 1997-2009 University of Cambridge
10    
11  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
12  Redistribution and use in source and binary forms, with or without  Redistribution and use in source and binary forms, with or without
# Line 97  are simple data values; negative values Line 97  are simple data values; negative values
97  on. Zero means further processing is needed (for things like \x), or the escape  on. Zero means further processing is needed (for things like \x), or the escape
98  is invalid. */  is invalid. */
99    
100  #ifndef EBCDIC  /* This is the "normal" table for ASCII systems */  #ifndef EBCDIC
101    
102    /* This is the "normal" table for ASCII systems or for EBCDIC systems running
103    in UTF-8 mode. */
104    
105  static const short int escapes[] = {  static const short int escapes[] = {
106       0,      0,      0,      0,      0,      0,      0,      0,   /* 0 - 7 */       0,                       0,
107       0,      0,    ':',    ';',    '<',    '=',    '>',    '?',   /* 8 - ? */       0,                       0,
108     '@', -ESC_A, -ESC_B, -ESC_C, -ESC_D, -ESC_E,      0, -ESC_G,   /* @ - G */       0,                       0,
109  -ESC_H,      0,      0, -ESC_K,      0,      0,      0,      0,   /* H - O */       0,                       0,
110  -ESC_P, -ESC_Q, -ESC_R, -ESC_S,      0,      0, -ESC_V, -ESC_W,   /* P - W */       0,                       0,
111  -ESC_X,      0, -ESC_Z,    '[',   '\\',    ']',    '^',    '_',   /* X - _ */       CHAR_COLON,              CHAR_SEMICOLON,
112     '`',      7, -ESC_b,      0, -ESC_d,  ESC_e,  ESC_f,      0,   /* ` - g */       CHAR_LESS_THAN_SIGN,     CHAR_EQUALS_SIGN,
113  -ESC_h,      0,      0, -ESC_k,      0,      0,  ESC_n,      0,   /* h - o */       CHAR_GREATER_THAN_SIGN,  CHAR_QUESTION_MARK,
114  -ESC_p,      0,  ESC_r, -ESC_s,  ESC_tee,    0, -ESC_v, -ESC_w,   /* p - w */       CHAR_COMMERCIAL_AT,      -ESC_A,
115       0,      0, -ESC_z                                            /* x - z */       -ESC_B,                  -ESC_C,
116         -ESC_D,                  -ESC_E,
117         0,                       -ESC_G,
118         -ESC_H,                  0,
119         0,                       -ESC_K,
120         0,                       0,
121         0,                       0,
122         -ESC_P,                  -ESC_Q,
123         -ESC_R,                  -ESC_S,
124         0,                       0,
125         -ESC_V,                  -ESC_W,
126         -ESC_X,                  0,
127         -ESC_Z,                  CHAR_LEFT_SQUARE_BRACKET,
128         CHAR_BACKSLASH,          CHAR_RIGHT_SQUARE_BRACKET,
129         CHAR_CIRCUMFLEX_ACCENT,  CHAR_UNDERSCORE,
130         CHAR_GRAVE_ACCENT,       7,
131         -ESC_b,                  0,
132         -ESC_d,                  ESC_e,
133         ESC_f,                   0,
134         -ESC_h,                  0,
135         0,                       -ESC_k,
136         0,                       0,
137         ESC_n,                   0,
138         -ESC_p,                  0,
139         ESC_r,                   -ESC_s,
140         ESC_tee,                 0,
141         -ESC_v,                  -ESC_w,
142         0,                       0,
143         -ESC_z
144  };  };
145    
146  #else           /* This is the "abnormal" table for EBCDIC systems */  #else
147    
148    /* This is the "abnormal" table for EBCDIC systems without UTF-8 support. */
149    
150  static const short int escapes[] = {  static const short int escapes[] = {
151  /*  48 */     0,     0,      0,     '.',    '<',   '(',    '+',    '|',  /*  48 */     0,     0,      0,     '.',    '<',   '(',    '+',    '|',
152  /*  50 */   '&',     0,      0,       0,      0,     0,      0,      0,  /*  50 */   '&',     0,      0,       0,      0,     0,      0,      0,
# Line 142  static const short int escapes[] = { Line 177  static const short int escapes[] = {
177    
178  /* Table of special "verbs" like (*PRUNE). This is a short table, so it is  /* Table of special "verbs" like (*PRUNE). This is a short table, so it is
179  searched linearly. Put all the names into a single string, in order to reduce  searched linearly. Put all the names into a single string, in order to reduce
180  the number of relocations when a shared library is dynamically linked. */  the number of relocations when a shared library is dynamically linked. The
181    string is built from string macros so that it works in UTF-8 mode on EBCDIC
182    platforms. */
183    
184  typedef struct verbitem {  typedef struct verbitem {
185    int   len;    int   len;
# Line 150  typedef struct verbitem { Line 187  typedef struct verbitem {
187  } verbitem;  } verbitem;
188    
189  static const char verbnames[] =  static const char verbnames[] =
190    "ACCEPT\0"    STRING_ACCEPT0
191    "COMMIT\0"    STRING_COMMIT0
192    "F\0"    STRING_F0
193    "FAIL\0"    STRING_FAIL0
194    "PRUNE\0"    STRING_PRUNE0
195    "SKIP\0"    STRING_SKIP0
196    "THEN";    STRING_THEN;
197    
198  static verbitem verbs[] = {  static const verbitem verbs[] = {
199    { 6, OP_ACCEPT },    { 6, OP_ACCEPT },
200    { 6, OP_COMMIT },    { 6, OP_COMMIT },
201    { 1, OP_FAIL },    { 1, OP_FAIL },
# Line 168  static verbitem verbs[] = { Line 205  static verbitem verbs[] = {
205    { 4, OP_THEN  }    { 4, OP_THEN  }
206  };  };
207    
208  static int verbcount = sizeof(verbs)/sizeof(verbitem);  static const int verbcount = sizeof(verbs)/sizeof(verbitem);
209    
210    
211  /* Tables of names of POSIX character classes and their lengths. The names are  /* Tables of names of POSIX character classes and their lengths. The names are
# Line 178  length entry. The first three must be al Line 215  length entry. The first three must be al
215  for handling case independence. */  for handling case independence. */
216    
217  static const char posix_names[] =  static const char posix_names[] =
218    "alpha\0"  "lower\0"  "upper\0"  "alnum\0"  "ascii\0"  "blank\0"    STRING_alpha0 STRING_lower0 STRING_upper0 STRING_alnum0
219    "cntrl\0"  "digit\0"  "graph\0"  "print\0"  "punct\0"  "space\0"    STRING_ascii0 STRING_blank0 STRING_cntrl0 STRING_digit0
220    "word\0"   "xdigit";    STRING_graph0 STRING_print0 STRING_punct0 STRING_space0
221      STRING_word0  STRING_xdigit;
222    
223  static const uschar posix_name_lengths[] = {  static const uschar posix_name_lengths[] = {
224    5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 4, 6, 0 };    5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 4, 6, 0 };
# Line 295  static const char error_texts[] = Line 333  static const char error_texts[] =
333    /* 55 */    /* 55 */
334    "repeating a DEFINE group is not allowed\0"    "repeating a DEFINE group is not allowed\0"
335    "inconsistent NEWLINE options\0"    "inconsistent NEWLINE options\0"
336    "\\g is not followed by a braced name or an optionally braced non-zero number\0"    "\\g is not followed by a braced, angle-bracketed, or quoted name/number or by a plain number\0"
337    "(?+ or (?- or (?(+ or (?(- must be followed by a non-zero number\0"    "a numbered reference must not be zero\0"
338    "(*VERB) with an argument is not supported\0"    "(*VERB) with an argument is not supported\0"
339    /* 60 */    /* 60 */
340    "(*VERB) not recognized\0"    "(*VERB) not recognized\0"
341    "number is too big\0"    "number is too big\0"
342    "subpattern name expected\0"    "subpattern name expected\0"
343    "digit expected after (?+";    "digit expected after (?+\0"
344      "] is an invalid data character in JavaScript compatibility mode";
345    
346    
347  /* Table to identify digits and hex digits. This is used when compiling  /* Table to identify digits and hex digits. This is used when compiling
# Line 321  For convenience, we use the same bit def Line 360  For convenience, we use the same bit def
360    
361  Then we can use ctype_digit and ctype_xdigit in the code. */  Then we can use ctype_digit and ctype_xdigit in the code. */
362    
363  #ifndef EBCDIC  /* This is the "normal" case, for ASCII systems */  #ifndef EBCDIC
364    
365    /* This is the "normal" case, for ASCII systems, and EBCDIC systems running in
366    UTF-8 mode. */
367    
368  static const unsigned char digitab[] =  static const unsigned char digitab[] =
369    {    {
370    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*   0-  7 */    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*   0-  7 */
# Line 357  static const unsigned char digitab[] = Line 400  static const unsigned char digitab[] =
400    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 240-247 */    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 240-247 */
401    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};/* 248-255 */    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};/* 248-255 */
402    
403  #else           /* This is the "abnormal" case, for EBCDIC systems */  #else
404    
405    /* This is the "abnormal" case, for EBCDIC systems not running in UTF-8 mode. */
406    
407  static const unsigned char digitab[] =  static const unsigned char digitab[] =
408    {    {
409    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*   0-  7  0 */    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*   0-  7  0 */
# Line 454  static const char * Line 500  static const char *
500  find_error_text(int n)  find_error_text(int n)
501  {  {
502  const char *s = error_texts;  const char *s = error_texts;
503  for (; n > 0; n--) while (*s++ != 0);  for (; n > 0; n--) while (*s++ != 0) {};
504  return s;  return s;
505  }  }
506    
# Line 502  if (c == 0) *errorcodeptr = ERR1; Line 548  if (c == 0) *errorcodeptr = ERR1;
548  in a table. A non-zero result is something that can be returned immediately.  in a table. A non-zero result is something that can be returned immediately.
549  Otherwise further processing may be required. */  Otherwise further processing may be required. */
550    
551  #ifndef EBCDIC  /* ASCII coding */  #ifndef EBCDIC  /* ASCII/UTF-8 coding */
552  else if (c < '0' || c > 'z') {}                           /* Not alphanumeric */  else if (c < CHAR_0 || c > CHAR_z) {}                     /* Not alphanumeric */
553  else if ((i = escapes[c - '0']) != 0) c = i;  else if ((i = escapes[c - CHAR_0]) != 0) c = i;
554    
555  #else           /* EBCDIC coding */  #else           /* EBCDIC coding */
556  else if (c < 'a' || (ebcdic_chartab[c] & 0x0E) == 0) {}   /* Not alphanumeric */  else if (c < 'a' || (ebcdic_chartab[c] & 0x0E) == 0) {}   /* Not alphanumeric */
# Line 523  else Line 569  else
569      /* A number of Perl escapes are not handled by PCRE. We give an explicit      /* A number of Perl escapes are not handled by PCRE. We give an explicit
570      error. */      error. */
571    
572      case 'l':      case CHAR_l:
573      case 'L':      case CHAR_L:
574      case 'N':      case CHAR_N:
575      case 'u':      case CHAR_u:
576      case 'U':      case CHAR_U:
577      *errorcodeptr = ERR37;      *errorcodeptr = ERR37;
578      break;      break;
579    
580      /* \g must be followed by a number, either plain or braced. If positive, it      /* \g must be followed by one of a number of specific things:
581      is an absolute backreference. If negative, it is a relative backreference.  
582      This is a Perl 5.10 feature. Perl 5.10 also supports \g{name} as a      (1) A number, either plain or braced. If positive, it is an absolute
583      reference to a named group. This is part of Perl's movement towards a      backreference. If negative, it is a relative backreference. This is a Perl
584      unified syntax for back references. As this is synonymous with \k{name}, we      5.10 feature.
585      fudge it up by pretending it really was \k. */  
586        (2) Perl 5.10 also supports \g{name} as a reference to a named group. This
587        is part of Perl's movement towards a unified syntax for back references. As
588        this is synonymous with \k{name}, we fudge it up by pretending it really
589        was \k.
590    
591        (3) For Oniguruma compatibility we also support \g followed by a name or a
592        number either in angle brackets or in single quotes. However, these are
593        (possibly recursive) subroutine calls, _not_ backreferences. Just return
594        the -ESC_g code (cf \k). */
595    
596        case CHAR_g:
597        if (ptr[1] == CHAR_LESS_THAN_SIGN || ptr[1] == CHAR_APOSTROPHE)
598          {
599          c = -ESC_g;
600          break;
601          }
602    
603        /* Handle the Perl-compatible cases */
604    
605      case 'g':      if (ptr[1] == CHAR_LEFT_CURLY_BRACKET)
     if (ptr[1] == '{')  
606        {        {
607        const uschar *p;        const uschar *p;
608        for (p = ptr+2; *p != 0 && *p != '}'; p++)        for (p = ptr+2; *p != 0 && *p != CHAR_RIGHT_CURLY_BRACKET; p++)
609          if (*p != '-' && (digitab[*p] & ctype_digit) == 0) break;          if (*p != CHAR_MINUS && (digitab[*p] & ctype_digit) == 0) break;
610        if (*p != 0 && *p != '}')        if (*p != 0 && *p != CHAR_RIGHT_CURLY_BRACKET)
611          {          {
612          c = -ESC_k;          c = -ESC_k;
613          break;          break;
# Line 554  else Line 617  else
617        }        }
618      else braced = FALSE;      else braced = FALSE;
619    
620      if (ptr[1] == '-')      if (ptr[1] == CHAR_MINUS)
621        {        {
622        negated = TRUE;        negated = TRUE;
623        ptr++;        ptr++;
# Line 563  else Line 626  else
626    
627      c = 0;      c = 0;
628      while ((digitab[ptr[1]] & ctype_digit) != 0)      while ((digitab[ptr[1]] & ctype_digit) != 0)
629        c = c * 10 + *(++ptr) - '0';        c = c * 10 + *(++ptr) - CHAR_0;
630    
631      if (c < 0)      if (c < 0)   /* Integer overflow */
632        {        {
633        *errorcodeptr = ERR61;        *errorcodeptr = ERR61;
634        break;        break;
635        }        }
636    
637      if (c == 0 || (braced && *(++ptr) != '}'))      if (braced && *(++ptr) != CHAR_RIGHT_CURLY_BRACKET)
638        {        {
639        *errorcodeptr = ERR57;        *errorcodeptr = ERR57;
640        break;        break;
641        }        }
642    
643        if (c == 0)
644          {
645          *errorcodeptr = ERR58;
646          break;
647          }
648    
649      if (negated)      if (negated)
650        {        {
651        if (c > bracount)        if (c > bracount)
# Line 602  else Line 671  else
671      value is greater than 377, the least significant 8 bits are taken. Inside a      value is greater than 377, the least significant 8 bits are taken. Inside a
672      character class, \ followed by a digit is always an octal number. */      character class, \ followed by a digit is always an octal number. */
673    
674      case '1': case '2': case '3': case '4': case '5':      case CHAR_1: case CHAR_2: case CHAR_3: case CHAR_4: case CHAR_5:
675      case '6': case '7': case '8': case '9':      case CHAR_6: case CHAR_7: case CHAR_8: case CHAR_9:
676    
677      if (!isclass)      if (!isclass)
678        {        {
679        oldptr = ptr;        oldptr = ptr;
680        c -= '0';        c -= CHAR_0;
681        while ((digitab[ptr[1]] & ctype_digit) != 0)        while ((digitab[ptr[1]] & ctype_digit) != 0)
682          c = c * 10 + *(++ptr) - '0';          c = c * 10 + *(++ptr) - CHAR_0;
683        if (c < 0)        if (c < 0)    /* Integer overflow */
684          {          {
685          *errorcodeptr = ERR61;          *errorcodeptr = ERR61;
686          break;          break;
# Line 628  else Line 697  else
697      generates a binary zero byte and treats the digit as a following literal.      generates a binary zero byte and treats the digit as a following literal.
698      Thus we have to pull back the pointer by one. */      Thus we have to pull back the pointer by one. */
699    
700      if ((c = *ptr) >= '8')      if ((c = *ptr) >= CHAR_8)
701        {        {
702        ptr--;        ptr--;
703        c = 0;        c = 0;
# Line 641  else Line 710  else
710      to do). Nowadays we allow for larger numbers in UTF-8 mode, but no more      to do). Nowadays we allow for larger numbers in UTF-8 mode, but no more
711      than 3 octal digits. */      than 3 octal digits. */
712    
713      case '0':      case CHAR_0:
714      c -= '0';      c -= CHAR_0;
715      while(i++ < 2 && ptr[1] >= '0' && ptr[1] <= '7')      while(i++ < 2 && ptr[1] >= CHAR_0 && ptr[1] <= CHAR_7)
716          c = c * 8 + *(++ptr) - '0';          c = c * 8 + *(++ptr) - CHAR_0;
717      if (!utf8 && c > 255) *errorcodeptr = ERR51;      if (!utf8 && c > 255) *errorcodeptr = ERR51;
718      break;      break;
719    
# Line 652  else Line 721  else
721      than 0xff in utf8 mode, but only if the ddd are hex digits. If not, { is      than 0xff in utf8 mode, but only if the ddd are hex digits. If not, { is
722      treated as a data character. */      treated as a data character. */
723    
724      case 'x':      case CHAR_x:
725      if (ptr[1] == '{')      if (ptr[1] == CHAR_LEFT_CURLY_BRACKET)
726        {        {
727        const uschar *pt = ptr + 2;        const uschar *pt = ptr + 2;
728        int count = 0;        int count = 0;
# Line 662  else Line 731  else
731        while ((digitab[*pt] & ctype_xdigit) != 0)        while ((digitab[*pt] & ctype_xdigit) != 0)
732          {          {
733          register int cc = *pt++;          register int cc = *pt++;
734          if (c == 0 && cc == '0') continue;     /* Leading zeroes */          if (c == 0 && cc == CHAR_0) continue;     /* Leading zeroes */
735          count++;          count++;
736    
737  #ifndef EBCDIC  /* ASCII coding */  #ifndef EBCDIC  /* ASCII/UTF-8 coding */
738          if (cc >= 'a') cc -= 32;               /* Convert to upper case */          if (cc >= CHAR_a) cc -= 32;               /* Convert to upper case */
739          c = (c << 4) + cc - ((cc < 'A')? '0' : ('A' - 10));          c = (c << 4) + cc - ((cc < CHAR_A)? CHAR_0 : (CHAR_A - 10));
740  #else           /* EBCDIC coding */  #else           /* EBCDIC coding */
741          if (cc >= 'a' && cc <= 'z') cc += 64;  /* Convert to upper case */          if (cc >= CHAR_a && cc <= CHAR_z) cc += 64;  /* Convert to upper case */
742          c = (c << 4) + cc - ((cc >= '0')? '0' : ('A' - 10));          c = (c << 4) + cc - ((cc >= CHAR_0)? CHAR_0 : (CHAR_A - 10));
743  #endif  #endif
744          }          }
745    
746        if (*pt == '}')        if (*pt == CHAR_RIGHT_CURLY_BRACKET)
747          {          {
748          if (c < 0 || count > (utf8? 8 : 2)) *errorcodeptr = ERR34;          if (c < 0 || count > (utf8? 8 : 2)) *errorcodeptr = ERR34;
749          ptr = pt;          ptr = pt;
# Line 690  else Line 759  else
759      c = 0;      c = 0;
760      while (i++ < 2 && (digitab[ptr[1]] & ctype_xdigit) != 0)      while (i++ < 2 && (digitab[ptr[1]] & ctype_xdigit) != 0)
761        {        {
762        int cc;                               /* Some compilers don't like ++ */        int cc;                                  /* Some compilers don't like */
763        cc = *(++ptr);                        /* in initializers */        cc = *(++ptr);                           /* ++ in initializers */
764  #ifndef EBCDIC  /* ASCII coding */  #ifndef EBCDIC  /* ASCII/UTF-8 coding */
765        if (cc >= 'a') cc -= 32;              /* Convert to upper case */        if (cc >= CHAR_a) cc -= 32;              /* Convert to upper case */
766        c = c * 16 + cc - ((cc < 'A')? '0' : ('A' - 10));        c = c * 16 + cc - ((cc < CHAR_A)? CHAR_0 : (CHAR_A - 10));
767  #else           /* EBCDIC coding */  #else           /* EBCDIC coding */
768        if (cc <= 'z') cc += 64;              /* Convert to upper case */        if (cc <= CHAR_z) cc += 64;              /* Convert to upper case */
769        c = c * 16 + cc - ((cc >= '0')? '0' : ('A' - 10));        c = c * 16 + cc - ((cc >= CHAR_0)? CHAR_0 : (CHAR_A - 10));
770  #endif  #endif
771        }        }
772      break;      break;
# Line 706  else Line 775  else
775      This coding is ASCII-specific, but then the whole concept of \cx is      This coding is ASCII-specific, but then the whole concept of \cx is
776      ASCII-specific. (However, an EBCDIC equivalent has now been added.) */      ASCII-specific. (However, an EBCDIC equivalent has now been added.) */
777    
778      case 'c':      case CHAR_c:
779      c = *(++ptr);      c = *(++ptr);
780      if (c == 0)      if (c == 0)
781        {        {
# Line 714  else Line 783  else
783        break;        break;
784        }        }
785    
786  #ifndef EBCDIC  /* ASCII coding */  #ifndef EBCDIC  /* ASCII/UTF-8 coding */
787      if (c >= 'a' && c <= 'z') c -= 32;      if (c >= CHAR_a && c <= CHAR_z) c -= 32;
788      c ^= 0x40;      c ^= 0x40;
789  #else           /* EBCDIC coding */  #else           /* EBCDIC coding */
790      if (c >= 'a' && c <= 'z') c += 64;      if (c >= CHAR_a && c <= CHAR_z) c += 64;
791      c ^= 0xC0;      c ^= 0xC0;
792  #endif  #endif
793      break;      break;
# Line 780  if (c == 0) goto ERROR_RETURN; Line 849  if (c == 0) goto ERROR_RETURN;
849  /* \P or \p can be followed by a name in {}, optionally preceded by ^ for  /* \P or \p can be followed by a name in {}, optionally preceded by ^ for
850  negation. */  negation. */
851    
852  if (c == '{')  if (c == CHAR_LEFT_CURLY_BRACKET)
853    {    {
854    if (ptr[1] == '^')    if (ptr[1] == CHAR_CIRCUMFLEX_ACCENT)
855      {      {
856      *negptr = TRUE;      *negptr = TRUE;
857      ptr++;      ptr++;
# Line 791  if (c == '{') Line 860  if (c == '{')
860      {      {
861      c = *(++ptr);      c = *(++ptr);
862      if (c == 0) goto ERROR_RETURN;      if (c == 0) goto ERROR_RETURN;
863      if (c == '}') break;      if (c == CHAR_RIGHT_CURLY_BRACKET) break;
864      name[i] = c;      name[i] = c;
865      }      }
866    if (c !='}') goto ERROR_RETURN;    if (c != CHAR_RIGHT_CURLY_BRACKET) goto ERROR_RETURN;
867    name[i] = 0;    name[i] = 0;
868    }    }
869    
# Line 859  is_counted_repeat(const uschar *p) Line 928  is_counted_repeat(const uschar *p)
928  {  {
929  if ((digitab[*p++] & ctype_digit) == 0) return FALSE;  if ((digitab[*p++] & ctype_digit) == 0) return FALSE;
930  while ((digitab[*p] & ctype_digit) != 0) p++;  while ((digitab[*p] & ctype_digit) != 0) p++;
931  if (*p == '}') return TRUE;  if (*p == CHAR_RIGHT_CURLY_BRACKET) return TRUE;
932    
933  if (*p++ != ',') return FALSE;  if (*p++ != CHAR_COMMA) return FALSE;
934  if (*p == '}') return TRUE;  if (*p == CHAR_RIGHT_CURLY_BRACKET) return TRUE;
935    
936  if ((digitab[*p++] & ctype_digit) == 0) return FALSE;  if ((digitab[*p++] & ctype_digit) == 0) return FALSE;
937  while ((digitab[*p] & ctype_digit) != 0) p++;  while ((digitab[*p] & ctype_digit) != 0) p++;
938    
939  return (*p == '}');  return (*p == CHAR_RIGHT_CURLY_BRACKET);
940  }  }
941    
942    
# Line 900  int max = -1; Line 969  int max = -1;
969  /* Read the minimum value and do a paranoid check: a negative value indicates  /* Read the minimum value and do a paranoid check: a negative value indicates
970  an integer overflow. */  an integer overflow. */
971    
972  while ((digitab[*p] & ctype_digit) != 0) min = min * 10 + *p++ - '0';  while ((digitab[*p] & ctype_digit) != 0) min = min * 10 + *p++ - CHAR_0;
973  if (min < 0 || min > 65535)  if (min < 0 || min > 65535)
974    {    {
975    *errorcodeptr = ERR5;    *errorcodeptr = ERR5;
# Line 910  if (min < 0 || min > 65535) Line 979  if (min < 0 || min > 65535)
979  /* Read the maximum value if there is one, and again do a paranoid check on its size.  /* Read the maximum value if there is one, and again do a paranoid check on its size.
980  Also, max must not be less than min. */  Also, max must not be less than min. */
981    
982  if (*p == '}') max = min; else  if (*p == CHAR_RIGHT_CURLY_BRACKET) max = min; else
983    {    {
984    if (*(++p) != '}')    if (*(++p) != CHAR_RIGHT_CURLY_BRACKET)
985      {      {
986      max = 0;      max = 0;
987      while((digitab[*p] & ctype_digit) != 0) max = max * 10 + *p++ - '0';      while((digitab[*p] & ctype_digit) != 0) max = max * 10 + *p++ - CHAR_0;
988      if (max < 0 || max > 65535)      if (max < 0 || max > 65535)
989        {        {
990        *errorcodeptr = ERR5;        *errorcodeptr = ERR5;
# Line 940  return p; Line 1009  return p;
1009    
1010    
1011  /*************************************************  /*************************************************
1012  *       Find forward referenced subpattern       *  *  Subroutine for finding forward reference      *
1013  *************************************************/  *************************************************/
1014    
1015  /* This function scans along a pattern's text looking for capturing  /* This recursive function is called only from find_parens() below. The
1016    top-level call starts at the beginning of the pattern. All other calls must
1017    start at a parenthesis. It scans along a pattern's text looking for capturing
1018  subpatterns, and counting them. If it finds a named pattern that matches the  subpatterns, and counting them. If it finds a named pattern that matches the
1019  name it is given, it returns its number. Alternatively, if the name is NULL, it  name it is given, it returns its number. Alternatively, if the name is NULL, it
1020  returns when it reaches a given numbered subpattern. This is used for forward  returns when it reaches a given numbered subpattern. We know that if (?P< is
1021  references to subpatterns. We know that if (?P< is encountered, the name will  encountered, the name will be terminated by '>' because that is checked in the
1022  be terminated by '>' because that is checked in the first pass.  first pass. Recursion is used to keep track of subpatterns that reset the
1023    capturing group numbers - the (?| feature.
1024    
1025  Arguments:  Arguments:
1026    ptr          current position in the pattern    ptrptr       address of the current character pointer (updated)
1027    count        current count of capturing parens so far encountered    cd           compile background data
1028    name         name to seek, or NULL if seeking a numbered subpattern    name         name to seek, or NULL if seeking a numbered subpattern
1029    lorn         name length, or subpattern number if name is NULL    lorn         name length, or subpattern number if name is NULL
1030    xmode        TRUE if we are in /x mode    xmode        TRUE if we are in /x mode
1031      count        pointer to the current capturing subpattern number (updated)
1032    
1033  Returns:       the number of the named subpattern, or -1 if not found  Returns:       the number of the named subpattern, or -1 if not found
1034  */  */
1035    
1036  static int  static int
1037  find_parens(const uschar *ptr, int count, const uschar *name, int lorn,  find_parens_sub(uschar **ptrptr, compile_data *cd, const uschar *name, int lorn,
1038    BOOL xmode)    BOOL xmode, int *count)
1039  {  {
1040  const uschar *thisname;  uschar *ptr = *ptrptr;
1041    int start_count = *count;
1042    int hwm_count = start_count;
1043    BOOL dup_parens = FALSE;
1044    
1045  for (; *ptr != 0; ptr++)  /* If the first character is a parenthesis, check on the type of group we are
1046    dealing with. The very first call may not start with a parenthesis. */
1047    
1048    if (ptr[0] == CHAR_LEFT_PARENTHESIS)
1049    {    {
1050    int term;    if (ptr[1] == CHAR_QUESTION_MARK &&
1051          ptr[2] == CHAR_VERTICAL_LINE)
1052        {
1053        ptr += 3;
1054        dup_parens = TRUE;
1055        }
1056    
1057      /* Handle a normal, unnamed capturing parenthesis */
1058    
1059      else if (ptr[1] != CHAR_QUESTION_MARK && ptr[1] != CHAR_ASTERISK)
1060        {
1061        *count += 1;
1062        if (name == NULL && *count == lorn) return *count;
1063        ptr++;
1064        }
1065    
1066      /* Handle a condition. If it is an assertion, just carry on so that it
1067      is processed as normal. If not, skip to the closing parenthesis of the
1068      condition (there can't be any nested parens). */
1069    
1070      else if (ptr[2] == CHAR_LEFT_PARENTHESIS)
1071        {
1072        ptr += 2;
1073        if (ptr[1] != CHAR_QUESTION_MARK)
1074          {
1075          while (*ptr != 0 && *ptr != CHAR_RIGHT_PARENTHESIS) ptr++;
1076          if (*ptr != 0) ptr++;
1077          }
1078        }
1079    
1080      /* We have either (? or (* and not a condition */
1081    
1082      else
1083        {
1084        ptr += 2;
1085        if (*ptr == CHAR_P) ptr++;                      /* Allow optional P */
1086    
1087        /* We have to disambiguate (?<! and (?<= from (?<name> for named groups */
1088    
1089        if ((*ptr == CHAR_LESS_THAN_SIGN && ptr[1] != CHAR_EXCLAMATION_MARK &&
1090            ptr[1] != CHAR_EQUALS_SIGN) || *ptr == CHAR_APOSTROPHE)
1091          {
1092          int term;
1093          const uschar *thisname;
1094          *count += 1;
1095          if (name == NULL && *count == lorn) return *count;
1096          term = *ptr++;
1097          if (term == CHAR_LESS_THAN_SIGN) term = CHAR_GREATER_THAN_SIGN;
1098          thisname = ptr;
1099          while (*ptr != term) ptr++;
1100          if (name != NULL && lorn == ptr - thisname &&
1101              strncmp((const char *)name, (const char *)thisname, lorn) == 0)
1102            return *count;
1103          }
1104        }
1105      }
1106    
1107    /* Past any initial parenthesis handling, scan for parentheses or vertical
1108    bars. */
1109    
1110    for (; *ptr != 0; ptr++)
1111      {
1112    /* Skip over backslashed characters and also entire \Q...\E */    /* Skip over backslashed characters and also entire \Q...\E */
1113    
1114    if (*ptr == '\\')    if (*ptr == CHAR_BACKSLASH)
1115      {      {
1116      if (*(++ptr) == 0) return -1;      if (*(++ptr) == 0) goto FAIL_EXIT;
1117      if (*ptr == 'Q') for (;;)      if (*ptr == CHAR_Q) for (;;)
1118        {        {
1119        while (*(++ptr) != 0 && *ptr != '\\');        while (*(++ptr) != 0 && *ptr != CHAR_BACKSLASH) {};
1120        if (*ptr == 0) return -1;        if (*ptr == 0) goto FAIL_EXIT;
1121        if (*(++ptr) == 'E') break;        if (*(++ptr) == CHAR_E) break;
1122        }        }
1123      continue;      continue;
1124      }      }
1125    
1126    /* Skip over character classes */    /* Skip over character classes; this logic must be similar to the way they
1127      are handled for real. If the first character is '^', skip it. Also, if the
1128      first few characters (either before or after ^) are \Q\E or \E we skip them
1129      too. This makes for compatibility with Perl. Note the use of STR macros to
1130      encode "Q\\E" so that it works in UTF-8 on EBCDIC platforms. */
1131    
1132    if (*ptr == '[')    if (*ptr == CHAR_LEFT_SQUARE_BRACKET)
1133      {      {
1134      while (*(++ptr) != ']')      BOOL negate_class = FALSE;
1135        for (;;)
1136          {
1137          int c = *(++ptr);
1138          if (c == CHAR_BACKSLASH)
1139            {
1140            if (ptr[1] == CHAR_E)
1141              ptr++;
1142            else if (strncmp((const char *)ptr+1,
1143                     STR_Q STR_BACKSLASH STR_E, 3) == 0)
1144              ptr += 3;
1145            else
1146              break;
1147            }
1148          else if (!negate_class && c == CHAR_CIRCUMFLEX_ACCENT)
1149            negate_class = TRUE;
1150          else break;
1151          }
1152    
1153        /* If the next character is ']', it is a data character that must be
1154        skipped, except in JavaScript compatibility mode. */
1155    
1156        if (ptr[1] == CHAR_RIGHT_SQUARE_BRACKET &&
1157            (cd->external_options & PCRE_JAVASCRIPT_COMPAT) == 0)
1158          ptr++;
1159    
1160        while (*(++ptr) != CHAR_RIGHT_SQUARE_BRACKET)
1161        {        {
1162        if (*ptr == 0) return -1;        if (*ptr == 0) return -1;
1163        if (*ptr == '\\')        if (*ptr == CHAR_BACKSLASH)
1164          {          {
1165          if (*(++ptr) == 0) return -1;          if (*(++ptr) == 0) goto FAIL_EXIT;
1166          if (*ptr == 'Q') for (;;)          if (*ptr == CHAR_Q) for (;;)
1167            {            {
1168            while (*(++ptr) != 0 && *ptr != '\\');            while (*(++ptr) != 0 && *ptr != CHAR_BACKSLASH) {};
1169            if (*ptr == 0) return -1;            if (*ptr == 0) goto FAIL_EXIT;
1170            if (*(++ptr) == 'E') break;            if (*(++ptr) == CHAR_E) break;
1171            }            }
1172          continue;          continue;
1173          }          }
# Line 1008  for (; *ptr != 0; ptr++) Line 1177  for (; *ptr != 0; ptr++)
1177    
1178    /* Skip comments in /x mode */    /* Skip comments in /x mode */
1179    
1180    if (xmode && *ptr == '#')    if (xmode && *ptr == CHAR_NUMBER_SIGN)
1181      {      {
1182      while (*(++ptr) != 0 && *ptr != '\n');      while (*(++ptr) != 0 && *ptr != CHAR_NL) {};
1183      if (*ptr == 0) return -1;      if (*ptr == 0) goto FAIL_EXIT;
1184      continue;      continue;
1185      }      }
1186    
1187    /* An opening parens must now be a real metacharacter */    /* Check for the special metacharacters */
1188    
1189    if (*ptr != '(') continue;    if (*ptr == CHAR_LEFT_PARENTHESIS)
   if (ptr[1] != '?' && ptr[1] != '*')  
1190      {      {
1191      count++;      int rc = find_parens_sub(&ptr, cd, name, lorn, xmode, count);
1192      if (name == NULL && count == lorn) return count;      if (rc > 0) return rc;
1193      continue;      if (*ptr == 0) goto FAIL_EXIT;
1194      }      }
1195    
1196    ptr += 2;    else if (*ptr == CHAR_RIGHT_PARENTHESIS)
1197    if (*ptr == 'P') ptr++;                      /* Allow optional P */      {
1198        if (dup_parens && *count < hwm_count) *count = hwm_count;
1199        *ptrptr = ptr;
1200        return -1;
1201        }
1202    
1203    /* We have to disambiguate (?<! and (?<= from (?<name> */    else if (*ptr == CHAR_VERTICAL_LINE && dup_parens)
1204        {
1205        if (*count > hwm_count) hwm_count = *count;
1206        *count = start_count;
1207        }
1208      }
1209    
1210    if ((*ptr != '<' || ptr[1] == '!' || ptr[1] == '=') &&  FAIL_EXIT:
1211         *ptr != '\'')  *ptrptr = ptr;
1212      continue;  return -1;
1213    }
1214    
1215    
1216    
1217    
1218    /*************************************************
1219    *       Find forward referenced subpattern       *
1220    *************************************************/
1221    
1222    /* This function scans along a pattern's text looking for capturing
1223    subpatterns, and counting them. If it finds a named pattern that matches the
1224    name it is given, it returns its number. Alternatively, if the name is NULL, it
1225    returns when it reaches a given numbered subpattern. This is used for forward
1226    references to subpatterns. We used to be able to start this scan from the
1227    current compiling point, using the current count value from cd->bracount, and
1228    do it all in a single loop, but the addition of the possibility of duplicate
1229    subpattern numbers means that we have to scan from the very start, in order to
1230    take account of such duplicates, and to use a recursive function to keep track
1231    of the different types of group.
1232    
1233    count++;  Arguments:
1234      cd           compile background data
1235      name         name to seek, or NULL if seeking a numbered subpattern
1236      lorn         name length, or subpattern number if name is NULL
1237      xmode        TRUE if we are in /x mode
1238    
1239    Returns:       the number of the found subpattern, or -1 if not found
1240    */
1241    
1242    if (name == NULL && count == lorn) return count;  static int
1243    term = *ptr++;  find_parens(compile_data *cd, const uschar *name, int lorn, BOOL xmode)
1244    if (term == '<') term = '>';  {
1245    thisname = ptr;  uschar *ptr = (uschar *)cd->start_pattern;
1246    while (*ptr != term) ptr++;  int count = 0;
1247    if (name != NULL && lorn == ptr - thisname &&  int rc;
1248        strncmp((const char *)name, (const char *)thisname, lorn) == 0)  
1249      return count;  /* If the pattern does not start with an opening parenthesis, the first call
1250    to find_parens_sub() will scan right to the end (if necessary). However, if it
1251    does start with a parenthesis, find_parens_sub() will return when it hits the
1252    matching closing parens. That is why we have to have a loop. */
1253    
1254    for (;;)
1255      {
1256      rc = find_parens_sub(&ptr, cd, name, lorn, xmode, &count);
1257      if (rc > 0 || *ptr++ == 0) break;
1258    }    }
1259    
1260  return -1;  return rc;
1261  }  }
1262    
1263    
1264    
1265    
1266  /*************************************************  /*************************************************
1267  *      Find first significant op code            *  *      Find first significant op code            *
1268  *************************************************/  *************************************************/
# Line 1252  for (;;) Line 1464  for (;;)
1464      case OP_NOT_WORDCHAR:      case OP_NOT_WORDCHAR:
1465      case OP_WORDCHAR:      case OP_WORDCHAR:
1466      case OP_ANY:      case OP_ANY:
1467        case OP_ALLANY:
1468      branchlength++;      branchlength++;
1469      cc++;      cc++;
1470      break;      break;
# Line 1400  for (;;) Line 1613  for (;;)
1613        if (code[-1] >= 0xc0) code += _pcre_utf8_table4[code[-1] & 0x3f];        if (code[-1] >= 0xc0) code += _pcre_utf8_table4[code[-1] & 0x3f];
1614        break;        break;
1615        }        }
1616    #else
1617        (void)(utf8);  /* Keep compiler happy by referencing function argument */
1618  #endif  #endif
1619      }      }
1620    }    }
# Line 1493  for (;;) Line 1708  for (;;)
1708        if (code[-1] >= 0xc0) code += _pcre_utf8_table4[code[-1] & 0x3f];        if (code[-1] >= 0xc0) code += _pcre_utf8_table4[code[-1] & 0x3f];
1709        break;        break;
1710        }        }
1711    #else
1712        (void)(utf8);  /* Keep compiler happy by referencing function argument */
1713  #endif  #endif
1714      }      }
1715    }    }
# Line 1508  for (;;) Line 1725  for (;;)
1725  can match the empty string or not. It is called from could_be_empty()  can match the empty string or not. It is called from could_be_empty()
1726  below and from compile_branch() when checking for an unlimited repeat of a  below and from compile_branch() when checking for an unlimited repeat of a
1727  group that can match nothing. Note that first_significant_code() skips over  group that can match nothing. Note that first_significant_code() skips over
1728  assertions. If we hit an unclosed bracket, we return "empty" - this means we've  backward and negative forward assertions when its final argument is TRUE. If we
1729  struck an inner bracket whose current branch will already have been scanned.  hit an unclosed bracket, we return "empty" - this means we've struck an inner
1730    bracket whose current branch will already have been scanned.
1731    
1732  Arguments:  Arguments:
1733    code        points to start of search    code        points to start of search
# Line 1531  for (code = first_significant_code(code Line 1749  for (code = first_significant_code(code
1749    
1750    c = *code;    c = *code;
1751    
1752      /* Skip over forward assertions; the other assertions are skipped by
1753      first_significant_code() with a TRUE final argument. */
1754    
1755      if (c == OP_ASSERT)
1756        {
1757        do code += GET(code, 1); while (*code == OP_ALT);
1758        c = *code;
1759        continue;
1760        }
1761    
1762    /* Groups with zero repeats can of course be empty; skip them. */    /* Groups with zero repeats can of course be empty; skip them. */
1763    
1764    if (c == OP_BRAZERO || c == OP_BRAMINZERO)    if (c == OP_BRAZERO || c == OP_BRAMINZERO || c == OP_SKIPZERO)
1765      {      {
1766      code += _pcre_OP_lengths[c];      code += _pcre_OP_lengths[c];
1767      do code += GET(code, 1); while (*code == OP_ALT);      do code += GET(code, 1); while (*code == OP_ALT);
# Line 1548  for (code = first_significant_code(code Line 1776  for (code = first_significant_code(code
1776      BOOL empty_branch;      BOOL empty_branch;
1777      if (GET(code, 1) == 0) return TRUE;    /* Hit unclosed bracket */      if (GET(code, 1) == 0) return TRUE;    /* Hit unclosed bracket */
1778    
1779      /* Scan a closed bracket */      /* If a conditional group has only one branch, there is a second, implied,
1780        empty branch, so just skip over the conditional, because it could be empty.
1781        Otherwise, scan the individual branches of the group. */
1782    
1783      empty_branch = FALSE;      if (c == OP_COND && code[GET(code, 1)] != OP_ALT)
     do  
       {  
       if (!empty_branch && could_be_empty_branch(code, endcode, utf8))  
         empty_branch = TRUE;  
1784        code += GET(code, 1);        code += GET(code, 1);
1785        else
1786          {
1787          empty_branch = FALSE;
1788          do
1789            {
1790            if (!empty_branch && could_be_empty_branch(code, endcode, utf8))
1791              empty_branch = TRUE;
1792            code += GET(code, 1);
1793            }
1794          while (*code == OP_ALT);
1795          if (!empty_branch) return FALSE;   /* All branches are non-empty */
1796        }        }
1797      while (*code == OP_ALT);  
     if (!empty_branch) return FALSE;   /* All branches are non-empty */  
1798      c = *code;      c = *code;
1799      continue;      continue;
1800      }      }
# Line 1619  for (code = first_significant_code(code Line 1855  for (code = first_significant_code(code
1855      case OP_NOT_WORDCHAR:      case OP_NOT_WORDCHAR:
1856      case OP_WORDCHAR:      case OP_WORDCHAR:
1857      case OP_ANY:      case OP_ANY:
1858        case OP_ALLANY:
1859      case OP_ANYBYTE:      case OP_ANYBYTE:
1860      case OP_CHAR:      case OP_CHAR:
1861      case OP_CHARNC:      case OP_CHARNC:
# Line 1726  return TRUE; Line 1963  return TRUE;
1963  *************************************************/  *************************************************/
1964    
1965  /* This function is called when the sequence "[:" or "[." or "[=" is  /* This function is called when the sequence "[:" or "[." or "[=" is
1966  encountered in a character class. It checks whether this is followed by an  encountered in a character class. It checks whether this is followed by a
1967  optional ^ and then a sequence of letters, terminated by a matching ":]" or  sequence of characters terminated by a matching ":]" or ".]" or "=]". If we
1968  ".]" or "=]".  reach an unescaped ']' without the special preceding character, return FALSE.
1969    
1970    Originally, this function only recognized a sequence of letters between the
1971    terminators, but it seems that Perl recognizes any sequence of characters,
1972    though of course unknown POSIX names are subsequently rejected. Perl gives an
1973    "Unknown POSIX class" error for [:f\oo:] for example, where previously PCRE
1974    didn't consider this to be a POSIX class. Likewise for [:1234:].
1975    
1976    The problem in trying to be exactly like Perl is in the handling of escapes. We
1977    have to be sure that [abc[:x\]pqr] is *not* treated as containing a POSIX
1978    class, but [abc[:x\]pqr:]] is (so that an error can be generated). The code
1979    below handles the special case of \], but does not try to do any other escape
1980    processing. This makes it different from Perl for cases such as [:l\ower:]
1981    where Perl recognizes it as the POSIX class "lower" but PCRE does not recognize
1982    "l\ower". This is a lesser evil than not diagnosing bad classes when Perl does,
1983    I think.
1984    
1985  Argument:  Arguments:
1986    ptr      pointer to the initial [    ptr      pointer to the initial [
1987    endptr   where to return the end pointer    endptr   where to return the end pointer
   cd       pointer to compile data  
1988    
1989  Returns:   TRUE or FALSE  Returns:   TRUE or FALSE
1990  */  */
1991    
1992  static BOOL  static BOOL
1993  check_posix_syntax(const uschar *ptr, const uschar **endptr, compile_data *cd)  check_posix_syntax(const uschar *ptr, const uschar **endptr)
1994  {  {
1995  int terminator;          /* Don't combine these lines; the Solaris cc */  int terminator;          /* Don't combine these lines; the Solaris cc */
1996  terminator = *(++ptr);   /* compiler warns about "non-constant" initializer. */  terminator = *(++ptr);   /* compiler warns about "non-constant" initializer. */
1997  if (*(++ptr) == '^') ptr++;  for (++ptr; *ptr != 0; ptr++)
 while ((cd->ctypes[*ptr] & ctype_letter) != 0) ptr++;  
 if (*ptr == terminator && ptr[1] == ']')  
1998    {    {
1999    *endptr = ptr;    if (*ptr == CHAR_BACKSLASH && ptr[1] == CHAR_RIGHT_SQUARE_BRACKET) ptr++; else
2000    return TRUE;      {
2001        if (*ptr == CHAR_RIGHT_SQUARE_BRACKET) return FALSE;
2002        if (*ptr == terminator && ptr[1] == CHAR_RIGHT_SQUARE_BRACKET)
2003          {
2004          *endptr = ptr;
2005          return TRUE;
2006          }
2007        }
2008    }    }
2009  return FALSE;  return FALSE;
2010  }  }
# Line 1794  return -1; Line 2050  return -1;
2050  that is referenced. This means that groups can be replicated for fixed  that is referenced. This means that groups can be replicated for fixed
2051  repetition simply by copying (because the recursion is allowed to refer to  repetition simply by copying (because the recursion is allowed to refer to
2052  earlier groups that are outside the current group). However, when a group is  earlier groups that are outside the current group). However, when a group is
2053  optional (i.e. the minimum quantifier is zero), OP_BRAZERO is inserted before  optional (i.e. the minimum quantifier is zero), OP_BRAZERO or OP_SKIPZERO is
2054  it, after it has been compiled. This means that any OP_RECURSE items within it  inserted before it, after it has been compiled. This means that any OP_RECURSE
2055  that refer to the group itself or any contained groups have to have their  items within it that refer to the group itself or any contained groups have to
2056  offsets adjusted. That is one of the jobs of this function. Before it is called,  have their offsets adjusted. That is one of the jobs of this function. Before it
2057  the partially compiled regex must be temporarily terminated with OP_END.  is called, the partially compiled regex must be temporarily terminated with
2058    OP_END.
2059    
2060  This function has been extended with the possibility of forward references for  This function has been extended with the possibility of forward references for
2061  recursions and subroutine calls. It must also check the list of such references  recursions and subroutine calls. It must also check the list of such references
# Line 1933  get_othercase_range(unsigned int *cptr, Line 2190  get_othercase_range(unsigned int *cptr,
2190  unsigned int c, othercase, next;  unsigned int c, othercase, next;
2191    
2192  for (c = *cptr; c <= d; c++)  for (c = *cptr; c <= d; c++)
2193    { if ((othercase = _pcre_ucp_othercase(c)) != NOTACHAR) break; }    { if ((othercase = UCD_OTHERCASE(c)) != c) break; }
2194    
2195  if (c > d) return FALSE;  if (c > d) return FALSE;
2196    
# Line 1942  next = othercase + 1; Line 2199  next = othercase + 1;
2199    
2200  for (++c; c <= d; c++)  for (++c; c <= d; c++)
2201    {    {
2202    if (_pcre_ucp_othercase(c) != next) break;    if (UCD_OTHERCASE(c) != next) break;
2203    next++;    next++;
2204    }    }
2205    
# Line 1988  if ((options & PCRE_EXTENDED) != 0) Line 2245  if ((options & PCRE_EXTENDED) != 0)
2245    for (;;)    for (;;)
2246      {      {
2247      while ((cd->ctypes[*ptr] & ctype_space) != 0) ptr++;      while ((cd->ctypes[*ptr] & ctype_space) != 0) ptr++;
2248      if (*ptr == '#')      if (*ptr == CHAR_NUMBER_SIGN)
2249        {        {
2250        while (*(++ptr) != 0)        while (*(++ptr) != 0)
2251          if (IS_NEWLINE(ptr)) { ptr += cd->nllen; break; }          if (IS_NEWLINE(ptr)) { ptr += cd->nllen; break; }
# Line 2000  if ((options & PCRE_EXTENDED) != 0) Line 2257  if ((options & PCRE_EXTENDED) != 0)
2257  /* If the next item is one that we can handle, get its value. A non-negative  /* If the next item is one that we can handle, get its value. A non-negative
2258  value is a character, a negative value is an escape value. */  value is a character, a negative value is an escape value. */
2259    
2260  if (*ptr == '\\')  if (*ptr == CHAR_BACKSLASH)
2261    {    {
2262    int temperrorcode = 0;    int temperrorcode = 0;
2263    next = check_escape(&ptr, &temperrorcode, cd->bracount, options, FALSE);    next = check_escape(&ptr, &temperrorcode, cd->bracount, options, FALSE);
# Line 2025  if ((options & PCRE_EXTENDED) != 0) Line 2282  if ((options & PCRE_EXTENDED) != 0)
2282    for (;;)    for (;;)
2283      {      {
2284      while ((cd->ctypes[*ptr] & ctype_space) != 0) ptr++;      while ((cd->ctypes[*ptr] & ctype_space) != 0) ptr++;
2285      if (*ptr == '#')      if (*ptr == CHAR_NUMBER_SIGN)
2286        {        {
2287        while (*(++ptr) != 0)        while (*(++ptr) != 0)
2288          if (IS_NEWLINE(ptr)) { ptr += cd->nllen; break; }          if (IS_NEWLINE(ptr)) { ptr += cd->nllen; break; }
# Line 2036  if ((options & PCRE_EXTENDED) != 0) Line 2293  if ((options & PCRE_EXTENDED) != 0)
2293    
2294  /* If the next thing is itself optional, we have to give up. */  /* If the next thing is itself optional, we have to give up. */
2295    
2296  if (*ptr == '*' || *ptr == '?' || strncmp((char *)ptr, "{0,", 3) == 0)  if (*ptr == CHAR_ASTERISK || *ptr == CHAR_QUESTION_MARK ||
2297    return FALSE;    strncmp((char *)ptr, STR_LEFT_CURLY_BRACKET STR_0 STR_COMMA, 3) == 0)
2298        return FALSE;
2299    
2300  /* Now compare the next item with the previous opcode. If the previous is a  /* Now compare the next item with the previous opcode. If the previous is a
2301  positive single character match, "item" either contains the character or, if  positive single character match, "item" either contains the character or, if
# Line 2052  if (next >= 0) switch(op_code) Line 2310  if (next >= 0) switch(op_code)
2310    case OP_CHAR:    case OP_CHAR:
2311  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
2312    if (utf8 && item > 127) { GETCHAR(item, utf8_char); }    if (utf8 && item > 127) { GETCHAR(item, utf8_char); }
2313    #else
2314      (void)(utf8_char);  /* Keep compiler happy by referencing function argument */
2315  #endif  #endif
2316    return item != next;    return item != next;
2317    
# Line 2070  if (next >= 0) switch(op_code) Line 2330  if (next >= 0) switch(op_code)
2330      unsigned int othercase;      unsigned int othercase;
2331      if (next < 128) othercase = cd->fcc[next]; else      if (next < 128) othercase = cd->fcc[next]; else
2332  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
2333      othercase = _pcre_ucp_othercase((unsigned int)next);      othercase = UCD_OTHERCASE((unsigned int)next);
2334  #else  #else
2335      othercase = NOTACHAR;      othercase = NOTACHAR;
2336  #endif  #endif
# Line 2083  if (next >= 0) switch(op_code) Line 2343  if (next >= 0) switch(op_code)
2343    /* For OP_NOT, "item" must be a single-byte character. */    /* For OP_NOT, "item" must be a single-byte character. */
2344    
2345    case OP_NOT:    case OP_NOT:
   if (next < 0) return FALSE;  /* Not a character */  
2346    if (item == next) return TRUE;    if (item == next) return TRUE;
2347    if ((options & PCRE_CASELESS) == 0) return FALSE;    if ((options & PCRE_CASELESS) == 0) return FALSE;
2348  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
# Line 2092  if (next >= 0) switch(op_code) Line 2351  if (next >= 0) switch(op_code)
2351      unsigned int othercase;      unsigned int othercase;
2352      if (next < 128) othercase = cd->fcc[next]; else      if (next < 128) othercase = cd->fcc[next]; else
2353  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
2354      othercase = _pcre_ucp_othercase(next);      othercase = UCD_OTHERCASE(next);
2355  #else  #else
2356      othercase = NOTACHAR;      othercase = NOTACHAR;
2357  #endif  #endif
# Line 2346  uschar classbits[32]; Line 2605  uschar classbits[32];
2605  BOOL class_utf8;  BOOL class_utf8;
2606  BOOL utf8 = (options & PCRE_UTF8) != 0;  BOOL utf8 = (options & PCRE_UTF8) != 0;
2607  uschar *class_utf8data;  uschar *class_utf8data;
2608    uschar *class_utf8data_base;
2609  uschar utf8_char[6];  uschar utf8_char[6];
2610  #else  #else
2611  BOOL utf8 = FALSE;  BOOL utf8 = FALSE;
# Line 2385  req_caseopt = ((options & PCRE_CASELESS) Line 2645  req_caseopt = ((options & PCRE_CASELESS)
2645  for (;; ptr++)  for (;; ptr++)
2646    {    {
2647    BOOL negate_class;    BOOL negate_class;
2648    BOOL should_flip_negation;    BOOL should_flip_negation;
2649    BOOL possessive_quantifier;    BOOL possessive_quantifier;
2650    BOOL is_quantifier;    BOOL is_quantifier;
2651    BOOL is_recurse;    BOOL is_recurse;
# Line 2473  for (;; ptr++) Line 2733  for (;; ptr++)
2733    
2734    if (inescq && c != 0)    if (inescq && c != 0)
2735      {      {
2736      if (c == '\\' && ptr[1] == 'E')      if (c == CHAR_BACKSLASH && ptr[1] == CHAR_E)
2737        {        {
2738        inescq = FALSE;        inescq = FALSE;
2739        ptr++;        ptr++;
# Line 2499  for (;; ptr++) Line 2759  for (;; ptr++)
2759    /* Fill in length of a previous callout, except when the next thing is    /* Fill in length of a previous callout, except when the next thing is
2760    a quantifier. */    a quantifier. */
2761    
2762    is_quantifier = c == '*' || c == '+' || c == '?' ||    is_quantifier =
2763      (c == '{' && is_counted_repeat(ptr+1));      c == CHAR_ASTERISK || c == CHAR_PLUS || c == CHAR_QUESTION_MARK ||
2764        (c == CHAR_LEFT_CURLY_BRACKET && is_counted_repeat(ptr+1));
2765    
2766    if (!is_quantifier && previous_callout != NULL &&    if (!is_quantifier && previous_callout != NULL &&
2767         after_manual_callout-- <= 0)         after_manual_callout-- <= 0)
# Line 2515  for (;; ptr++) Line 2776  for (;; ptr++)
2776    if ((options & PCRE_EXTENDED) != 0)    if ((options & PCRE_EXTENDED) != 0)
2777      {      {
2778      if ((cd->ctypes[c] & ctype_space) != 0) continue;      if ((cd->ctypes[c] & ctype_space) != 0) continue;
2779      if (c == '#')      if (c == CHAR_NUMBER_SIGN)
2780        {        {
2781        while (*(++ptr) != 0)        while (*(++ptr) != 0)
2782          {          {
# Line 2540  for (;; ptr++) Line 2801  for (;; ptr++)
2801      {      {
2802      /* ===================================================================*/      /* ===================================================================*/
2803      case 0:                        /* The branch terminates at string end */      case 0:                        /* The branch terminates at string end */
2804      case '|':                      /* or | or ) */      case CHAR_VERTICAL_LINE:       /* or | or ) */
2805      case ')':      case CHAR_RIGHT_PARENTHESIS:
2806      *firstbyteptr = firstbyte;      *firstbyteptr = firstbyte;
2807      *reqbyteptr = reqbyte;      *reqbyteptr = reqbyte;
2808      *codeptr = code;      *codeptr = code;
# Line 2563  for (;; ptr++) Line 2824  for (;; ptr++)
2824      /* Handle single-character metacharacters. In multiline mode, ^ disables      /* Handle single-character metacharacters. In multiline mode, ^ disables
2825      the setting of any following char as a first character. */      the setting of any following char as a first character. */
2826    
2827      case '^':      case CHAR_CIRCUMFLEX_ACCENT:
2828      if ((options & PCRE_MULTILINE) != 0)      if ((options & PCRE_MULTILINE) != 0)
2829        {        {
2830        if (firstbyte == REQ_UNSET) firstbyte = REQ_NONE;        if (firstbyte == REQ_UNSET) firstbyte = REQ_NONE;
# Line 2572  for (;; ptr++) Line 2833  for (;; ptr++)
2833      *code++ = OP_CIRC;      *code++ = OP_CIRC;
2834      break;      break;
2835    
2836      case '$':      case CHAR_DOLLAR_SIGN:
2837      previous = NULL;      previous = NULL;
2838      *code++ = OP_DOLL;      *code++ = OP_DOLL;
2839      break;      break;
# Line 2580  for (;; ptr++) Line 2841  for (;; ptr++)
2841      /* There can never be a first char if '.' is first, whatever happens about      /* There can never be a first char if '.' is first, whatever happens about
2842      repeats. The value of reqbyte doesn't change either. */      repeats. The value of reqbyte doesn't change either. */
2843    
2844      case '.':      case CHAR_DOT:
2845      if (firstbyte == REQ_UNSET) firstbyte = REQ_NONE;      if (firstbyte == REQ_UNSET) firstbyte = REQ_NONE;
2846      zerofirstbyte = firstbyte;      zerofirstbyte = firstbyte;
2847      zeroreqbyte = reqbyte;      zeroreqbyte = reqbyte;
2848      previous = code;      previous = code;
2849      *code++ = OP_ANY;      *code++ = ((options & PCRE_DOTALL) != 0)? OP_ALLANY: OP_ANY;
2850      break;      break;
2851    
2852    
# Line 2600  for (;; ptr++) Line 2861  for (;; ptr++)
2861      opcode is compiled. It may optionally have a bit map for characters < 256,      opcode is compiled. It may optionally have a bit map for characters < 256,
2862      but those above are explicitly listed afterwards. A flag byte tells      but those above are explicitly listed afterwards. A flag byte tells
2863      whether the bitmap is present, and whether this is a negated class or not.      whether the bitmap is present, and whether this is a negated class or not.
     */  
2864    
2865      case '[':      In JavaScript compatibility mode, an isolated ']' causes an error. In
2866        default (Perl) mode, it is treated as a data character. */
2867    
2868        case CHAR_RIGHT_SQUARE_BRACKET:
2869        if ((cd->external_options & PCRE_JAVASCRIPT_COMPAT) != 0)
2870          {
2871          *errorcodeptr = ERR64;
2872          goto FAILED;
2873          }
2874        goto NORMAL_CHAR;
2875    
2876        case CHAR_LEFT_SQUARE_BRACKET:
2877      previous = code;      previous = code;
2878    
2879      /* PCRE supports POSIX class stuff inside a class. Perl gives an error if      /* PCRE supports POSIX class stuff inside a class. Perl gives an error if
2880      they are encountered at the top level, so we'll do that too. */      they are encountered at the top level, so we'll do that too. */
2881    
2882      if ((ptr[1] == ':' || ptr[1] == '.' || ptr[1] == '=') &&      if ((ptr[1] == CHAR_COLON || ptr[1] == CHAR_DOT ||
2883          check_posix_syntax(ptr, &tempptr, cd))           ptr[1] == CHAR_EQUALS_SIGN) &&
2884            check_posix_syntax(ptr, &tempptr))
2885        {        {
2886        *errorcodeptr = (ptr[1] == ':')? ERR13 : ERR31;        *errorcodeptr = (ptr[1] == CHAR_COLON)? ERR13 : ERR31;
2887        goto FAILED;        goto FAILED;
2888        }        }
2889    
# Line 2623  for (;; ptr++) Line 2895  for (;; ptr++)
2895      for (;;)      for (;;)
2896        {        {
2897        c = *(++ptr);        c = *(++ptr);
2898        if (c == '\\')        if (c == CHAR_BACKSLASH)
2899          {          {
2900          if (ptr[1] == 'E') ptr++;          if (ptr[1] == CHAR_E)
2901            else if (strncmp((const char *)ptr+1, "Q\\E", 3) == 0) ptr += 3;            ptr++;
2902              else break;          else if (strncmp((const char *)ptr+1,
2903                              STR_Q STR_BACKSLASH STR_E, 3) == 0)
2904              ptr += 3;
2905            else
2906              break;
2907          }          }
2908        else if (!negate_class && c == '^')        else if (!negate_class && c == CHAR_CIRCUMFLEX_ACCENT)
2909          negate_class = TRUE;          negate_class = TRUE;
2910        else break;        else break;
2911        }        }
2912    
2913      /* If a class contains a negative special such as \S, we need to flip the      /* Empty classes are allowed in JavaScript compatibility mode. Otherwise,
2914      negation flag at the end, so that support for characters > 255 works      an initial ']' is taken as a data character -- the code below handles
2915        that. In JS mode, [] must always fail, so generate OP_FAIL, whereas
2916        [^] must match any character, so generate OP_ALLANY. */
2917    
2918        if (c == CHAR_RIGHT_SQUARE_BRACKET &&
2919            (cd->external_options & PCRE_JAVASCRIPT_COMPAT) != 0)
2920          {
2921          *code++ = negate_class? OP_ALLANY : OP_FAIL;
2922          if (firstbyte == REQ_UNSET) firstbyte = REQ_NONE;
2923          zerofirstbyte = firstbyte;
2924          break;
2925          }
2926    
2927        /* If a class contains a negative special such as \S, we need to flip the
2928        negation flag at the end, so that support for characters > 255 works
2929      correctly (they are all included in the class). */      correctly (they are all included in the class). */
2930    
2931      should_flip_negation = FALSE;      should_flip_negation = FALSE;
# Line 2657  for (;; ptr++) Line 2947  for (;; ptr++)
2947  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
2948      class_utf8 = FALSE;                       /* No chars >= 256 */      class_utf8 = FALSE;                       /* No chars >= 256 */
2949      class_utf8data = code + LINK_SIZE + 2;    /* For UTF-8 items */      class_utf8data = code + LINK_SIZE + 2;    /* For UTF-8 items */
2950        class_utf8data_base = class_utf8data;     /* For resetting in pass 1 */
2951  #endif  #endif
2952    
2953      /* Process characters until ] is reached. By writing this as a "do" it      /* Process characters until ] is reached. By writing this as a "do" it
# Line 2672  for (;; ptr++) Line 2963  for (;; ptr++)
2963          {                           /* Braces are required because the */          {                           /* Braces are required because the */
2964          GETCHARLEN(c, ptr, ptr);    /* macro generates multiple statements */          GETCHARLEN(c, ptr, ptr);    /* macro generates multiple statements */
2965          }          }
2966    
2967          /* In the pre-compile phase, accumulate the length of any UTF-8 extra
2968          data and reset the pointer. This is so that very large classes that
2969          contain a zillion UTF-8 characters no longer overwrite the work space
2970          (which is on the stack). */
2971    
2972          if (lengthptr != NULL)
2973            {
2974            *lengthptr += class_utf8data - class_utf8data_base;
2975            class_utf8data = class_utf8data_base;
2976            }
2977    
2978  #endif  #endif
2979    
2980        /* Inside \Q...\E everything is literal except \E */        /* Inside \Q...\E everything is literal except \E */
2981    
2982        if (inescq)        if (inescq)
2983          {          {
2984          if (c == '\\' && ptr[1] == 'E')     /* If we are at \E */          if (c == CHAR_BACKSLASH && ptr[1] == CHAR_E)  /* If we are at \E */
2985            {            {
2986            inescq = FALSE;                   /* Reset literal state */            inescq = FALSE;                   /* Reset literal state */
2987            ptr++;                            /* Skip the 'E' */            ptr++;                            /* Skip the 'E' */
# Line 2693  for (;; ptr++) Line 2996  for (;; ptr++)
2996        [.ch.] and [=ch=] ("collating elements") and fault them, as Perl        [.ch.] and [=ch=] ("collating elements") and fault them, as Perl
2997        5.6 and 5.8 do. */        5.6 and 5.8 do. */
2998    
2999        if (c == '[' &&        if (c == CHAR_LEFT_SQUARE_BRACKET &&
3000            (ptr[1] == ':' || ptr[1] == '.' || ptr[1] == '=') &&            (ptr[1] == CHAR_COLON || ptr[1] == CHAR_DOT ||
3001            check_posix_syntax(ptr, &tempptr, cd))             ptr[1] == CHAR_EQUALS_SIGN) && check_posix_syntax(ptr, &tempptr))
3002          {          {
3003          BOOL local_negate = FALSE;          BOOL local_negate = FALSE;
3004          int posix_class, taboffset, tabopt;          int posix_class, taboffset, tabopt;
3005          register const uschar *cbits = cd->cbits;          register const uschar *cbits = cd->cbits;
3006          uschar pbits[32];          uschar pbits[32];
3007    
3008          if (ptr[1] != ':')          if (ptr[1] != CHAR_COLON)
3009            {            {
3010            *errorcodeptr = ERR31;            *errorcodeptr = ERR31;
3011            goto FAILED;            goto FAILED;
3012            }            }
3013    
3014          ptr += 2;          ptr += 2;
3015          if (*ptr == '^')          if (*ptr == CHAR_CIRCUMFLEX_ACCENT)
3016            {            {
3017            local_negate = TRUE;            local_negate = TRUE;
3018            should_flip_negation = TRUE;  /* Note negative special */            should_flip_negation = TRUE;  /* Note negative special */
3019            ptr++;            ptr++;
3020            }            }
3021    
# Line 2782  for (;; ptr++) Line 3085  for (;; ptr++)
3085        to 'or' into the one we are building. We assume they have more than one        to 'or' into the one we are building. We assume they have more than one
3086        character in them, so set class_charcount bigger than one. */        character in them, so set class_charcount bigger than one. */
3087    
3088        if (c == '\\')        if (c == CHAR_BACKSLASH)
3089          {          {
3090          c = check_escape(&ptr, errorcodeptr, cd->bracount, options, TRUE);          c = check_escape(&ptr, errorcodeptr, cd->bracount, options, TRUE);
3091          if (*errorcodeptr != 0) goto FAILED;          if (*errorcodeptr != 0) goto FAILED;
3092    
3093          if (-c == ESC_b) c = '\b';       /* \b is backslash in a class */          if (-c == ESC_b) c = CHAR_BS;       /* \b is backspace in a class */
3094          else if (-c == ESC_X) c = 'X';   /* \X is literal X in a class */          else if (-c == ESC_X) c = CHAR_X;   /* \X is literal X in a class */
3095          else if (-c == ESC_R) c = 'R';   /* \R is literal R in a class */          else if (-c == ESC_R) c = CHAR_R;   /* \R is literal R in a class */
3096          else if (-c == ESC_Q)            /* Handle start of quoted string */          else if (-c == ESC_Q)            /* Handle start of quoted string */
3097            {            {
3098            if (ptr[1] == '\\' && ptr[2] == 'E')            if (ptr[1] == CHAR_BACKSLASH && ptr[2] == CHAR_E)
3099              {              {
3100              ptr += 2; /* avoid empty string */              ptr += 2; /* avoid empty string */
3101              }              }
# Line 2815  for (;; ptr++) Line 3118  for (;; ptr++)
3118              continue;              continue;
3119    
3120              case ESC_D:              case ESC_D:
3121              should_flip_negation = TRUE;              should_flip_negation = TRUE;
3122              for (c = 0; c < 32; c++) classbits[c] |= ~cbits[c+cbit_digit];              for (c = 0; c < 32; c++) classbits[c] |= ~cbits[c+cbit_digit];
3123              continue;              continue;
3124    
# Line 2824  for (;; ptr++) Line 3127  for (;; ptr++)
3127              continue;              continue;
3128    
3129              case ESC_W:              case ESC_W:
3130              should_flip_negation = TRUE;              should_flip_negation = TRUE;
3131              for (c = 0; c < 32; c++) classbits[c] |= ~cbits[c+cbit_word];              for (c = 0; c < 32; c++) classbits[c] |= ~cbits[c+cbit_word];
3132              continue;              continue;
3133    
# Line 2834  for (;; ptr++) Line 3137  for (;; ptr++)
3137              continue;              continue;
3138    
3139              case ESC_S:              case ESC_S:
3140              should_flip_negation = TRUE;              should_flip_negation = TRUE;
3141              for (c = 0; c < 32; c++) classbits[c] |= ~cbits[c+cbit_space];              for (c = 0; c < 32; c++) classbits[c] |= ~cbits[c+cbit_space];
3142              classbits[1] |= 0x08;    /* Perl 5.004 onwards omits VT from \s */              classbits[1] |= 0x08;    /* Perl 5.004 onwards omits VT from \s */
3143              continue;              continue;
3144    
             case ESC_E: /* Perl ignores an orphan \E */  
             continue;  
   
3145              default:    /* Not recognized; fall through */              default:    /* Not recognized; fall through */
3146              break;      /* Need "default" setting to stop compiler warning. */              break;      /* Need "default" setting to stop compiler warning. */
3147              }              }
# Line 3021  for (;; ptr++) Line 3321  for (;; ptr++)
3321        entirely. The code for handling \Q and \E is messy. */        entirely. The code for handling \Q and \E is messy. */
3322    
3323        CHECK_RANGE:        CHECK_RANGE:
3324        while (ptr[1] == '\\' && ptr[2] == 'E')        while (ptr[1] == CHAR_BACKSLASH && ptr[2] == CHAR_E)
3325          {          {
3326          inescq = FALSE;          inescq = FALSE;
3327          ptr += 2;          ptr += 2;
# Line 3031  for (;; ptr++) Line 3331  for (;; ptr++)
3331    
3332        /* Remember \r or \n */        /* Remember \r or \n */
3333    
3334        if (c == '\r' || c == '\n') cd->external_flags |= PCRE_HASCRORLF;        if (c == CHAR_CR || c == CHAR_NL) cd->external_flags |= PCRE_HASCRORLF;
3335    
3336        /* Check for range */        /* Check for range */
3337    
3338        if (!inescq && ptr[1] == '-')        if (!inescq && ptr[1] == CHAR_MINUS)
3339          {          {
3340          int d;          int d;
3341          ptr += 2;          ptr += 2;
3342          while (*ptr == '\\' && ptr[1] == 'E') ptr += 2;          while (*ptr == CHAR_BACKSLASH && ptr[1] == CHAR_E) ptr += 2;
3343    
3344          /* If we hit \Q (not followed by \E) at this point, go into escaped          /* If we hit \Q (not followed by \E) at this point, go into escaped
3345          mode. */          mode. */
3346    
3347          while (*ptr == '\\' && ptr[1] == 'Q')          while (*ptr == CHAR_BACKSLASH && ptr[1] == CHAR_Q)
3348            {            {
3349            ptr += 2;            ptr += 2;
3350            if (*ptr == '\\' && ptr[1] == 'E') { ptr += 2; continue; }            if (*ptr == CHAR_BACKSLASH && ptr[1] == CHAR_E)
3351                { ptr += 2; continue; }
3352            inescq = TRUE;            inescq = TRUE;
3353            break;            break;
3354            }            }
3355    
3356          if (*ptr == 0 || (!inescq && *ptr == ']'))          if (*ptr == 0 || (!inescq && *ptr == CHAR_RIGHT_SQUARE_BRACKET))
3357            {            {
3358            ptr = oldptr;            ptr = oldptr;
3359            goto LONE_SINGLE_CHARACTER;            goto LONE_SINGLE_CHARACTER;
# Line 3071  for (;; ptr++) Line 3372  for (;; ptr++)
3372          not any of the other escapes. Perl 5.6 treats a hyphen as a literal          not any of the other escapes. Perl 5.6 treats a hyphen as a literal
3373          in such circumstances. */          in such circumstances. */
3374    
3375          if (!inescq && d == '\\')          if (!inescq && d == CHAR_BACKSLASH)
3376            {            {
3377            d = check_escape(&ptr, errorcodeptr, cd->bracount, options, TRUE);            d = check_escape(&ptr, errorcodeptr, cd->bracount, options, TRUE);
3378            if (*errorcodeptr != 0) goto FAILED;            if (*errorcodeptr != 0) goto FAILED;
3379    
3380            /* \b is backslash; \X is literal X; \R is literal R; any other            /* \b is backspace; \X is literal X; \R is literal R; any other
3381            special means the '-' was literal */            special means the '-' was literal */
3382    
3383            if (d < 0)            if (d < 0)
3384              {              {
3385              if (d == -ESC_b) d = '\b';              if (d == -ESC_b) d = CHAR_BS;
3386              else if (d == -ESC_X) d = 'X';              else if (d == -ESC_X) d = CHAR_X;
3387              else if (d == -ESC_R) d = 'R'; else              else if (d == -ESC_R) d = CHAR_R; else
3388                {                {
3389                ptr = oldptr;                ptr = oldptr;
3390                goto LONE_SINGLE_CHARACTER;  /* A few lines below */                goto LONE_SINGLE_CHARACTER;  /* A few lines below */
# Line 3104  for (;; ptr++) Line 3405  for (;; ptr++)
3405    
3406          /* Remember \r or \n */          /* Remember \r or \n */
3407    
3408          if (d == '\r' || d == '\n') cd->external_flags |= PCRE_HASCRORLF;          if (d == CHAR_CR || d == CHAR_NL) cd->external_flags |= PCRE_HASCRORLF;
3409    
3410          /* In UTF-8 mode, if the upper limit is > 255, or > 127 for caseless          /* In UTF-8 mode, if the upper limit is > 255, or > 127 for caseless
3411          matching, we have to use an XCLASS with extra data items. Caseless          matching, we have to use an XCLASS with extra data items. Caseless
# Line 3224  for (;; ptr++) Line 3525  for (;; ptr++)
3525          if ((options & PCRE_CASELESS) != 0)          if ((options & PCRE_CASELESS) != 0)
3526            {            {
3527            unsigned int othercase;            unsigned int othercase;
3528            if ((othercase = _pcre_ucp_othercase(c)) != NOTACHAR)            if ((othercase = UCD_OTHERCASE(c)) != c)
3529              {              {
3530              *class_utf8data++ = XCL_SINGLE;              *class_utf8data++ = XCL_SINGLE;
3531              class_utf8data += _pcre_ord2utf8(othercase, class_utf8data);              class_utf8data += _pcre_ord2utf8(othercase, class_utf8data);
# Line 3251  for (;; ptr++) Line 3552  for (;; ptr++)
3552    
3553      /* Loop until ']' reached. This "while" is the end of the "do" above. */      /* Loop until ']' reached. This "while" is the end of the "do" above. */
3554    
3555      while ((c = *(++ptr)) != 0 && (c != ']' || inescq));      while ((c = *(++ptr)) != 0 && (c != CHAR_RIGHT_SQUARE_BRACKET || inescq));
3556    
3557      if (c == 0)                          /* Missing terminating ']' */      if (c == 0)                          /* Missing terminating ']' */
3558        {        {
# Line 3340  we set the flag only if there is a liter Line 3641  we set the flag only if there is a liter
3641      zeroreqbyte = reqbyte;      zeroreqbyte = reqbyte;
3642    
3643      /* If there are characters with values > 255, we have to compile an      /* If there are characters with values > 255, we have to compile an
3644      extended class, with its own opcode, unless there was a negated special      extended class, with its own opcode, unless there was a negated special
3645      such as \S in the class, because in that case all characters > 255 are in      such as \S in the class, because in that case all characters > 255 are in
3646      the class, so any that were explicitly given as well can be ignored. If      the class, so any that were explicitly given as well can be ignored. If
3647      (when there are explicit characters > 255 that must be listed) there are no      (when there are explicit characters > 255 that must be listed) there are no
3648      characters < 256, we can omit the bitmap in the actual compiled code. */      characters < 256, we can omit the bitmap in the actual compiled code. */
3649    
# Line 3373  we set the flag only if there is a liter Line 3674  we set the flag only if there is a liter
3674        }        }
3675  #endif  #endif
3676    
3677      /* If there are no characters > 255, set the opcode to OP_CLASS or      /* If there are no characters > 255, set the opcode to OP_CLASS or
3678      OP_NCLASS, depending on whether the whole class was negated and whether      OP_NCLASS, depending on whether the whole class was negated and whether
3679      there were negative specials such as \S in the class. Then copy the 32-byte      there were negative specials such as \S in the class. Then copy the 32-byte
3680      map into the code vector, negating it if necessary. */      map into the code vector, negating it if necessary. */
3681    
3682      *code++ = (negate_class == should_flip_negation) ? OP_CLASS : OP_NCLASS;      *code++ = (negate_class == should_flip_negation) ? OP_CLASS : OP_NCLASS;
3683      if (negate_class)      if (negate_class)
3684        {        {
# Line 3396  we set the flag only if there is a liter Line 3697  we set the flag only if there is a liter
3697      /* Various kinds of repeat; '{' is not necessarily a quantifier, but this      /* Various kinds of repeat; '{' is not necessarily a quantifier, but this
3698      has been tested above. */      has been tested above. */
3699    
3700      case '{':      case CHAR_LEFT_CURLY_BRACKET:
3701      if (!is_quantifier) goto NORMAL_CHAR;      if (!is_quantifier) goto NORMAL_CHAR;
3702      ptr = read_repeat_counts(ptr+1, &repeat_min, &repeat_max, errorcodeptr);      ptr = read_repeat_counts(ptr+1, &repeat_min, &repeat_max, errorcodeptr);
3703      if (*errorcodeptr != 0) goto FAILED;      if (*errorcodeptr != 0) goto FAILED;
3704      goto REPEAT;      goto REPEAT;
3705    
3706      case '*':      case CHAR_ASTERISK:
3707      repeat_min = 0;      repeat_min = 0;
3708      repeat_max = -1;      repeat_max = -1;
3709      goto REPEAT;      goto REPEAT;
3710    
3711      case '+':      case CHAR_PLUS:
3712      repeat_min = 1;      repeat_min = 1;
3713      repeat_max = -1;      repeat_max = -1;
3714      goto REPEAT;      goto REPEAT;
3715    
3716      case '?':      case CHAR_QUESTION_MARK:
3717      repeat_min = 0;      repeat_min = 0;
3718      repeat_max = 1;      repeat_max = 1;
3719    
# Line 3447  we set the flag only if there is a liter Line 3748  we set the flag only if there is a liter
3748      but if PCRE_UNGREEDY is set, it works the other way round. We change the      but if PCRE_UNGREEDY is set, it works the other way round. We change the
3749      repeat type to the non-default. */      repeat type to the non-default. */
3750    
3751      if (ptr[1] == '+')      if (ptr[1] == CHAR_PLUS)
3752        {        {
3753        repeat_type = 0;                  /* Force greedy */        repeat_type = 0;                  /* Force greedy */
3754        possessive_quantifier = TRUE;        possessive_quantifier = TRUE;
3755        ptr++;        ptr++;
3756        }        }
3757      else if (ptr[1] == '?')      else if (ptr[1] == CHAR_QUESTION_MARK)
3758        {        {
3759        repeat_type = greedy_non_default;        repeat_type = greedy_non_default;
3760        ptr++;        ptr++;
# Line 3779  we set the flag only if there is a liter Line 4080  we set the flag only if there is a liter
4080    
4081        if (repeat_min == 0)        if (repeat_min == 0)
4082          {          {
4083          /* If the maximum is also zero, we just omit the group from the output          /* If the maximum is also zero, we used to just omit the group from the
4084          altogether. */          output altogether, like this:
   
         if (repeat_max == 0)  
           {  
           code = previous;  
           goto END_REPEAT;  
           }  
4085    
4086          /* If the maximum is 1 or unlimited, we just have to stick in the          ** if (repeat_max == 0)
4087          BRAZERO and do no more at this point. However, we do need to adjust          **   {
4088          any OP_RECURSE calls inside the group that refer to the group itself or          **   code = previous;
4089          any internal or forward referenced group, because the offset is from          **   goto END_REPEAT;
4090          the start of the whole regex. Temporarily terminate the pattern while          **   }
4091          doing this. */  
4092            However, that fails when a group is referenced as a subroutine from
4093            elsewhere in the pattern, so now we stick in OP_SKIPZERO in front of it
4094            so that it is skipped on execution. As we don't have a list of which
4095            groups are referenced, we cannot do this selectively.
4096    
4097            If the maximum is 1 or unlimited, we just have to stick in the BRAZERO
4098            and do no more at this point. However, we do need to adjust any
4099            OP_RECURSE calls inside the group that refer to the group itself or any
4100            internal or forward referenced group, because the offset is from the
4101            start of the whole regex. Temporarily terminate the pattern while doing
4102            this. */
4103    
4104          if (repeat_max <= 1)          if (repeat_max <= 1)    /* Covers 0, 1, and unlimited */
4105            {            {
4106            *code = OP_END;            *code = OP_END;
4107            adjust_recurse(previous, 1, utf8, cd, save_hwm);            adjust_recurse(previous, 1, utf8, cd, save_hwm);
4108            memmove(previous+1, previous, len);            memmove(previous+1, previous, len);
4109            code++;            code++;
4110              if (repeat_max == 0)
4111                {
4112                *previous++ = OP_SKIPZERO;
4113                goto END_REPEAT;
4114                }
4115            *previous++ = OP_BRAZERO + repeat_type;            *previous++ = OP_BRAZERO + repeat_type;
4116            }            }
4117    
# Line 3995  we set the flag only if there is a liter Line 4306  we set the flag only if there is a liter
4306          }          }
4307        }        }
4308    
4309        /* If previous is OP_FAIL, it was generated by an empty class [] in
4310        JavaScript mode. The other ways in which OP_FAIL can be generated, that is
4311        by (*FAIL) or (?!) set previous to NULL, which gives a "nothing to repeat"
4312        error above. We can just ignore the repeat in JS case. */
4313    
4314        else if (*previous == OP_FAIL) goto END_REPEAT;
4315    
4316      /* Else there's some kind of shambles */      /* Else there's some kind of shambles */
4317    
4318      else      else
# Line 4021  we set the flag only if there is a liter Line 4339  we set the flag only if there is a liter
4339        int len;        int len;
4340        if (*tempcode == OP_EXACT || *tempcode == OP_TYPEEXACT ||        if (*tempcode == OP_EXACT || *tempcode == OP_TYPEEXACT ||
4341            *tempcode == OP_NOTEXACT)            *tempcode == OP_NOTEXACT)
4342          tempcode += _pcre_OP_lengths[*tempcode];          tempcode += _pcre_OP_lengths[*tempcode] +
4343              ((*tempcode == OP_TYPEEXACT &&
4344                 (tempcode[3] == OP_PROP || tempcode[3] == OP_NOTPROP))? 2:0);
4345        len = code - tempcode;        len = code - tempcode;
4346        if (len > 0) switch (*tempcode)        if (len > 0) switch (*tempcode)
4347          {          {
# Line 4067  we set the flag only if there is a liter Line 4387  we set the flag only if there is a liter
4387      lookbehind or option setting or condition or all the other extended      lookbehind or option setting or condition or all the other extended
4388      parenthesis forms.  */      parenthesis forms.  */
4389    
4390      case '(':      case CHAR_LEFT_PARENTHESIS:
4391      newoptions = options;      newoptions = options;
4392      skipbytes = 0;      skipbytes = 0;
4393      bravalue = OP_CBRA;      bravalue = OP_CBRA;
# Line 4076  we set the flag only if there is a liter Line 4396  we set the flag only if there is a liter
4396    
4397      /* First deal with various "verbs" that can be introduced by '*'. */      /* First deal with various "verbs" that can be introduced by '*'. */
4398    
4399      if (*(++ptr) == '*' && (cd->ctypes[ptr[1]] & ctype_letter) != 0)      if (*(++ptr) == CHAR_ASTERISK && (cd->ctypes[ptr[1]] & ctype_letter) != 0)
4400        {        {
4401        int i, namelen;        int i, namelen;
4402        const char *vn = verbnames;        const char *vn = verbnames;
4403        const uschar *name = ++ptr;        const uschar *name = ++ptr;
4404        previous = NULL;        previous = NULL;
4405        while ((cd->ctypes[*++ptr] & ctype_letter) != 0);        while ((cd->ctypes[*++ptr] & ctype_letter) != 0) {};
4406        if (*ptr == ':')        if (*ptr == CHAR_COLON)
4407          {          {
4408          *errorcodeptr = ERR59;   /* Not supported */          *errorcodeptr = ERR59;   /* Not supported */
4409          goto FAILED;          goto FAILED;
4410          }          }
4411        if (*ptr != ')')        if (*ptr != CHAR_RIGHT_PARENTHESIS)
4412          {          {
4413          *errorcodeptr = ERR60;          *errorcodeptr = ERR60;
4414          goto FAILED;          goto FAILED;
# Line 4113  we set the flag only if there is a liter Line 4433  we set the flag only if there is a liter
4433      /* Deal with the extended parentheses; all are introduced by '?', and the      /* Deal with the extended parentheses; all are introduced by '?', and the
4434      appearance of any of them means that this is not a capturing group. */      appearance of any of them means that this is not a capturing group. */
4435    
4436      else if (*ptr == '?')      else if (*ptr == CHAR_QUESTION_MARK)
4437        {        {
4438        int i, set, unset, namelen;        int i, set, unset, namelen;
4439        int *optset;        int *optset;
# Line 4122  we set the flag only if there is a liter Line 4442  we set the flag only if there is a liter
4442    
4443        switch (*(++ptr))        switch (*(++ptr))
4444          {          {
4445          case '#':                 /* Comment; skip to ket */          case CHAR_NUMBER_SIGN:                 /* Comment; skip to ket */
4446          ptr++;          ptr++;
4447          while (*ptr != 0 && *ptr != ')') ptr++;          while (*ptr != 0 && *ptr != CHAR_RIGHT_PARENTHESIS) ptr++;
4448          if (*ptr == 0)          if (*ptr == 0)
4449            {            {
4450            *errorcodeptr = ERR18;            *errorcodeptr = ERR18;
# Line 4134  we set the flag only if there is a liter Line 4454  we set the flag only if there is a liter
4454    
4455    
4456          /* ------------------------------------------------------------ */          /* ------------------------------------------------------------ */
4457          case '|':                 /* Reset capture count for each branch */          case CHAR_VERTICAL_LINE:  /* Reset capture count for each branch */
4458          reset_bracount = TRUE;          reset_bracount = TRUE;
4459          /* Fall through */          /* Fall through */
4460    
4461          /* ------------------------------------------------------------ */          /* ------------------------------------------------------------ */
4462          case ':':                 /* Non-capturing bracket */          case CHAR_COLON:          /* Non-capturing bracket */
4463          bravalue = OP_BRA;          bravalue = OP_BRA;
4464          ptr++;          ptr++;
4465          break;          break;
4466    
4467    
4468          /* ------------------------------------------------------------ */          /* ------------------------------------------------------------ */
4469          case '(':          case CHAR_LEFT_PARENTHESIS:
4470          bravalue = OP_COND;       /* Conditional group */          bravalue = OP_COND;       /* Conditional group */
4471    
4472          /* A condition can be an assertion, a number (referring to a numbered          /* A condition can be an assertion, a number (referring to a numbered
# Line 4166  we set the flag only if there is a liter Line 4486  we set the flag only if there is a liter
4486          the switch. This will take control down to where bracketed groups,          the switch. This will take control down to where bracketed groups,
4487          including assertions, are processed. */          including assertions, are processed. */
4488    
4489          if (ptr[1] == '?' && (ptr[2] == '=' || ptr[2] == '!' || ptr[2] == '<'))          if (ptr[1] == CHAR_QUESTION_MARK && (ptr[2] == CHAR_EQUALS_SIGN ||
4490                ptr[2] == CHAR_EXCLAMATION_MARK || ptr[2] == CHAR_LESS_THAN_SIGN))
4491            break;            break;
4492    
4493          /* Most other conditions use OP_CREF (a couple change to OP_RREF          /* Most other conditions use OP_CREF (a couple change to OP_RREF
# Line 4178  we set the flag only if there is a liter Line 4499  we set the flag only if there is a liter
4499    
4500          /* Check for a test for recursion in a named group. */          /* Check for a test for recursion in a named group. */
4501    
4502          if (ptr[1] == 'R' && ptr[2] == '&')          if (ptr[1] == CHAR_R && ptr[2] == CHAR_AMPERSAND)
4503            {            {
4504            terminator = -1;            terminator = -1;
4505            ptr += 2;            ptr += 2;
# Line 4188  we set the flag only if there is a liter Line 4509  we set the flag only if there is a liter
4509          /* Check for a test for a named group's having been set, using the Perl          /* Check for a test for a named group's having been set, using the Perl
4510          syntax (?(<name>) or (?('name') */          syntax (?(<name>) or (?('name') */
4511    
4512          else if (ptr[1] == '<')          else if (ptr[1] == CHAR_LESS_THAN_SIGN)
4513            {            {
4514            terminator = '>';            terminator = CHAR_GREATER_THAN_SIGN;
4515            ptr++;            ptr++;
4516            }            }
4517          else if (ptr[1] == '\'')          else if (ptr[1] == CHAR_APOSTROPHE)
4518            {            {
4519            terminator = '\'';            terminator = CHAR_APOSTROPHE;
4520            ptr++;            ptr++;
4521            }            }
4522          else          else
4523            {            {
4524            terminator = 0;            terminator = 0;
4525            if (ptr[1] == '-' || ptr[1] == '+') refsign = *(++ptr);            if (ptr[1] == CHAR_MINUS || ptr[1] == CHAR_PLUS) refsign = *(++ptr);
4526            }            }
4527    
4528          /* We now expect to read a name; anything else is an error */          /* We now expect to read a name; anything else is an error */
# Line 4221  we set the flag only if there is a liter Line 4542  we set the flag only if there is a liter
4542            {            {
4543            if (recno >= 0)            if (recno >= 0)
4544              recno = ((digitab[*ptr] & ctype_digit) != 0)?              recno = ((digitab[*ptr] & ctype_digit) != 0)?
4545                recno * 10 + *ptr - '0' : -1;                recno * 10 + *ptr - CHAR_0 : -1;
4546            ptr++;            ptr++;
4547            }            }
4548          namelen = ptr - name;          namelen = ptr - name;
4549    
4550          if ((terminator > 0 && *ptr++ != terminator) || *ptr++ != ')')          if ((terminator > 0 && *ptr++ != terminator) ||
4551                *ptr++ != CHAR_RIGHT_PARENTHESIS)
4552            {            {
4553            ptr--;      /* Error offset */            ptr--;      /* Error offset */
4554            *errorcodeptr = ERR26;            *errorcodeptr = ERR26;
# Line 4248  we set the flag only if there is a liter Line 4570  we set the flag only if there is a liter
4570              *errorcodeptr = ERR58;              *errorcodeptr = ERR58;
4571              goto FAILED;              goto FAILED;
4572              }              }
4573            recno = (refsign == '-')?            recno = (refsign == CHAR_MINUS)?
4574              cd->bracount - recno + 1 : recno +cd->bracount;              cd->bracount - recno + 1 : recno +cd->bracount;
4575            if (recno <= 0 || recno > cd->final_bracount)            if (recno <= 0 || recno > cd->final_bracount)
4576              {              {
# Line 4279  we set the flag only if there is a liter Line 4601  we set the flag only if there is a liter
4601    
4602          /* Search the pattern for a forward reference */          /* Search the pattern for a forward reference */
4603    
4604          else if ((i = find_parens(ptr, cd->bracount, name, namelen,          else if ((i = find_parens(cd, name, namelen,
4605                          (options & PCRE_EXTENDED) != 0)) > 0)                          (options & PCRE_EXTENDED) != 0)) > 0)
4606            {            {
4607            PUT2(code, 2+LINK_SIZE, i);            PUT2(code, 2+LINK_SIZE, i);
# Line 4300  we set the flag only if there is a liter Line 4622  we set the flag only if there is a liter
4622          /* Check for (?(R) for recursion. Allow digits after R to specify a          /* Check for (?(R) for recursion. Allow digits after R to specify a
4623          specific group number. */          specific group number. */
4624    
4625          else if (*name == 'R')          else if (*name == CHAR_R)
4626            {            {
4627            recno = 0;            recno = 0;
4628            for (i = 1; i < namelen; i++)            for (i = 1; i < namelen; i++)
# Line 4310  we set the flag only if there is a liter Line 4632  we set the flag only if there is a liter
4632                *errorcodeptr = ERR15;                *errorcodeptr = ERR15;
4633                goto FAILED;                goto FAILED;
4634                }                }
4635              recno = recno * 10 + name[i] - '0';              recno = recno * 10 + name[i] - CHAR_0;
4636              }              }
4637            if (recno == 0) recno = RREF_ANY;            if (recno == 0) recno = RREF_ANY;
4638            code[1+LINK_SIZE] = OP_RREF;      /* Change test type */            code[1+LINK_SIZE] = OP_RREF;      /* Change test type */
# Line 4320  we set the flag only if there is a liter Line 4642  we set the flag only if there is a liter
4642          /* Similarly, check for the (?(DEFINE) "condition", which is always          /* Similarly, check for the (?(DEFINE) "condition", which is always
4643          false. */          false. */
4644    
4645          else if (namelen == 6 && strncmp((char *)name, "DEFINE", 6) == 0)          else if (namelen == 6 && strncmp((char *)name, STRING_DEFINE, 6) == 0)
4646            {            {
4647            code[1+LINK_SIZE] = OP_DEF;            code[1+LINK_SIZE] = OP_DEF;
4648            skipbytes = 1;            skipbytes = 1;
4649            }            }
4650    
4651          /* Check for the "name" actually being a subpattern number. We are          /* Check for the "name" actually being a subpattern number. We are
4652          in the second pass here, so final_bracount is set. */          in the second pass here, so final_bracount is set. */
4653    
4654          else if (recno > 0 && recno <= cd->final_bracount)          else if (recno > 0 && recno <= cd->final_bracount)
4655            {            {
# Line 4345  we set the flag only if there is a liter Line 4667  we set the flag only if there is a liter
4667    
4668    
4669          /* ------------------------------------------------------------ */          /* ------------------------------------------------------------ */
4670          case '=':                 /* Positive lookahead */          case CHAR_EQUALS_SIGN:                 /* Positive lookahead */
4671          bravalue = OP_ASSERT;          bravalue = OP_ASSERT;
4672          ptr++;          ptr++;
4673          break;          break;
4674    
4675    
4676          /* ------------------------------------------------------------ */          /* ------------------------------------------------------------ */
4677          case '!':                 /* Negative lookahead */          case CHAR_EXCLAMATION_MARK:            /* Negative lookahead */
4678          ptr++;          ptr++;
4679          if (*ptr == ')')          /* Optimize (?!) */          if (*ptr == CHAR_RIGHT_PARENTHESIS)    /* Optimize (?!) */
4680            {            {
4681            *code++ = OP_FAIL;            *code++ = OP_FAIL;
4682            previous = NULL;            previous = NULL;
# Line 4365  we set the flag only if there is a liter Line 4687  we set the flag only if there is a liter
4687    
4688    
4689          /* ------------------------------------------------------------ */          /* ------------------------------------------------------------ */
4690          case '<':                 /* Lookbehind or named define */          case CHAR_LESS_THAN_SIGN:              /* Lookbehind or named define */
4691          switch (ptr[1])          switch (ptr[1])
4692            {            {
4693            case '=':               /* Positive lookbehind */            case CHAR_EQUALS_SIGN:               /* Positive lookbehind */
4694            bravalue = OP_ASSERTBACK;            bravalue = OP_ASSERTBACK;
4695            ptr += 2;            ptr += 2;
4696            break;            break;
4697    
4698            case '!':               /* Negative lookbehind */            case CHAR_EXCLAMATION_MARK:          /* Negative lookbehind */
4699            bravalue = OP_ASSERTBACK_NOT;            bravalue = OP_ASSERTBACK_NOT;
4700            ptr += 2;            ptr += 2;
4701            break;            break;
# Line 4388  we set the flag only if there is a liter Line 4710  we set the flag only if there is a liter
4710    
4711    
4712          /* ------------------------------------------------------------ */          /* ------------------------------------------------------------ */
4713          case '>':                 /* One-time brackets */          case CHAR_GREATER_THAN_SIGN:           /* One-time brackets */
4714          bravalue = OP_ONCE;          bravalue = OP_ONCE;
4715          ptr++;          ptr++;
4716          break;          break;
4717    
4718    
4719          /* ------------------------------------------------------------ */          /* ------------------------------------------------------------ */
4720          case 'C':                 /* Callout - may be followed by digits; */          case CHAR_C:                 /* Callout - may be followed by digits; */
4721          previous_callout = code;  /* Save for later completion */          previous_callout = code;  /* Save for later completion */
4722          after_manual_callout = 1; /* Skip one item before completing */          after_manual_callout = 1; /* Skip one item before completing */
4723          *code++ = OP_CALLOUT;          *code++ = OP_CALLOUT;
4724            {            {
4725            int n = 0;            int n = 0;
4726            while ((digitab[*(++ptr)] & ctype_digit) != 0)            while ((digitab[*(++ptr)] & ctype_digit) != 0)
4727              n = n * 10 + *ptr - '0';              n = n * 10 + *ptr - CHAR_0;
4728            if (*ptr != ')')            if (*ptr != CHAR_RIGHT_PARENTHESIS)
4729              {              {
4730              *errorcodeptr = ERR39;              *errorcodeptr = ERR39;
4731              goto FAILED;              goto FAILED;
# Line 4423  we set the flag only if there is a liter Line 4745  we set the flag only if there is a liter
4745    
4746    
4747          /* ------------------------------------------------------------ */          /* ------------------------------------------------------------ */
4748          case 'P':                 /* Python-style named subpattern handling */          case CHAR_P:              /* Python-style named subpattern handling */
4749          if (*(++ptr) == '=' || *ptr == '>')  /* Reference or recursion */          if (*(++ptr) == CHAR_EQUALS_SIGN ||
4750                *ptr == CHAR_GREATER_THAN_SIGN)  /* Reference or recursion */
4751            {            {
4752            is_recurse = *ptr == '>';            is_recurse = *ptr == CHAR_GREATER_THAN_SIGN;
4753            terminator = ')';            terminator = CHAR_RIGHT_PARENTHESIS;
4754            goto NAMED_REF_OR_RECURSE;            goto NAMED_REF_OR_RECURSE;
4755            }            }
4756          else if (*ptr != '<')    /* Test for Python-style definition */          else if (*ptr != CHAR_LESS_THAN_SIGN)  /* Test for Python-style defn */
4757            {            {
4758            *errorcodeptr = ERR41;            *errorcodeptr = ERR41;
4759            goto FAILED;            goto FAILED;
# Line 4440  we set the flag only if there is a liter Line 4763  we set the flag only if there is a liter
4763    
4764          /* ------------------------------------------------------------ */          /* ------------------------------------------------------------ */
4765          DEFINE_NAME:    /* Come here from (?< handling */          DEFINE_NAME:    /* Come here from (?< handling */
4766          case '\'':          case CHAR_APOSTROPHE:
4767            {            {
4768            terminator = (*ptr == '<')? '>' : '\'';            terminator = (*ptr == CHAR_LESS_THAN_SIGN)?
4769                CHAR_GREATER_THAN_SIGN : CHAR_APOSTROPHE;
4770            name = ++ptr;            name = ++ptr;
4771    
4772            while ((cd->ctypes[*ptr] & ctype_word) != 0) ptr++;            while ((cd->ctypes[*ptr] & ctype_word) != 0) ptr++;
# Line 4516  we set the flag only if there is a liter Line 4840  we set the flag only if there is a liter
4840    
4841    
4842          /* ------------------------------------------------------------ */          /* ------------------------------------------------------------ */
4843          case '&':                 /* Perl recursion/subroutine syntax */          case CHAR_AMPERSAND:            /* Perl recursion/subroutine syntax */
4844          terminator = ')';          terminator = CHAR_RIGHT_PARENTHESIS;
4845          is_recurse = TRUE;          is_recurse = TRUE;
4846          /* Fall through */          /* Fall through */
4847    
# Line 4525  we set the flag only if there is a liter Line 4849  we set the flag only if there is a liter
4849          references (?P=name) and recursion (?P>name), as well as falling          references (?P=name) and recursion (?P>name), as well as falling
4850          through from the Perl recursion syntax (?&name). We also come here from          through from the Perl recursion syntax (?&name). We also come here from
4851          the Perl \k<name> or \k'name' back reference syntax and the \k{name}          the Perl \k<name> or \k'name' back reference syntax and the \k{name}
4852          .NET syntax. */          .NET syntax, and the Oniguruma \g<...> and \g'...' subroutine syntax. */
4853    
4854          NAMED_REF_OR_RECURSE:          NAMED_REF_OR_RECURSE:
4855          name = ++ptr;          name = ++ptr;
# Line 4541  we set the flag only if there is a liter Line 4865  we set the flag only if there is a liter
4865              {              {
4866              *errorcodeptr = ERR62;              *errorcodeptr = ERR62;
4867              goto FAILED;              goto FAILED;
4868              }              }
4869            if (*ptr != terminator)            if (*ptr != terminator)
4870              {              {
4871              *errorcodeptr = ERR42;              *errorcodeptr = ERR42;
# Line 4555  we set the flag only if there is a liter Line 4879  we set the flag only if there is a liter
4879            recno = 0;            recno = 0;
4880            }            }
4881    
4882          /* In the real compile, seek the name in the table. We check the name          /* In the real compile, seek the name in the table. We check the name
4883          first, and then check that we have reached the end of the name in the          first, and then check that we have reached the end of the name in the
4884          table. That way, if the name is longer than any in the table,          table. That way, if the name is longer than any in the table,
4885          the comparison will fail without reading beyond the table entry. */          the comparison will fail without reading beyond the table entry. */
4886    
# Line 4566  we set the flag only if there is a liter Line 4890  we set the flag only if there is a liter
4890            for (i = 0; i < cd->names_found; i++)            for (i = 0; i < cd->names_found; i++)
4891              {              {
4892              if (strncmp((char *)name, (char *)slot+2, namelen) == 0 &&              if (strncmp((char *)name, (char *)slot+2, namelen) == 0 &&
4893                  slot[2+namelen] == 0)                  slot[2+namelen] == 0)
4894                break;                break;
4895              slot += cd->name_entry_size;              slot += cd->name_entry_size;
4896              }              }
# Line 4576  we set the flag only if there is a liter Line 4900  we set the flag only if there is a liter
4900              recno = GET2(slot, 0);              recno = GET2(slot, 0);
4901              }              }
4902            else if ((recno =                /* Forward back reference */            else if ((recno =                /* Forward back reference */
4903                      find_parens(ptr, cd->bracount, name, namelen,                      find_parens(cd, name, namelen,
4904                        (options & PCRE_EXTENDED) != 0)) <= 0)                        (options & PCRE_EXTENDED) != 0)) <= 0)
4905              {              {
4906              *errorcodeptr = ERR15;              *errorcodeptr = ERR15;
# Line 4592  we set the flag only if there is a liter Line 4916  we set the flag only if there is a liter
4916    
4917    
4918          /* ------------------------------------------------------------ */          /* ------------------------------------------------------------ */
4919          case 'R':                 /* Recursion */          case CHAR_R:              /* Recursion */
4920          ptr++;                    /* Same as (?0)      */          ptr++;                    /* Same as (?0)      */
4921          /* Fall through */          /* Fall through */
4922    
4923    
4924          /* ------------------------------------------------------------ */          /* ------------------------------------------------------------ */
4925          case '-': case '+':          case CHAR_MINUS: case CHAR_PLUS:  /* Recursion or subroutine */
4926          case '0': case '1': case '2': case '3': case '4':   /* Recursion or */          case CHAR_0: case CHAR_1: case CHAR_2: case CHAR_3: case CHAR_4:
4927          case '5': case '6': case '7': case '8': case '9':   /* subroutine */          case CHAR_5: case CHAR_6: case CHAR_7: case CHAR_8: case CHAR_9:
4928            {            {
4929            const uschar *called;            const uschar *called;
4930              terminator = CHAR_RIGHT_PARENTHESIS;
4931    
4932              /* Come here from the \g<...> and \g'...' code (Oniguruma
4933              compatibility). However, the syntax has been checked to ensure that
4934              the ... are a (signed) number, so that neither ERR63 nor ERR29 will
4935              be called on this path, nor with the jump to OTHER_CHAR_AFTER_QUERY
4936              ever be taken. */
4937    
4938            if ((refsign = *ptr) == '+')            HANDLE_NUMERICAL_RECURSION:
4939    
4940              if ((refsign = *ptr) == CHAR_PLUS)
4941              {              {
4942              ptr++;              ptr++;
4943              if ((digitab[*ptr] & ctype_digit) == 0)              if ((digitab[*ptr] & ctype_digit) == 0)
4944                {                {
4945                *errorcodeptr = ERR63;                *errorcodeptr = ERR63;
4946                goto FAILED;                goto FAILED;
4947                }                }
4948              }              }
4949            else if (refsign == '-')            else if (refsign == CHAR_MINUS)
4950              {              {
4951              if ((digitab[ptr[1]] & ctype_digit) == 0)              if ((digitab[ptr[1]] & ctype_digit) == 0)
4952                goto OTHER_CHAR_AFTER_QUERY;                goto OTHER_CHAR_AFTER_QUERY;
# Line 4622  we set the flag only if there is a liter Line 4955  we set the flag only if there is a liter
4955    
4956            recno = 0;            recno = 0;
4957            while((digitab[*ptr] & ctype_digit) != 0)            while((digitab[*ptr] & ctype_digit) != 0)
4958              recno = recno * 10 + *ptr++ - '0';              recno = recno * 10 + *ptr++ - CHAR_0;
4959    
4960            if (*ptr != ')')            if (*ptr != terminator)
4961              {              {
4962              *errorcodeptr = ERR29;              *errorcodeptr = ERR29;
4963              goto FAILED;              goto FAILED;
4964              }              }
4965    
4966            if (refsign == '-')            if (refsign == CHAR_MINUS)
4967              {              {
4968              if (recno == 0)              if (recno == 0)
4969                {                {
# Line 4644  we set the flag only if there is a liter Line 4977  we set the flag only if there is a liter
4977                goto FAILED;                goto FAILED;
4978                }                }
4979              }              }
4980            else if (refsign == '+')            else if (refsign == CHAR_PLUS)
4981              {              {
4982              if (recno == 0)              if (recno == 0)
4983                {                {
# Line 4677  we set the flag only if there is a liter Line 5010  we set the flag only if there is a liter
5010    
5011              if (called == NULL)              if (called == NULL)
5012                {                {
5013                if (find_parens(ptr, cd->bracount, NULL, recno,                if (find_parens(cd, NULL, recno,
5014                     (options & PCRE_EXTENDED) != 0) < 0)                      (options & PCRE_EXTENDED) != 0) < 0)
5015                  {                  {
5016                  *errorcodeptr = ERR15;                  *errorcodeptr = ERR15;
5017                  goto FAILED;                  goto FAILED;
# Line 4730  we set the flag only if there is a liter Line 5063  we set the flag only if there is a liter
5063          set = unset = 0;          set = unset = 0;
5064          optset = &set;          optset = &set;
5065    
5066          while (*ptr != ')' && *ptr != ':')          while (*ptr != CHAR_RIGHT_PARENTHESIS && *ptr != CHAR_COLON)
5067            {            {
5068            switch (*ptr++)            switch (*ptr++)
5069              {              {
5070              case '-': optset = &unset; break;              case CHAR_MINUS: optset = &unset; break;
5071    
5072              case 'J':    /* Record that it changed in the external options */              case CHAR_J:    /* Record that it changed in the external options */
5073              *optset |= PCRE_DUPNAMES;              *optset |= PCRE_DUPNAMES;
5074              cd->external_flags |= PCRE_JCHANGED;              cd->external_flags |= PCRE_JCHANGED;
5075              break;              break;
5076    
5077              case 'i': *optset |= PCRE_CASELESS; break;              case CHAR_i: *optset |= PCRE_CASELESS; break;
5078              case 'm': *optset |= PCRE_MULTILINE; break;              case CHAR_m: *optset |= PCRE_MULTILINE; break;
5079              case 's': *optset |= PCRE_DOTALL; break;              case CHAR_s: *optset |= PCRE_DOTALL; break;
5080              case 'x': *optset |= PCRE_EXTENDED; break;              case CHAR_x: *optset |= PCRE_EXTENDED; break;
5081              case 'U': *optset |= PCRE_UNGREEDY; break;              case CHAR_U: *optset |= PCRE_UNGREEDY; break;
5082              case 'X': *optset |= PCRE_EXTRA; break;              case CHAR_X: *optset |= PCRE_EXTRA; break;
5083    
5084              default:  *errorcodeptr = ERR12;              default:  *errorcodeptr = ERR12;
5085                        ptr--;    /* Correct the offset */                        ptr--;    /* Correct the offset */
# Line 4777  we set the flag only if there is a liter Line 5110  we set the flag only if there is a liter
5110          both phases.          both phases.
5111    
5112          If we are not at the pattern start, compile code to change the ims          If we are not at the pattern start, compile code to change the ims
5113          options if this setting actually changes any of them. We also pass the          options if this setting actually changes any of them, and reset the
5114          new setting back so that it can be put at the start of any following          greedy defaults and the case value for firstbyte and reqbyte. */
         branches, and when this group ends (if we are in a group), a resetting  
         item can be compiled. */  
5115    
5116          if (*ptr == ')')          if (*ptr == CHAR_RIGHT_PARENTHESIS)
5117            {            {
5118            if (code == cd->start_code + 1 + LINK_SIZE &&            if (code == cd->start_code + 1 + LINK_SIZE &&
5119                 (lengthptr == NULL || *lengthptr == 2 + 2*LINK_SIZE))                 (lengthptr == NULL || *lengthptr == 2 + 2*LINK_SIZE))
5120              {              {
5121              cd->external_options = newoptions;              cd->external_options = newoptions;
             options = newoptions;  
5122              }              }
5123           else           else
5124              {              {
# Line 4797  we set the flag only if there is a liter Line 5127  we set the flag only if there is a liter
5127                *code++ = OP_OPT;                *code++ = OP_OPT;
5128                *code++ = newoptions & PCRE_IMS;                *code++ = newoptions & PCRE_IMS;
5129                }                }
   
             /* Change options at this level, and pass them back for use  
             in subsequent branches. Reset the greedy defaults and the case  
             value for firstbyte and reqbyte. */  
   
             *optionsptr = options = newoptions;  
5130              greedy_default = ((newoptions & PCRE_UNGREEDY) != 0);              greedy_default = ((newoptions & PCRE_UNGREEDY) != 0);
5131              greedy_non_default = greedy_default ^ 1;              greedy_non_default = greedy_default ^ 1;
5132              req_caseopt = ((options & PCRE_CASELESS) != 0)? REQ_CASELESS : 0;              req_caseopt = ((newoptions & PCRE_CASELESS) != 0)? REQ_CASELESS : 0;
5133              }              }
5134    
5135              /* Change options at this level, and pass them back for use
5136              in subsequent branches. When not at the start of the pattern, this
5137              information is also necessary so that a resetting item can be
5138              compiled at the end of a group (if we are in a group). */
5139    
5140              *optionsptr = options = newoptions;
5141            previous = NULL;       /* This item can't be repeated */            previous = NULL;       /* This item can't be repeated */
5142            continue;              /* It is complete */            continue;              /* It is complete */
5143            }            }
# Line 4923  we set the flag only if there is a liter Line 5253  we set the flag only if there is a liter
5253    
5254      /* Error if hit end of pattern */      /* Error if hit end of pattern */
5255    
5256      if (*ptr != ')')      if (*ptr != CHAR_RIGHT_PARENTHESIS)
5257        {        {
5258        *errorcodeptr = ERR14;        *errorcodeptr = ERR14;
5259        goto FAILED;        goto FAILED;
# Line 5021  we set the flag only if there is a liter Line 5351  we set the flag only if there is a liter
5351      We can test for values between ESC_b and ESC_Z for the latter; this may      We can test for values between ESC_b and ESC_Z for the latter; this may
5352      have to change if any new ones are ever created. */      have to change if any new ones are ever created. */
5353    
5354      case '\\':      case CHAR_BACKSLASH:
5355      tempptr = ptr;      tempptr = ptr;
5356      c = check_escape(&ptr, errorcodeptr, cd->bracount, options, FALSE);      c = check_escape(&ptr, errorcodeptr, cd->bracount, options, FALSE);
5357      if (*errorcodeptr != 0) goto FAILED;      if (*errorcodeptr != 0) goto FAILED;
# Line 5030  we set the flag only if there is a liter Line 5360  we set the flag only if there is a liter
5360        {        {
5361        if (-c == ESC_Q)            /* Handle start of quoted string */        if (-c == ESC_Q)            /* Handle start of quoted string */
5362          {          {
5363          if (ptr[1] == '\\' && ptr[2] == 'E') ptr += 2; /* avoid empty string */          if (ptr[1] == CHAR_BACKSLASH && ptr[2] == CHAR_E)
5364            else inescq = TRUE;            ptr += 2;               /* avoid empty string */
5365                else inescq = TRUE;
5366          continue;          continue;
5367          }          }
5368    
# Line 5048  we set the flag only if there is a liter Line 5379  we set the flag only if there is a liter
5379        zerofirstbyte = firstbyte;        zerofirstbyte = firstbyte;
5380        zeroreqbyte = reqbyte;        zeroreqbyte = reqbyte;
5381    
5382          /* \g<name> or \g'name' is a subroutine call by name and \g<n> or \g'n'
5383          is a subroutine call by number (Oniguruma syntax). In fact, the value
5384          -ESC_g is returned only for these cases. So we don't need to check for <
5385          or ' if the value is -ESC_g. For the Perl syntax \g{n} the value is
5386          -ESC_REF+n, and for the Perl syntax \g{name} the result is -ESC_k (as
5387          that is a synonym for a named back reference). */
5388    
5389          if (-c == ESC_g)
5390            {
5391            const uschar *p;
5392            save_hwm = cd->hwm;   /* Normally this is set when '(' is read */
5393            terminator = (*(++ptr) == CHAR_LESS_THAN_SIGN)?
5394              CHAR_GREATER_THAN_SIGN : CHAR_APOSTROPHE;
5395    
5396            /* These two statements stop the compiler from warning about possibly
5397            unset variables caused by the jump to HANDLE_NUMERICAL_RECURSION. In
5398            fact, because we actually check for a number below, the paths that
5399            would actually be in error are never taken. */
5400    
5401            skipbytes = 0;
5402            reset_bracount = FALSE;
5403    
5404            /* Test for a name */
5405    
5406            if (ptr[1] != CHAR_PLUS && ptr[1] != CHAR_MINUS)
5407              {
5408              BOOL isnumber = TRUE;
5409              for (p = ptr + 1; *p != 0 && *p != terminator; p++)
5410                {
5411                if ((cd->ctypes[*p] & ctype_digit) == 0) isnumber = FALSE;
5412                if ((cd->ctypes[*p] & ctype_word) == 0) break;
5413                }
5414              if (*p != terminator)
5415                {
5416                *errorcodeptr = ERR57;
5417                break;
5418                }
5419              if (isnumber)
5420                {
5421                ptr++;
5422                goto HANDLE_NUMERICAL_RECURSION;
5423                }
5424              is_recurse = TRUE;
5425              goto NAMED_REF_OR_RECURSE;
5426              }
5427    
5428            /* Test a signed number in angle brackets or quotes. */
5429    
5430            p = ptr + 2;
5431            while ((digitab[*p] & ctype_digit) != 0) p++;
5432            if (*p != terminator)
5433              {
5434              *errorcodeptr = ERR57;
5435              break;
5436              }
5437            ptr++;
5438            goto HANDLE_NUMERICAL_RECURSION;
5439            }
5440    
5441        /* \k<name> or \k'name' is a back reference by name (Perl syntax).        /* \k<name> or \k'name' is a back reference by name (Perl syntax).
5442        We also support \k{name} (.NET syntax) */        We also support \k{name} (.NET syntax) */
5443    
5444        if (-c == ESC_k && (ptr[1] == '<' || ptr[1] == '\'' || ptr[1] == '{'))        if (-c == ESC_k && (ptr[1] == CHAR_LESS_THAN_SIGN ||
5445              ptr[1] == CHAR_APOSTROPHE || ptr[1] == CHAR_LEFT_CURLY_BRACKET))
5446          {          {
5447          is_recurse = FALSE;          is_recurse = FALSE;
5448          terminator = (*(++ptr) == '<')? '>' : (*ptr == '\'')? '\'' : '}';          terminator = (*(++ptr) == CHAR_LESS_THAN_SIGN)?
5449              CHAR_GREATER_THAN_SIGN : (*ptr == CHAR_APOSTROPHE)?
5450              CHAR_APOSTROPHE : CHAR_RIGHT_CURLY_BRACKET;
5451          goto NAMED_REF_OR_RECURSE;          goto NAMED_REF_OR_RECURSE;
5452          }          }
5453    
# Line 5157  we set the flag only if there is a liter Line 5550  we set the flag only if there is a liter
5550    
5551      /* Remember if \r or \n were seen */      /* Remember if \r or \n were seen */
5552    
5553      if (mcbuffer[0] == '\r' || mcbuffer[0] == '\n')      if (mcbuffer[0] == CHAR_CR || mcbuffer[0] == CHAR_NL)
5554        cd->external_flags |= PCRE_HASCRORLF;        cd->external_flags |= PCRE_HASCRORLF;
5555    
5556      /* Set the first and required bytes appropriately. If no previous first      /* Set the first and required bytes appropriately. If no previous first
# Line 5402  for (;;) Line 5795  for (;;)
5795    compile a resetting op-code following, except at the very end of the pattern.    compile a resetting op-code following, except at the very end of the pattern.
5796    Return leaving the pointer at the terminating char. */    Return leaving the pointer at the terminating char. */
5797    
5798    if (*ptr != '|')    if (*ptr != CHAR_VERTICAL_LINE)
5799      {      {
5800      if (lengthptr == NULL)      if (lengthptr == NULL)
5801        {        {
# Line 5425  for (;;) Line 5818  for (;;)
5818    
5819      /* Resetting option if needed */      /* Resetting option if needed */
5820    
5821      if ((options & PCRE_IMS) != oldims && *ptr == ')')      if ((options & PCRE_IMS) != oldims && *ptr == CHAR_RIGHT_PARENTHESIS)
5822        {        {
5823        *code++ = OP_OPT;        *code++ = OP_OPT;
5824        *code++ = oldims;        *code++ = oldims;
# Line 5554  do { Line 5947  do {
5947       if (!is_anchored(scode, options, bracket_map, backref_map)) return FALSE;       if (!is_anchored(scode, options, bracket_map, backref_map)) return FALSE;
5948       }       }
5949    
5950     /* .* is not anchored unless DOTALL is set and it isn't in brackets that     /* .* is not anchored unless DOTALL is set (which generates OP_ALLANY) and
5951     are or may be referenced. */     it isn't in brackets that are or may be referenced. */
5952    
5953     else if ((op == OP_TYPESTAR || op == OP_TYPEMINSTAR ||     else if ((op == OP_TYPESTAR || op == OP_TYPEMINSTAR ||
5954               op == OP_TYPEPOSSTAR) &&               op == OP_TYPEPOSSTAR))
             (*options & PCRE_DOTALL) != 0)  
5955       {       {
5956       if (scode[1] != OP_ANY || (bracket_map & backref_map) != 0) return FALSE;       if (scode[1] != OP_ALLANY || (bracket_map & backref_map) != 0)
5957           return FALSE;
5958       }       }
5959    
5960     /* Check for explicit anchoring */     /* Check for explicit anchoring */
# Line 5607  do { Line 6000  do {
6000       NULL, 0, FALSE);       NULL, 0, FALSE);
6001     register int op = *scode;     register int op = *scode;
6002    
6003       /* If we are at the start of a conditional assertion group, *both* the
6004       conditional assertion *and* what follows the condition must satisfy the test
6005       for start of line. Other kinds of condition fail. Note that there may be an
6006       auto-callout at the start of a condition. */
6007    
6008       if (op == OP_COND)
6009         {
6010         scode += 1 + LINK_SIZE;
6011         if (*scode == OP_CALLOUT) scode += _pcre_OP_lengths[OP_CALLOUT];
6012         switch (*scode)
6013           {
6014           case OP_CREF:
6015           case OP_RREF:
6016           case OP_DEF:
6017           return FALSE;
6018    
6019           default:     /* Assertion */
6020           if (!is_startline(scode, bracket_map, backref_map)) return FALSE;
6021           do scode += GET(scode, 1); while (*scode == OP_ALT);
6022           scode += 1 + LINK_SIZE;
6023           break;
6024           }
6025         scode = first_significant_code(scode, NULL, 0, FALSE);
6026         op = *scode;
6027         }
6028    
6029     /* Non-capturing brackets */     /* Non-capturing brackets */
6030    
6031     if (op == OP_BRA)     if (op == OP_BRA)
# Line 5625  do { Line 6044  do {
6044    
6045     /* Other brackets */     /* Other brackets */
6046    
6047     else if (op == OP_ASSERT || op == OP_ONCE || op == OP_COND)     else if (op == OP_ASSERT || op == OP_ONCE)
6048       { if (!is_startline(scode, bracket_map, backref_map)) return FALSE; }       {
6049         if (!is_startline(scode, bracket_map, backref_map)) return FALSE;
6050         }
6051    
6052     /* .* means "start at start or after \n" if it isn't in brackets that     /* .* means "start at start or after \n" if it isn't in brackets that
6053     may be referenced. */     may be referenced. */
# Line 5743  Returns: pointer to compiled data Line 6164  Returns: pointer to compiled data
6164                  with errorptr and erroroffset set                  with errorptr and erroroffset set
6165  */  */
6166    
6167  PCRE_EXP_DEFN pcre *  PCRE_EXP_DEFN pcre * PCRE_CALL_CONVENTION
6168  pcre_compile(const char *pattern, int options, const char **errorptr,  pcre_compile(const char *pattern, int options, const char **errorptr,
6169    int *erroroffset, const unsigned char *tables)    int *erroroffset, const unsigned char *tables)
6170  {  {
# Line 5751  return pcre_compile2(pattern, options, N Line 6172  return pcre_compile2(pattern, options, N
6172  }  }
6173    
6174    
6175  PCRE_EXP_DEFN pcre *  PCRE_EXP_DEFN pcre * PCRE_CALL_CONVENTION
6176  pcre_compile2(const char *pattern, int options, int *errorcodeptr,  pcre_compile2(const char *pattern, int options, int *errorcodeptr,
6177    const char **errorptr, int *erroroffset, const unsigned char *tables)    const char **errorptr, int *erroroffset, const unsigned char *tables)
6178  {  {
# Line 5778  to fill in forward references to subpatt Line 6199  to fill in forward references to subpatt
6199    
6200  uschar cworkspace[COMPILE_WORK_SIZE];  uschar cworkspace[COMPILE_WORK_SIZE];
6201    
   
6202  /* Set this early so that early errors get offset 0. */  /* Set this early so that early errors get offset 0. */
6203    
6204  ptr = (const uschar *)pattern;  ptr = (const uschar *)pattern;
# Line 5824  if ((options & PCRE_UTF8) != 0) Line 6244  if ((options & PCRE_UTF8) != 0)
6244    }    }
6245  #endif  #endif
6246    
6247  if ((options & ~PUBLIC_OPTIONS) != 0)  if ((options & ~PUBLIC_COMPILE_OPTIONS) != 0)
6248    {    {
6249    errorcode = ERR17;    errorcode = ERR17;
6250    goto PCRE_EARLY_ERROR_RETURN;    goto PCRE_EARLY_ERROR_RETURN;
# Line 5841  cd->ctypes = tables + ctypes_offset; Line 6261  cd->ctypes = tables + ctypes_offset;
6261  /* Check for global one-time settings at the start of the pattern, and remember  /* Check for global one-time settings at the start of the pattern, and remember
6262  the offset for later. */  the offset for later. */
6263    
6264  while (ptr[skipatstart] == '(' && ptr[skipatstart+1] == '*')  while (ptr[skipatstart] == CHAR_LEFT_PARENTHESIS &&
6265           ptr[skipatstart+1] == CHAR_ASTERISK)
6266    {    {
6267    int newnl = 0;    int newnl = 0;
6268    int newbsr = 0;    int newbsr = 0;
6269    
6270    if (strncmp((char *)(ptr+skipatstart+2), "CR)", 3) == 0)    if (strncmp((char *)(ptr+skipatstart+2), STRING_CR_RIGHTPAR, 3) == 0)
6271      { skipatstart += 5; newnl = PCRE_NEWLINE_CR; }      { skipatstart += 5; newnl = PCRE_NEWLINE_CR; }
6272    else if (strncmp((char *)(ptr+skipatstart+2), "LF)", 3)  == 0)    else if (strncmp((char *)(ptr+skipatstart+2), STRING_LF_RIGHTPAR, 3)  == 0)
6273      { skipatstart += 5; newnl = PCRE_NEWLINE_LF; }      { skipatstart += 5; newnl = PCRE_NEWLINE_LF; }
6274    else if (strncmp((char *)(ptr+skipatstart+2), "CRLF)", 5)  == 0)    else if (strncmp((char *)(ptr+skipatstart+2), STRING_CRLF_RIGHTPAR, 5)  == 0)
6275      { skipatstart += 7; newnl = PCRE_NEWLINE_CR + PCRE_NEWLINE_LF; }      { skipatstart += 7; newnl = PCRE_NEWLINE_CR + PCRE_NEWLINE_LF; }
6276    else if (strncmp((char *)(ptr+skipatstart+2), "ANY)", 4) == 0)    else if (strncmp((char *)(ptr+skipatstart+2), STRING_ANY_RIGHTPAR, 4) == 0)
6277      { skipatstart += 6; newnl = PCRE_NEWLINE_ANY; }      { skipatstart += 6; newnl = PCRE_NEWLINE_ANY; }
6278    else if (strncmp((char *)(ptr+skipatstart+2), "ANYCRLF)", 8)  == 0)    else if (strncmp((char *)(ptr+skipatstart+2), STRING_ANYCRLF_RIGHTPAR, 8) == 0)
6279      { skipatstart += 10; newnl = PCRE_NEWLINE_ANYCRLF; }      { skipatstart += 10; newnl = PCRE_NEWLINE_ANYCRLF; }
6280    
6281    else if (strncmp((char *)(ptr+skipatstart+2), "BSR_ANYCRLF)", 12) == 0)    else if (strncmp((char *)(ptr+skipatstart+2), STRING_BSR_ANYCRLF_RIGHTPAR, 12) == 0)
6282      { skipatstart += 14; newbsr = PCRE_BSR_ANYCRLF; }      { skipatstart += 14; newbsr = PCRE_BSR_ANYCRLF; }
6283    else if (strncmp((char *)(ptr+skipatstart+2), "BSR_UNICODE)", 12) == 0)    else if (strncmp((char *)(ptr+skipatstart+2), STRING_BSR_UNICODE_RIGHTPAR, 12) == 0)
6284      { skipatstart += 14; newbsr = PCRE_BSR_UNICODE; }      { skipatstart += 14; newbsr = PCRE_BSR_UNICODE; }
6285    
6286    if (newnl != 0)    if (newnl != 0)
# Line 5887  current code allows for fixed one- or tw Line 6308  current code allows for fixed one- or tw
6308  switch (options & PCRE_NEWLINE_BITS)  switch (options & PCRE_NEWLINE_BITS)
6309    {    {
6310    case 0: newline = NEWLINE; break;   /* Build-time default */    case 0: newline = NEWLINE; break;   /* Build-time default */
6311    case PCRE_NEWLINE_CR: newline = '\r'; break;    case PCRE_NEWLINE_CR: newline = CHAR_CR; break;
6312    case PCRE_NEWLINE_LF: newline = '\n'; break;    case PCRE_NEWLINE_LF: newline = CHAR_NL; break;
6313    case PCRE_NEWLINE_CR+    case PCRE_NEWLINE_CR+
6314         PCRE_NEWLINE_LF: newline = ('\r' << 8) | '\n'; break;         PCRE_NEWLINE_LF: newline = (CHAR_CR << 8) | CHAR_NL; break;
6315    case PCRE_NEWLINE_ANY: newline = -1; break;    case PCRE_NEWLINE_ANY: newline = -1; break;
6316    case PCRE_NEWLINE_ANYCRLF: newline = -2; break;    case PCRE_NEWLINE_ANYCRLF: newline = -2; break;
6317    default: errorcode = ERR56; goto PCRE_EARLY_ERROR_RETURN;    default: errorcode = ERR56; goto PCRE_EARLY_ERROR_RETURN;

Legend:
Removed from v.274  
changed lines
  Added in v.411

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12