/[pcre]/code/trunk/pcre_compile.c
ViewVC logotype

Diff of /code/trunk/pcre_compile.c

Parent Directory | Revision Log | View Patch

revision 360 by ph10, Wed Jul 9 20:00:28 2008 UTC revision 411 by ph10, Fri Apr 10 15:40:21 2009 UTC
# Line 6  Line 6 
6  and semantics are as close as possible to those of the Perl 5 language.  and semantics are as close as possible to those of the Perl 5 language.
7    
8                         Written by Philip Hazel                         Written by Philip Hazel
9             Copyright (c) 1997-2008 University of Cambridge             Copyright (c) 1997-2009 University of Cambridge
10    
11  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
12  Redistribution and use in source and binary forms, with or without  Redistribution and use in source and binary forms, with or without
# Line 97  are simple data values; negative values Line 97  are simple data values; negative values
97  on. Zero means further processing is needed (for things like \x), or the escape  on. Zero means further processing is needed (for things like \x), or the escape
98  is invalid. */  is invalid. */
99    
100  #ifndef EBCDIC  /* This is the "normal" table for ASCII systems */  #ifndef EBCDIC
101    
102    /* This is the "normal" table for ASCII systems or for EBCDIC systems running
103    in UTF-8 mode. */
104    
105  static const short int escapes[] = {  static const short int escapes[] = {
106       0,      0,      0,      0,      0,      0,      0,      0,   /* 0 - 7 */       0,                       0,
107       0,      0,    ':',    ';',    '<',    '=',    '>',    '?',   /* 8 - ? */       0,                       0,
108     '@', -ESC_A, -ESC_B, -ESC_C, -ESC_D, -ESC_E,      0, -ESC_G,   /* @ - G */       0,                       0,
109  -ESC_H,      0,      0, -ESC_K,      0,      0,      0,      0,   /* H - O */       0,                       0,
110  -ESC_P, -ESC_Q, -ESC_R, -ESC_S,      0,      0, -ESC_V, -ESC_W,   /* P - W */       0,                       0,
111  -ESC_X,      0, -ESC_Z,    '[',   '\\',    ']',    '^',    '_',   /* X - _ */       CHAR_COLON,              CHAR_SEMICOLON,
112     '`',      7, -ESC_b,      0, -ESC_d,  ESC_e,  ESC_f,      0,   /* ` - g */       CHAR_LESS_THAN_SIGN,     CHAR_EQUALS_SIGN,
113  -ESC_h,      0,      0, -ESC_k,      0,      0,  ESC_n,      0,   /* h - o */       CHAR_GREATER_THAN_SIGN,  CHAR_QUESTION_MARK,
114  -ESC_p,      0,  ESC_r, -ESC_s,  ESC_tee,    0, -ESC_v, -ESC_w,   /* p - w */       CHAR_COMMERCIAL_AT,      -ESC_A,
115       0,      0, -ESC_z                                            /* x - z */       -ESC_B,                  -ESC_C,
116         -ESC_D,                  -ESC_E,
117         0,                       -ESC_G,
118         -ESC_H,                  0,
119         0,                       -ESC_K,
120         0,                       0,
121         0,                       0,
122         -ESC_P,                  -ESC_Q,
123         -ESC_R,                  -ESC_S,
124         0,                       0,
125         -ESC_V,                  -ESC_W,
126         -ESC_X,                  0,
127         -ESC_Z,                  CHAR_LEFT_SQUARE_BRACKET,
128         CHAR_BACKSLASH,          CHAR_RIGHT_SQUARE_BRACKET,
129         CHAR_CIRCUMFLEX_ACCENT,  CHAR_UNDERSCORE,
130         CHAR_GRAVE_ACCENT,       7,
131         -ESC_b,                  0,
132         -ESC_d,                  ESC_e,
133         ESC_f,                   0,
134         -ESC_h,                  0,
135         0,                       -ESC_k,
136         0,                       0,
137         ESC_n,                   0,
138         -ESC_p,                  0,
139         ESC_r,                   -ESC_s,
140         ESC_tee,                 0,
141         -ESC_v,                  -ESC_w,
142         0,                       0,
143         -ESC_z
144  };  };
145    
146  #else           /* This is the "abnormal" table for EBCDIC systems */  #else
147    
148    /* This is the "abnormal" table for EBCDIC systems without UTF-8 support. */
149    
150  static const short int escapes[] = {  static const short int escapes[] = {
151  /*  48 */     0,     0,      0,     '.',    '<',   '(',    '+',    '|',  /*  48 */     0,     0,      0,     '.',    '<',   '(',    '+',    '|',
152  /*  50 */   '&',     0,      0,       0,      0,     0,      0,      0,  /*  50 */   '&',     0,      0,       0,      0,     0,      0,      0,
# Line 142  static const short int escapes[] = { Line 177  static const short int escapes[] = {
177    
178  /* Table of special "verbs" like (*PRUNE). This is a short table, so it is  /* Table of special "verbs" like (*PRUNE). This is a short table, so it is
179  searched linearly. Put all the names into a single string, in order to reduce  searched linearly. Put all the names into a single string, in order to reduce
180  the number of relocations when a shared library is dynamically linked. */  the number of relocations when a shared library is dynamically linked. The
181    string is built from string macros so that it works in UTF-8 mode on EBCDIC
182    platforms. */
183    
184  typedef struct verbitem {  typedef struct verbitem {
185    int   len;    int   len;
# Line 150  typedef struct verbitem { Line 187  typedef struct verbitem {
187  } verbitem;  } verbitem;
188    
189  static const char verbnames[] =  static const char verbnames[] =
190    "ACCEPT\0"    STRING_ACCEPT0
191    "COMMIT\0"    STRING_COMMIT0
192    "F\0"    STRING_F0
193    "FAIL\0"    STRING_FAIL0
194    "PRUNE\0"    STRING_PRUNE0
195    "SKIP\0"    STRING_SKIP0
196    "THEN";    STRING_THEN;
197    
198  static const verbitem verbs[] = {  static const verbitem verbs[] = {
199    { 6, OP_ACCEPT },    { 6, OP_ACCEPT },
# Line 178  length entry. The first three must be al Line 215  length entry. The first three must be al
215  for handling case independence. */  for handling case independence. */
216    
217  static const char posix_names[] =  static const char posix_names[] =
218    "alpha\0"  "lower\0"  "upper\0"  "alnum\0"  "ascii\0"  "blank\0"    STRING_alpha0 STRING_lower0 STRING_upper0 STRING_alnum0
219    "cntrl\0"  "digit\0"  "graph\0"  "print\0"  "punct\0"  "space\0"    STRING_ascii0 STRING_blank0 STRING_cntrl0 STRING_digit0
220    "word\0"   "xdigit";    STRING_graph0 STRING_print0 STRING_punct0 STRING_space0
221      STRING_word0  STRING_xdigit;
222    
223  static const uschar posix_name_lengths[] = {  static const uschar posix_name_lengths[] = {
224    5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 4, 6, 0 };    5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 4, 6, 0 };
# Line 322  For convenience, we use the same bit def Line 360  For convenience, we use the same bit def
360    
361  Then we can use ctype_digit and ctype_xdigit in the code. */  Then we can use ctype_digit and ctype_xdigit in the code. */
362    
363  #ifndef EBCDIC  /* This is the "normal" case, for ASCII systems */  #ifndef EBCDIC
364    
365    /* This is the "normal" case, for ASCII systems, and EBCDIC systems running in
366    UTF-8 mode. */
367    
368  static const unsigned char digitab[] =  static const unsigned char digitab[] =
369    {    {
370    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*   0-  7 */    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*   0-  7 */
# Line 358  static const unsigned char digitab[] = Line 400  static const unsigned char digitab[] =
400    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 240-247 */    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 240-247 */
401    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};/* 248-255 */    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};/* 248-255 */
402    
403  #else           /* This is the "abnormal" case, for EBCDIC systems */  #else
404    
405    /* This is the "abnormal" case, for EBCDIC systems not running in UTF-8 mode. */
406    
407  static const unsigned char digitab[] =  static const unsigned char digitab[] =
408    {    {
409    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*   0-  7  0 */    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*   0-  7  0 */
# Line 455  static const char * Line 500  static const char *
500  find_error_text(int n)  find_error_text(int n)
501  {  {
502  const char *s = error_texts;  const char *s = error_texts;
503  for (; n > 0; n--) while (*s++ != 0);  for (; n > 0; n--) while (*s++ != 0) {};
504  return s;  return s;
505  }  }
506    
# Line 503  if (c == 0) *errorcodeptr = ERR1; Line 548  if (c == 0) *errorcodeptr = ERR1;
548  in a table. A non-zero result is something that can be returned immediately.  in a table. A non-zero result is something that can be returned immediately.
549  Otherwise further processing may be required. */  Otherwise further processing may be required. */
550    
551  #ifndef EBCDIC  /* ASCII coding */  #ifndef EBCDIC  /* ASCII/UTF-8 coding */
552  else if (c < '0' || c > 'z') {}                           /* Not alphanumeric */  else if (c < CHAR_0 || c > CHAR_z) {}                     /* Not alphanumeric */
553  else if ((i = escapes[c - '0']) != 0) c = i;  else if ((i = escapes[c - CHAR_0]) != 0) c = i;
554    
555  #else           /* EBCDIC coding */  #else           /* EBCDIC coding */
556  else if (c < 'a' || (ebcdic_chartab[c] & 0x0E) == 0) {}   /* Not alphanumeric */  else if (c < 'a' || (ebcdic_chartab[c] & 0x0E) == 0) {}   /* Not alphanumeric */
# Line 524  else Line 569  else
569      /* A number of Perl escapes are not handled by PCRE. We give an explicit      /* A number of Perl escapes are not handled by PCRE. We give an explicit
570      error. */      error. */
571    
572      case 'l':      case CHAR_l:
573      case 'L':      case CHAR_L:
574      case 'N':      case CHAR_N:
575      case 'u':      case CHAR_u:
576      case 'U':      case CHAR_U:
577      *errorcodeptr = ERR37;      *errorcodeptr = ERR37;
578      break;      break;
579    
# Line 548  else Line 593  else
593      (possibly recursive) subroutine calls, _not_ backreferences. Just return      (possibly recursive) subroutine calls, _not_ backreferences. Just return
594      the -ESC_g code (cf \k). */      the -ESC_g code (cf \k). */
595    
596      case 'g':      case CHAR_g:
597      if (ptr[1] == '<' || ptr[1] == '\'')      if (ptr[1] == CHAR_LESS_THAN_SIGN || ptr[1] == CHAR_APOSTROPHE)
598        {        {
599        c = -ESC_g;        c = -ESC_g;
600        break;        break;
# Line 557  else Line 602  else
602    
603      /* Handle the Perl-compatible cases */      /* Handle the Perl-compatible cases */
604    
605      if (ptr[1] == '{')      if (ptr[1] == CHAR_LEFT_CURLY_BRACKET)
606        {        {
607        const uschar *p;        const uschar *p;
608        for (p = ptr+2; *p != 0 && *p != '}'; p++)        for (p = ptr+2; *p != 0 && *p != CHAR_RIGHT_CURLY_BRACKET; p++)
609          if (*p != '-' && (digitab[*p] & ctype_digit) == 0) break;          if (*p != CHAR_MINUS && (digitab[*p] & ctype_digit) == 0) break;
610        if (*p != 0 && *p != '}')        if (*p != 0 && *p != CHAR_RIGHT_CURLY_BRACKET)
611          {          {
612          c = -ESC_k;          c = -ESC_k;
613          break;          break;
# Line 572  else Line 617  else
617        }        }
618      else braced = FALSE;      else braced = FALSE;
619    
620      if (ptr[1] == '-')      if (ptr[1] == CHAR_MINUS)
621        {        {
622        negated = TRUE;        negated = TRUE;
623        ptr++;        ptr++;
# Line 581  else Line 626  else
626    
627      c = 0;      c = 0;
628      while ((digitab[ptr[1]] & ctype_digit) != 0)      while ((digitab[ptr[1]] & ctype_digit) != 0)
629        c = c * 10 + *(++ptr) - '0';        c = c * 10 + *(++ptr) - CHAR_0;
630    
631      if (c < 0)   /* Integer overflow */      if (c < 0)   /* Integer overflow */
632        {        {
# Line 589  else Line 634  else
634        break;        break;
635        }        }
636    
637      if (braced && *(++ptr) != '}')      if (braced && *(++ptr) != CHAR_RIGHT_CURLY_BRACKET)
638        {        {
639        *errorcodeptr = ERR57;        *errorcodeptr = ERR57;
640        break;        break;
# Line 626  else Line 671  else
671      value is greater than 377, the least significant 8 bits are taken. Inside a      value is greater than 377, the least significant 8 bits are taken. Inside a
672      character class, \ followed by a digit is always an octal number. */      character class, \ followed by a digit is always an octal number. */
673    
674      case '1': case '2': case '3': case '4': case '5':      case CHAR_1: case CHAR_2: case CHAR_3: case CHAR_4: case CHAR_5:
675      case '6': case '7': case '8': case '9':      case CHAR_6: case CHAR_7: case CHAR_8: case CHAR_9:
676    
677      if (!isclass)      if (!isclass)
678        {        {
679        oldptr = ptr;        oldptr = ptr;
680        c -= '0';        c -= CHAR_0;
681        while ((digitab[ptr[1]] & ctype_digit) != 0)        while ((digitab[ptr[1]] & ctype_digit) != 0)
682          c = c * 10 + *(++ptr) - '0';          c = c * 10 + *(++ptr) - CHAR_0;
683        if (c < 0)    /* Integer overflow */        if (c < 0)    /* Integer overflow */
684          {          {
685          *errorcodeptr = ERR61;          *errorcodeptr = ERR61;
# Line 652  else Line 697  else
697      generates a binary zero byte and treats the digit as a following literal.      generates a binary zero byte and treats the digit as a following literal.
698      Thus we have to pull back the pointer by one. */      Thus we have to pull back the pointer by one. */
699    
700      if ((c = *ptr) >= '8')      if ((c = *ptr) >= CHAR_8)
701        {        {
702        ptr--;        ptr--;
703        c = 0;        c = 0;
# Line 665  else Line 710  else
710      to do). Nowadays we allow for larger numbers in UTF-8 mode, but no more      to do). Nowadays we allow for larger numbers in UTF-8 mode, but no more
711      than 3 octal digits. */      than 3 octal digits. */
712    
713      case '0':      case CHAR_0:
714      c -= '0';      c -= CHAR_0;
715      while(i++ < 2 && ptr[1] >= '0' && ptr[1] <= '7')      while(i++ < 2 && ptr[1] >= CHAR_0 && ptr[1] <= CHAR_7)
716          c = c * 8 + *(++ptr) - '0';          c = c * 8 + *(++ptr) - CHAR_0;
717      if (!utf8 && c > 255) *errorcodeptr = ERR51;      if (!utf8 && c > 255) *errorcodeptr = ERR51;
718      break;      break;
719    
# Line 676  else Line 721  else
721      than 0xff in utf8 mode, but only if the ddd are hex digits. If not, { is      than 0xff in utf8 mode, but only if the ddd are hex digits. If not, { is
722      treated as a data character. */      treated as a data character. */
723    
724      case 'x':      case CHAR_x:
725      if (ptr[1] == '{')      if (ptr[1] == CHAR_LEFT_CURLY_BRACKET)
726        {        {
727        const uschar *pt = ptr + 2;        const uschar *pt = ptr + 2;
728        int count = 0;        int count = 0;
# Line 686  else Line 731  else
731        while ((digitab[*pt] & ctype_xdigit) != 0)        while ((digitab[*pt] & ctype_xdigit) != 0)
732          {          {
733          register int cc = *pt++;          register int cc = *pt++;
734          if (c == 0 && cc == '0') continue;     /* Leading zeroes */          if (c == 0 && cc == CHAR_0) continue;     /* Leading zeroes */
735          count++;          count++;
736    
737  #ifndef EBCDIC  /* ASCII coding */  #ifndef EBCDIC  /* ASCII/UTF-8 coding */
738          if (cc >= 'a') cc -= 32;               /* Convert to upper case */          if (cc >= CHAR_a) cc -= 32;               /* Convert to upper case */
739          c = (c << 4) + cc - ((cc < 'A')? '0' : ('A' - 10));          c = (c << 4) + cc - ((cc < CHAR_A)? CHAR_0 : (CHAR_A - 10));
740  #else           /* EBCDIC coding */  #else           /* EBCDIC coding */
741          if (cc >= 'a' && cc <= 'z') cc += 64;  /* Convert to upper case */          if (cc >= CHAR_a && cc <= CHAR_z) cc += 64;  /* Convert to upper case */
742          c = (c << 4) + cc - ((cc >= '0')? '0' : ('A' - 10));          c = (c << 4) + cc - ((cc >= CHAR_0)? CHAR_0 : (CHAR_A - 10));
743  #endif  #endif
744          }          }
745    
746        if (*pt == '}')        if (*pt == CHAR_RIGHT_CURLY_BRACKET)
747          {          {
748          if (c < 0 || count > (utf8? 8 : 2)) *errorcodeptr = ERR34;          if (c < 0 || count > (utf8? 8 : 2)) *errorcodeptr = ERR34;
749          ptr = pt;          ptr = pt;
# Line 714  else Line 759  else
759      c = 0;      c = 0;
760      while (i++ < 2 && (digitab[ptr[1]] & ctype_xdigit) != 0)      while (i++ < 2 && (digitab[ptr[1]] & ctype_xdigit) != 0)
761        {        {
762        int cc;                               /* Some compilers don't like ++ */        int cc;                                  /* Some compilers don't like */
763        cc = *(++ptr);                        /* in initializers */        cc = *(++ptr);                           /* ++ in initializers */
764  #ifndef EBCDIC  /* ASCII coding */  #ifndef EBCDIC  /* ASCII/UTF-8 coding */
765        if (cc >= 'a') cc -= 32;              /* Convert to upper case */        if (cc >= CHAR_a) cc -= 32;              /* Convert to upper case */
766        c = c * 16 + cc - ((cc < 'A')? '0' : ('A' - 10));        c = c * 16 + cc - ((cc < CHAR_A)? CHAR_0 : (CHAR_A - 10));
767  #else           /* EBCDIC coding */  #else           /* EBCDIC coding */
768        if (cc <= 'z') cc += 64;              /* Convert to upper case */        if (cc <= CHAR_z) cc += 64;              /* Convert to upper case */
769        c = c * 16 + cc - ((cc >= '0')? '0' : ('A' - 10));        c = c * 16 + cc - ((cc >= CHAR_0)? CHAR_0 : (CHAR_A - 10));
770  #endif  #endif
771        }        }
772      break;      break;
# Line 730  else Line 775  else
775      This coding is ASCII-specific, but then the whole concept of \cx is      This coding is ASCII-specific, but then the whole concept of \cx is
776      ASCII-specific. (However, an EBCDIC equivalent has now been added.) */      ASCII-specific. (However, an EBCDIC equivalent has now been added.) */
777    
778      case 'c':      case CHAR_c:
779      c = *(++ptr);      c = *(++ptr);
780      if (c == 0)      if (c == 0)
781        {        {
# Line 738  else Line 783  else
783        break;        break;
784        }        }
785    
786  #ifndef EBCDIC  /* ASCII coding */  #ifndef EBCDIC  /* ASCII/UTF-8 coding */
787      if (c >= 'a' && c <= 'z') c -= 32;      if (c >= CHAR_a && c <= CHAR_z) c -= 32;
788      c ^= 0x40;      c ^= 0x40;
789  #else           /* EBCDIC coding */  #else           /* EBCDIC coding */
790      if (c >= 'a' && c <= 'z') c += 64;      if (c >= CHAR_a && c <= CHAR_z) c += 64;
791      c ^= 0xC0;      c ^= 0xC0;
792  #endif  #endif
793      break;      break;
# Line 804  if (c == 0) goto ERROR_RETURN; Line 849  if (c == 0) goto ERROR_RETURN;
849  /* \P or \p can be followed by a name in {}, optionally preceded by ^ for  /* \P or \p can be followed by a name in {}, optionally preceded by ^ for
850  negation. */  negation. */
851    
852  if (c == '{')  if (c == CHAR_LEFT_CURLY_BRACKET)
853    {    {
854    if (ptr[1] == '^')    if (ptr[1] == CHAR_CIRCUMFLEX_ACCENT)
855      {      {
856      *negptr = TRUE;      *negptr = TRUE;
857      ptr++;      ptr++;
# Line 815  if (c == '{') Line 860  if (c == '{')
860      {      {
861      c = *(++ptr);      c = *(++ptr);
862      if (c == 0) goto ERROR_RETURN;      if (c == 0) goto ERROR_RETURN;
863      if (c == '}') break;      if (c == CHAR_RIGHT_CURLY_BRACKET) break;
864      name[i] = c;      name[i] = c;
865      }      }
866    if (c !='}') goto ERROR_RETURN;    if (c != CHAR_RIGHT_CURLY_BRACKET) goto ERROR_RETURN;
867    name[i] = 0;    name[i] = 0;
868    }    }
869    
# Line 883  is_counted_repeat(const uschar *p) Line 928  is_counted_repeat(const uschar *p)
928  {  {
929  if ((digitab[*p++] & ctype_digit) == 0) return FALSE;  if ((digitab[*p++] & ctype_digit) == 0) return FALSE;
930  while ((digitab[*p] & ctype_digit) != 0) p++;  while ((digitab[*p] & ctype_digit) != 0) p++;
931  if (*p == '}') return TRUE;  if (*p == CHAR_RIGHT_CURLY_BRACKET) return TRUE;
932    
933  if (*p++ != ',') return FALSE;  if (*p++ != CHAR_COMMA) return FALSE;
934  if (*p == '}') return TRUE;  if (*p == CHAR_RIGHT_CURLY_BRACKET) return TRUE;
935    
936  if ((digitab[*p++] & ctype_digit) == 0) return FALSE;  if ((digitab[*p++] & ctype_digit) == 0) return FALSE;
937  while ((digitab[*p] & ctype_digit) != 0) p++;  while ((digitab[*p] & ctype_digit) != 0) p++;
938    
939  return (*p == '}');  return (*p == CHAR_RIGHT_CURLY_BRACKET);
940  }  }
941    
942    
# Line 924  int max = -1; Line 969  int max = -1;
969  /* Read the minimum value and do a paranoid check: a negative value indicates  /* Read the minimum value and do a paranoid check: a negative value indicates
970  an integer overflow. */  an integer overflow. */
971    
972  while ((digitab[*p] & ctype_digit) != 0) min = min * 10 + *p++ - '0';  while ((digitab[*p] & ctype_digit) != 0) min = min * 10 + *p++ - CHAR_0;
973  if (min < 0 || min > 65535)  if (min < 0 || min > 65535)
974    {    {
975    *errorcodeptr = ERR5;    *errorcodeptr = ERR5;
# Line 934  if (min < 0 || min > 65535) Line 979  if (min < 0 || min > 65535)
979  /* Read the maximum value if there is one, and again do a paranoid check on its size.  /* Read the maximum value if there is one, and again do a paranoid check on its size.
980  Also, max must not be less than min. */  Also, max must not be less than min. */
981    
982  if (*p == '}') max = min; else  if (*p == CHAR_RIGHT_CURLY_BRACKET) max = min; else
983    {    {
984    if (*(++p) != '}')    if (*(++p) != CHAR_RIGHT_CURLY_BRACKET)
985      {      {
986      max = 0;      max = 0;
987      while((digitab[*p] & ctype_digit) != 0) max = max * 10 + *p++ - '0';      while((digitab[*p] & ctype_digit) != 0) max = max * 10 + *p++ - CHAR_0;
988      if (max < 0 || max > 65535)      if (max < 0 || max > 65535)
989        {        {
990        *errorcodeptr = ERR5;        *errorcodeptr = ERR5;
# Line 964  return p; Line 1009  return p;
1009    
1010    
1011  /*************************************************  /*************************************************
1012  *       Find forward referenced subpattern       *  *  Subroutine for finding forward reference      *
1013  *************************************************/  *************************************************/
1014    
1015  /* This function scans along a pattern's text looking for capturing  /* This recursive function is called only from find_parens() below. The
1016    top-level call starts at the beginning of the pattern. All other calls must
1017    start at a parenthesis. It scans along a pattern's text looking for capturing
1018  subpatterns, and counting them. If it finds a named pattern that matches the  subpatterns, and counting them. If it finds a named pattern that matches the
1019  name it is given, it returns its number. Alternatively, if the name is NULL, it  name it is given, it returns its number. Alternatively, if the name is NULL, it
1020  returns when it reaches a given numbered subpattern. This is used for forward  returns when it reaches a given numbered subpattern. We know that if (?P< is
1021  references to subpatterns. We know that if (?P< is encountered, the name will  encountered, the name will be terminated by '>' because that is checked in the
1022  be terminated by '>' because that is checked in the first pass.  first pass. Recursion is used to keep track of subpatterns that reset the
1023    capturing group numbers - the (?| feature.
1024    
1025  Arguments:  Arguments:
1026    ptr          current position in the pattern    ptrptr       address of the current character pointer (updated)
1027    cd           compile background data    cd           compile background data
1028    name         name to seek, or NULL if seeking a numbered subpattern    name         name to seek, or NULL if seeking a numbered subpattern
1029    lorn         name length, or subpattern number if name is NULL    lorn         name length, or subpattern number if name is NULL
1030    xmode        TRUE if we are in /x mode    xmode        TRUE if we are in /x mode
1031      count        pointer to the current capturing subpattern number (updated)
1032    
1033  Returns:       the number of the named subpattern, or -1 if not found  Returns:       the number of the named subpattern, or -1 if not found
1034  */  */
1035    
1036  static int  static int
1037  find_parens(const uschar *ptr, compile_data *cd, const uschar *name, int lorn,  find_parens_sub(uschar **ptrptr, compile_data *cd, const uschar *name, int lorn,
1038    BOOL xmode)    BOOL xmode, int *count)
1039  {  {
1040  const uschar *thisname;  uschar *ptr = *ptrptr;
1041  int count = cd->bracount;  int start_count = *count;
1042    int hwm_count = start_count;
1043    BOOL dup_parens = FALSE;
1044    
1045  for (; *ptr != 0; ptr++)  /* If the first character is a parenthesis, check on the type of group we are
1046    dealing with. The very first call may not start with a parenthesis. */
1047    
1048    if (ptr[0] == CHAR_LEFT_PARENTHESIS)
1049    {    {
1050    int term;    if (ptr[1] == CHAR_QUESTION_MARK &&
1051          ptr[2] == CHAR_VERTICAL_LINE)
1052        {
1053        ptr += 3;
1054        dup_parens = TRUE;
1055        }
1056    
1057      /* Handle a normal, unnamed capturing parenthesis */
1058    
1059      else if (ptr[1] != CHAR_QUESTION_MARK && ptr[1] != CHAR_ASTERISK)
1060        {
1061        *count += 1;
1062        if (name == NULL && *count == lorn) return *count;
1063        ptr++;
1064        }
1065    
1066      /* Handle a condition. If it is an assertion, just carry on so that it
1067      is processed as normal. If not, skip to the closing parenthesis of the
1068      condition (there can't be any nested parens). */
1069    
1070      else if (ptr[2] == CHAR_LEFT_PARENTHESIS)
1071        {
1072        ptr += 2;
1073        if (ptr[1] != CHAR_QUESTION_MARK)
1074          {
1075          while (*ptr != 0 && *ptr != CHAR_RIGHT_PARENTHESIS) ptr++;
1076          if (*ptr != 0) ptr++;
1077          }
1078        }
1079    
1080      /* We have either (? or (* and not a condition */
1081    
1082      else
1083        {
1084        ptr += 2;
1085        if (*ptr == CHAR_P) ptr++;                      /* Allow optional P */
1086    
1087        /* We have to disambiguate (?<! and (?<= from (?<name> for named groups */
1088    
1089        if ((*ptr == CHAR_LESS_THAN_SIGN && ptr[1] != CHAR_EXCLAMATION_MARK &&
1090            ptr[1] != CHAR_EQUALS_SIGN) || *ptr == CHAR_APOSTROPHE)
1091          {
1092          int term;
1093          const uschar *thisname;
1094          *count += 1;
1095          if (name == NULL && *count == lorn) return *count;
1096          term = *ptr++;
1097          if (term == CHAR_LESS_THAN_SIGN) term = CHAR_GREATER_THAN_SIGN;
1098          thisname = ptr;
1099          while (*ptr != term) ptr++;
1100          if (name != NULL && lorn == ptr - thisname &&
1101              strncmp((const char *)name, (const char *)thisname, lorn) == 0)
1102            return *count;
1103          }
1104        }
1105      }
1106    
1107    /* Past any initial parenthesis handling, scan for parentheses or vertical
1108    bars. */
1109    
1110    for (; *ptr != 0; ptr++)
1111      {
1112    /* Skip over backslashed characters and also entire \Q...\E */    /* Skip over backslashed characters and also entire \Q...\E */
1113    
1114    if (*ptr == '\\')    if (*ptr == CHAR_BACKSLASH)
1115      {      {
1116      if (*(++ptr) == 0) return -1;      if (*(++ptr) == 0) goto FAIL_EXIT;
1117      if (*ptr == 'Q') for (;;)      if (*ptr == CHAR_Q) for (;;)
1118        {        {
1119        while (*(++ptr) != 0 && *ptr != '\\');        while (*(++ptr) != 0 && *ptr != CHAR_BACKSLASH) {};
1120        if (*ptr == 0) return -1;        if (*ptr == 0) goto FAIL_EXIT;
1121        if (*(++ptr) == 'E') break;        if (*(++ptr) == CHAR_E) break;
1122        }        }
1123      continue;      continue;
1124      }      }
# Line 1012  for (; *ptr != 0; ptr++) Line 1126  for (; *ptr != 0; ptr++)
1126    /* Skip over character classes; this logic must be similar to the way they    /* Skip over character classes; this logic must be similar to the way they
1127    are handled for real. If the first character is '^', skip it. Also, if the    are handled for real. If the first character is '^', skip it. Also, if the
1128    first few characters (either before or after ^) are \Q\E or \E we skip them    first few characters (either before or after ^) are \Q\E or \E we skip them
1129    too. This makes for compatibility with Perl. */    too. This makes for compatibility with Perl. Note the use of STR macros to
1130      encode "Q\\E" so that it works in UTF-8 on EBCDIC platforms. */
1131    
1132    if (*ptr == '[')    if (*ptr == CHAR_LEFT_SQUARE_BRACKET)
1133      {      {
1134      BOOL negate_class = FALSE;      BOOL negate_class = FALSE;
1135      for (;;)      for (;;)
1136        {        {
1137        int c = *(++ptr);        int c = *(++ptr);
1138        if (c == '\\')        if (c == CHAR_BACKSLASH)
1139          {          {
1140          if (ptr[1] == 'E') ptr++;          if (ptr[1] == CHAR_E)
1141            else if (strncmp((const char *)ptr+1, "Q\\E", 3) == 0) ptr += 3;            ptr++;
1142              else break;          else if (strncmp((const char *)ptr+1,
1143                     STR_Q STR_BACKSLASH STR_E, 3) == 0)
1144              ptr += 3;
1145            else
1146              break;
1147          }          }
1148        else if (!negate_class && c == '^')        else if (!negate_class && c == CHAR_CIRCUMFLEX_ACCENT)
1149          negate_class = TRUE;          negate_class = TRUE;
1150        else break;        else break;
1151        }        }
# Line 1034  for (; *ptr != 0; ptr++) Line 1153  for (; *ptr != 0; ptr++)
1153      /* If the next character is ']', it is a data character that must be      /* If the next character is ']', it is a data character that must be
1154      skipped, except in JavaScript compatibility mode. */      skipped, except in JavaScript compatibility mode. */
1155    
1156      if (ptr[1] == ']' && (cd->external_options & PCRE_JAVASCRIPT_COMPAT) == 0)      if (ptr[1] == CHAR_RIGHT_SQUARE_BRACKET &&
1157            (cd->external_options & PCRE_JAVASCRIPT_COMPAT) == 0)
1158        ptr++;        ptr++;
1159    
1160      while (*(++ptr) != ']')      while (*(++ptr) != CHAR_RIGHT_SQUARE_BRACKET)
1161        {        {
1162        if (*ptr == 0) return -1;        if (*ptr == 0) return -1;
1163        if (*ptr == '\\')        if (*ptr == CHAR_BACKSLASH)
1164          {          {
1165          if (*(++ptr) == 0) return -1;          if (*(++ptr) == 0) goto FAIL_EXIT;
1166          if (*ptr == 'Q') for (;;)          if (*ptr == CHAR_Q) for (;;)
1167            {            {
1168            while (*(++ptr) != 0 && *ptr != '\\');            while (*(++ptr) != 0 && *ptr != CHAR_BACKSLASH) {};
1169            if (*ptr == 0) return -1;            if (*ptr == 0) goto FAIL_EXIT;
1170            if (*(++ptr) == 'E') break;            if (*(++ptr) == CHAR_E) break;
1171            }            }
1172          continue;          continue;
1173          }          }
# Line 1057  for (; *ptr != 0; ptr++) Line 1177  for (; *ptr != 0; ptr++)
1177    
1178    /* Skip comments in /x mode */    /* Skip comments in /x mode */
1179    
1180    if (xmode && *ptr == '#')    if (xmode && *ptr == CHAR_NUMBER_SIGN)
1181      {      {
1182      while (*(++ptr) != 0 && *ptr != '\n');      while (*(++ptr) != 0 && *ptr != CHAR_NL) {};
1183      if (*ptr == 0) return -1;      if (*ptr == 0) goto FAIL_EXIT;
1184      continue;      continue;
1185      }      }
1186    
1187    /* An opening parens must now be a real metacharacter */    /* Check for the special metacharacters */
1188    
1189    if (*ptr != '(') continue;    if (*ptr == CHAR_LEFT_PARENTHESIS)
   if (ptr[1] != '?' && ptr[1] != '*')  
1190      {      {
1191      count++;      int rc = find_parens_sub(&ptr, cd, name, lorn, xmode, count);
1192      if (name == NULL && count == lorn) return count;      if (rc > 0) return rc;
1193      continue;      if (*ptr == 0) goto FAIL_EXIT;
1194      }      }
1195    
1196    ptr += 2;    else if (*ptr == CHAR_RIGHT_PARENTHESIS)
1197    if (*ptr == 'P') ptr++;                      /* Allow optional P */      {
1198        if (dup_parens && *count < hwm_count) *count = hwm_count;
1199        *ptrptr = ptr;
1200        return -1;
1201        }
1202    
1203    /* We have to disambiguate (?<! and (?<= from (?<name> */    else if (*ptr == CHAR_VERTICAL_LINE && dup_parens)
1204        {
1205        if (*count > hwm_count) hwm_count = *count;
1206        *count = start_count;
1207        }
1208      }
1209    
1210    if ((*ptr != '<' || ptr[1] == '!' || ptr[1] == '=') &&  FAIL_EXIT:
1211         *ptr != '\'')  *ptrptr = ptr;
1212      continue;  return -1;
1213    }
1214    
1215    
1216    
1217    
1218    /*************************************************
1219    *       Find forward referenced subpattern       *
1220    *************************************************/
1221    
1222    /* This function scans along a pattern's text looking for capturing
1223    subpatterns, and counting them. If it finds a named pattern that matches the
1224    name it is given, it returns its number. Alternatively, if the name is NULL, it
1225    returns when it reaches a given numbered subpattern. This is used for forward
1226    references to subpatterns. We used to be able to start this scan from the
1227    current compiling point, using the current count value from cd->bracount, and
1228    do it all in a single loop, but the addition of the possibility of duplicate
1229    subpattern numbers means that we have to scan from the very start, in order to
1230    take account of such duplicates, and to use a recursive function to keep track
1231    of the different types of group.
1232    
1233    Arguments:
1234      cd           compile background data
1235      name         name to seek, or NULL if seeking a numbered subpattern
1236      lorn         name length, or subpattern number if name is NULL
1237      xmode        TRUE if we are in /x mode
1238    
1239    Returns:       the number of the found subpattern, or -1 if not found
1240    */
1241    
1242    count++;  static int
1243    find_parens(compile_data *cd, const uschar *name, int lorn, BOOL xmode)
1244    {
1245    uschar *ptr = (uschar *)cd->start_pattern;
1246    int count = 0;
1247    int rc;
1248    
1249    /* If the pattern does not start with an opening parenthesis, the first call
1250    to find_parens_sub() will scan right to the end (if necessary). However, if it
1251    does start with a parenthesis, find_parens_sub() will return when it hits the
1252    matching closing parens. That is why we have to have a loop. */
1253    
1254    if (name == NULL && count == lorn) return count;  for (;;)
1255    term = *ptr++;    {
1256    if (term == '<') term = '>';    rc = find_parens_sub(&ptr, cd, name, lorn, xmode, &count);
1257    thisname = ptr;    if (rc > 0 || *ptr++ == 0) break;
   while (*ptr != term) ptr++;  
   if (name != NULL && lorn == ptr - thisname &&  
       strncmp((const char *)name, (const char *)thisname, lorn) == 0)  
     return count;  
1258    }    }
1259    
1260  return -1;  return rc;
1261  }  }
1262    
1263    
1264    
1265    
1266  /*************************************************  /*************************************************
1267  *      Find first significant op code            *  *      Find first significant op code            *
1268  *************************************************/  *************************************************/
# Line 1450  for (;;) Line 1613  for (;;)
1613        if (code[-1] >= 0xc0) code += _pcre_utf8_table4[code[-1] & 0x3f];        if (code[-1] >= 0xc0) code += _pcre_utf8_table4[code[-1] & 0x3f];
1614        break;        break;
1615        }        }
1616    #else
1617        (void)(utf8);  /* Keep compiler happy by referencing function argument */
1618  #endif  #endif
1619      }      }
1620    }    }
# Line 1543  for (;;) Line 1708  for (;;)
1708        if (code[-1] >= 0xc0) code += _pcre_utf8_table4[code[-1] & 0x3f];        if (code[-1] >= 0xc0) code += _pcre_utf8_table4[code[-1] & 0x3f];
1709        break;        break;
1710        }        }
1711    #else
1712        (void)(utf8);  /* Keep compiler happy by referencing function argument */
1713  #endif  #endif
1714      }      }
1715    }    }
# Line 1609  for (code = first_significant_code(code Line 1776  for (code = first_significant_code(code
1776      BOOL empty_branch;      BOOL empty_branch;
1777      if (GET(code, 1) == 0) return TRUE;    /* Hit unclosed bracket */      if (GET(code, 1) == 0) return TRUE;    /* Hit unclosed bracket */
1778    
1779      /* Scan a closed bracket */      /* If a conditional group has only one branch, there is a second, implied,
1780        empty branch, so just skip over the conditional, because it could be empty.
1781        Otherwise, scan the individual branches of the group. */
1782    
1783      empty_branch = FALSE;      if (c == OP_COND && code[GET(code, 1)] != OP_ALT)
     do  
       {  
       if (!empty_branch && could_be_empty_branch(code, endcode, utf8))  
         empty_branch = TRUE;  
1784        code += GET(code, 1);        code += GET(code, 1);
1785        else
1786          {
1787          empty_branch = FALSE;
1788          do
1789            {
1790            if (!empty_branch && could_be_empty_branch(code, endcode, utf8))
1791              empty_branch = TRUE;
1792            code += GET(code, 1);
1793            }
1794          while (*code == OP_ALT);
1795          if (!empty_branch) return FALSE;   /* All branches are non-empty */
1796        }        }
1797      while (*code == OP_ALT);  
     if (!empty_branch) return FALSE;   /* All branches are non-empty */  
1798      c = *code;      c = *code;
1799      continue;      continue;
1800      }      }
# Line 1821  int terminator; /* Don't combin Line 1996  int terminator; /* Don't combin
1996  terminator = *(++ptr);   /* compiler warns about "non-constant" initializer. */  terminator = *(++ptr);   /* compiler warns about "non-constant" initializer. */
1997  for (++ptr; *ptr != 0; ptr++)  for (++ptr; *ptr != 0; ptr++)
1998    {    {
1999    if (*ptr == '\\' && ptr[1] == ']') ptr++; else    if (*ptr == CHAR_BACKSLASH && ptr[1] == CHAR_RIGHT_SQUARE_BRACKET) ptr++; else
2000      {      {
2001      if (*ptr == ']') return FALSE;      if (*ptr == CHAR_RIGHT_SQUARE_BRACKET) return FALSE;
2002      if (*ptr == terminator && ptr[1] == ']')      if (*ptr == terminator && ptr[1] == CHAR_RIGHT_SQUARE_BRACKET)
2003        {        {
2004        *endptr = ptr;        *endptr = ptr;
2005        return TRUE;        return TRUE;
# Line 2070  if ((options & PCRE_EXTENDED) != 0) Line 2245  if ((options & PCRE_EXTENDED) != 0)
2245    for (;;)    for (;;)
2246      {      {
2247      while ((cd->ctypes[*ptr] & ctype_space) != 0) ptr++;      while ((cd->ctypes[*ptr] & ctype_space) != 0) ptr++;
2248      if (*ptr == '#')      if (*ptr == CHAR_NUMBER_SIGN)
2249        {        {
2250        while (*(++ptr) != 0)        while (*(++ptr) != 0)
2251          if (IS_NEWLINE(ptr)) { ptr += cd->nllen; break; }          if (IS_NEWLINE(ptr)) { ptr += cd->nllen; break; }
# Line 2082  if ((options & PCRE_EXTENDED) != 0) Line 2257  if ((options & PCRE_EXTENDED) != 0)
2257  /* If the next item is one that we can handle, get its value. A non-negative  /* If the next item is one that we can handle, get its value. A non-negative
2258  value is a character, a negative value is an escape value. */  value is a character, a negative value is an escape value. */
2259    
2260  if (*ptr == '\\')  if (*ptr == CHAR_BACKSLASH)
2261    {    {
2262    int temperrorcode = 0;    int temperrorcode = 0;
2263    next = check_escape(&ptr, &temperrorcode, cd->bracount, options, FALSE);    next = check_escape(&ptr, &temperrorcode, cd->bracount, options, FALSE);
# Line 2107  if ((options & PCRE_EXTENDED) != 0) Line 2282  if ((options & PCRE_EXTENDED) != 0)
2282    for (;;)    for (;;)
2283      {      {
2284      while ((cd->ctypes[*ptr] & ctype_space) != 0) ptr++;      while ((cd->ctypes[*ptr] & ctype_space) != 0) ptr++;
2285      if (*ptr == '#')      if (*ptr == CHAR_NUMBER_SIGN)
2286        {        {
2287        while (*(++ptr) != 0)        while (*(++ptr) != 0)
2288          if (IS_NEWLINE(ptr)) { ptr += cd->nllen; break; }          if (IS_NEWLINE(ptr)) { ptr += cd->nllen; break; }
# Line 2118  if ((options & PCRE_EXTENDED) != 0) Line 2293  if ((options & PCRE_EXTENDED) != 0)
2293    
2294  /* If the next thing is itself optional, we have to give up. */  /* If the next thing is itself optional, we have to give up. */
2295    
2296  if (*ptr == '*' || *ptr == '?' || strncmp((char *)ptr, "{0,", 3) == 0)  if (*ptr == CHAR_ASTERISK || *ptr == CHAR_QUESTION_MARK ||
2297    return FALSE;    strncmp((char *)ptr, STR_LEFT_CURLY_BRACKET STR_0 STR_COMMA, 3) == 0)
2298        return FALSE;
2299    
2300  /* Now compare the next item with the previous opcode. If the previous is a  /* Now compare the next item with the previous opcode. If the previous is a
2301  positive single character match, "item" either contains the character or, if  positive single character match, "item" either contains the character or, if
# Line 2134  if (next >= 0) switch(op_code) Line 2310  if (next >= 0) switch(op_code)
2310    case OP_CHAR:    case OP_CHAR:
2311  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
2312    if (utf8 && item > 127) { GETCHAR(item, utf8_char); }    if (utf8 && item > 127) { GETCHAR(item, utf8_char); }
2313    #else
2314      (void)(utf8_char);  /* Keep compiler happy by referencing function argument */
2315  #endif  #endif
2316    return item != next;    return item != next;
2317    
# Line 2555  for (;; ptr++) Line 2733  for (;; ptr++)
2733    
2734    if (inescq && c != 0)    if (inescq && c != 0)
2735      {      {
2736      if (c == '\\' && ptr[1] == 'E')      if (c == CHAR_BACKSLASH && ptr[1] == CHAR_E)
2737        {        {
2738        inescq = FALSE;        inescq = FALSE;
2739        ptr++;        ptr++;
# Line 2581  for (;; ptr++) Line 2759  for (;; ptr++)
2759    /* Fill in length of a previous callout, except when the next thing is    /* Fill in length of a previous callout, except when the next thing is
2760    a quantifier. */    a quantifier. */
2761    
2762    is_quantifier = c == '*' || c == '+' || c == '?' ||    is_quantifier =
2763      (c == '{' && is_counted_repeat(ptr+1));      c == CHAR_ASTERISK || c == CHAR_PLUS || c == CHAR_QUESTION_MARK ||
2764        (c == CHAR_LEFT_CURLY_BRACKET && is_counted_repeat(ptr+1));
2765    
2766    if (!is_quantifier && previous_callout != NULL &&    if (!is_quantifier && previous_callout != NULL &&
2767         after_manual_callout-- <= 0)         after_manual_callout-- <= 0)
# Line 2597  for (;; ptr++) Line 2776  for (;; ptr++)
2776    if ((options & PCRE_EXTENDED) != 0)    if ((options & PCRE_EXTENDED) != 0)
2777      {      {
2778      if ((cd->ctypes[c] & ctype_space) != 0) continue;      if ((cd->ctypes[c] & ctype_space) != 0) continue;
2779      if (c == '#')      if (c == CHAR_NUMBER_SIGN)
2780        {        {
2781        while (*(++ptr) != 0)        while (*(++ptr) != 0)
2782          {          {
# Line 2622  for (;; ptr++) Line 2801  for (;; ptr++)
2801      {      {
2802      /* ===================================================================*/      /* ===================================================================*/
2803      case 0:                        /* The branch terminates at string end */      case 0:                        /* The branch terminates at string end */
2804      case '|':                      /* or | or ) */      case CHAR_VERTICAL_LINE:       /* or | or ) */
2805      case ')':      case CHAR_RIGHT_PARENTHESIS:
2806      *firstbyteptr = firstbyte;      *firstbyteptr = firstbyte;
2807      *reqbyteptr = reqbyte;      *reqbyteptr = reqbyte;
2808      *codeptr = code;      *codeptr = code;
# Line 2645  for (;; ptr++) Line 2824  for (;; ptr++)
2824      /* Handle single-character metacharacters. In multiline mode, ^ disables      /* Handle single-character metacharacters. In multiline mode, ^ disables
2825      the setting of any following char as a first character. */      the setting of any following char as a first character. */
2826    
2827      case '^':      case CHAR_CIRCUMFLEX_ACCENT:
2828      if ((options & PCRE_MULTILINE) != 0)      if ((options & PCRE_MULTILINE) != 0)
2829        {        {
2830        if (firstbyte == REQ_UNSET) firstbyte = REQ_NONE;        if (firstbyte == REQ_UNSET) firstbyte = REQ_NONE;
# Line 2654  for (;; ptr++) Line 2833  for (;; ptr++)
2833      *code++ = OP_CIRC;      *code++ = OP_CIRC;
2834      break;      break;
2835    
2836      case '$':      case CHAR_DOLLAR_SIGN:
2837      previous = NULL;      previous = NULL;
2838      *code++ = OP_DOLL;      *code++ = OP_DOLL;
2839      break;      break;
# Line 2662  for (;; ptr++) Line 2841  for (;; ptr++)
2841      /* There can never be a first char if '.' is first, whatever happens about      /* There can never be a first char if '.' is first, whatever happens about
2842      repeats. The value of reqbyte doesn't change either. */      repeats. The value of reqbyte doesn't change either. */
2843    
2844      case '.':      case CHAR_DOT:
2845      if (firstbyte == REQ_UNSET) firstbyte = REQ_NONE;      if (firstbyte == REQ_UNSET) firstbyte = REQ_NONE;
2846      zerofirstbyte = firstbyte;      zerofirstbyte = firstbyte;
2847      zeroreqbyte = reqbyte;      zeroreqbyte = reqbyte;
# Line 2686  for (;; ptr++) Line 2865  for (;; ptr++)
2865      In JavaScript compatibility mode, an isolated ']' causes an error. In      In JavaScript compatibility mode, an isolated ']' causes an error. In
2866      default (Perl) mode, it is treated as a data character. */      default (Perl) mode, it is treated as a data character. */
2867    
2868      case ']':      case CHAR_RIGHT_SQUARE_BRACKET:
2869      if ((cd->external_options & PCRE_JAVASCRIPT_COMPAT) != 0)      if ((cd->external_options & PCRE_JAVASCRIPT_COMPAT) != 0)
2870        {        {
2871        *errorcodeptr = ERR64;        *errorcodeptr = ERR64;
# Line 2694  for (;; ptr++) Line 2873  for (;; ptr++)
2873        }        }
2874      goto NORMAL_CHAR;      goto NORMAL_CHAR;
2875    
2876      case '[':      case CHAR_LEFT_SQUARE_BRACKET:
2877      previous = code;      previous = code;
2878    
2879      /* PCRE supports POSIX class stuff inside a class. Perl gives an error if      /* PCRE supports POSIX class stuff inside a class. Perl gives an error if
2880      they are encountered at the top level, so we'll do that too. */      they are encountered at the top level, so we'll do that too. */
2881    
2882      if ((ptr[1] == ':' || ptr[1] == '.' || ptr[1] == '=') &&      if ((ptr[1] == CHAR_COLON || ptr[1] == CHAR_DOT ||
2883             ptr[1] == CHAR_EQUALS_SIGN) &&
2884          check_posix_syntax(ptr, &tempptr))          check_posix_syntax(ptr, &tempptr))
2885        {        {
2886        *errorcodeptr = (ptr[1] == ':')? ERR13 : ERR31;        *errorcodeptr = (ptr[1] == CHAR_COLON)? ERR13 : ERR31;
2887        goto FAILED;        goto FAILED;
2888        }        }
2889    
# Line 2715  for (;; ptr++) Line 2895  for (;; ptr++)
2895      for (;;)      for (;;)
2896        {        {
2897        c = *(++ptr);        c = *(++ptr);
2898        if (c == '\\')        if (c == CHAR_BACKSLASH)
2899          {          {
2900          if (ptr[1] == 'E') ptr++;          if (ptr[1] == CHAR_E)
2901            else if (strncmp((const char *)ptr+1, "Q\\E", 3) == 0) ptr += 3;            ptr++;
2902              else break;          else if (strncmp((const char *)ptr+1,
2903                              STR_Q STR_BACKSLASH STR_E, 3) == 0)
2904              ptr += 3;
2905            else
2906              break;
2907          }          }
2908        else if (!negate_class && c == '^')        else if (!negate_class && c == CHAR_CIRCUMFLEX_ACCENT)
2909          negate_class = TRUE;          negate_class = TRUE;
2910        else break;        else break;
2911        }        }
# Line 2731  for (;; ptr++) Line 2915  for (;; ptr++)
2915      that. In JS mode, [] must always fail, so generate OP_FAIL, whereas      that. In JS mode, [] must always fail, so generate OP_FAIL, whereas
2916      [^] must match any character, so generate OP_ALLANY. */      [^] must match any character, so generate OP_ALLANY. */
2917    
2918      if (c ==']' && (cd->external_options & PCRE_JAVASCRIPT_COMPAT) != 0)      if (c == CHAR_RIGHT_SQUARE_BRACKET &&
2919            (cd->external_options & PCRE_JAVASCRIPT_COMPAT) != 0)
2920        {        {
2921        *code++ = negate_class? OP_ALLANY : OP_FAIL;        *code++ = negate_class? OP_ALLANY : OP_FAIL;
2922        if (firstbyte == REQ_UNSET) firstbyte = REQ_NONE;        if (firstbyte == REQ_UNSET) firstbyte = REQ_NONE;
# Line 2796  for (;; ptr++) Line 2981  for (;; ptr++)
2981    
2982        if (inescq)        if (inescq)
2983          {          {
2984          if (c == '\\' && ptr[1] == 'E')     /* If we are at \E */          if (c == CHAR_BACKSLASH && ptr[1] == CHAR_E)  /* If we are at \E */
2985            {            {
2986            inescq = FALSE;                   /* Reset literal state */            inescq = FALSE;                   /* Reset literal state */
2987            ptr++;                            /* Skip the 'E' */            ptr++;                            /* Skip the 'E' */
# Line 2811  for (;; ptr++) Line 2996  for (;; ptr++)
2996        [.ch.] and [=ch=] ("collating elements") and fault them, as Perl        [.ch.] and [=ch=] ("collating elements") and fault them, as Perl
2997        5.6 and 5.8 do. */        5.6 and 5.8 do. */
2998    
2999        if (c == '[' &&        if (c == CHAR_LEFT_SQUARE_BRACKET &&
3000            (ptr[1] == ':' || ptr[1] == '.' || ptr[1] == '=') &&            (ptr[1] == CHAR_COLON || ptr[1] == CHAR_DOT ||
3001            check_posix_syntax(ptr, &tempptr))             ptr[1] == CHAR_EQUALS_SIGN) && check_posix_syntax(ptr, &tempptr))
3002          {          {
3003          BOOL local_negate = FALSE;          BOOL local_negate = FALSE;
3004          int posix_class, taboffset, tabopt;          int posix_class, taboffset, tabopt;
3005          register const uschar *cbits = cd->cbits;          register const uschar *cbits = cd->cbits;
3006          uschar pbits[32];          uschar pbits[32];
3007    
3008          if (ptr[1] != ':')          if (ptr[1] != CHAR_COLON)
3009            {            {
3010            *errorcodeptr = ERR31;            *errorcodeptr = ERR31;
3011            goto FAILED;            goto FAILED;
3012            }            }
3013    
3014          ptr += 2;          ptr += 2;
3015          if (*ptr == '^')          if (*ptr == CHAR_CIRCUMFLEX_ACCENT)
3016            {            {
3017            local_negate = TRUE;            local_negate = TRUE;
3018            should_flip_negation = TRUE;  /* Note negative special */            should_flip_negation = TRUE;  /* Note negative special */
# Line 2900  for (;; ptr++) Line 3085  for (;; ptr++)
3085        to 'or' into the one we are building. We assume they have more than one        to 'or' into the one we are building. We assume they have more than one
3086        character in them, so set class_charcount bigger than one. */        character in them, so set class_charcount bigger than one. */
3087    
3088        if (c == '\\')        if (c == CHAR_BACKSLASH)
3089          {          {
3090          c = check_escape(&ptr, errorcodeptr, cd->bracount, options, TRUE);          c = check_escape(&ptr, errorcodeptr, cd->bracount, options, TRUE);
3091          if (*errorcodeptr != 0) goto FAILED;          if (*errorcodeptr != 0) goto FAILED;
3092    
3093          if (-c == ESC_b) c = '\b';       /* \b is backspace in a class */          if (-c == ESC_b) c = CHAR_BS;       /* \b is backspace in a class */
3094          else if (-c == ESC_X) c = 'X';   /* \X is literal X in a class */          else if (-c == ESC_X) c = CHAR_X;   /* \X is literal X in a class */
3095          else if (-c == ESC_R) c = 'R';   /* \R is literal R in a class */          else if (-c == ESC_R) c = CHAR_R;   /* \R is literal R in a class */
3096          else if (-c == ESC_Q)            /* Handle start of quoted string */          else if (-c == ESC_Q)            /* Handle start of quoted string */
3097            {            {
3098            if (ptr[1] == '\\' && ptr[2] == 'E')            if (ptr[1] == CHAR_BACKSLASH && ptr[2] == CHAR_E)
3099              {              {
3100              ptr += 2; /* avoid empty string */              ptr += 2; /* avoid empty string */
3101              }              }
# Line 3136  for (;; ptr++) Line 3321  for (;; ptr++)
3321        entirely. The code for handling \Q and \E is messy. */        entirely. The code for handling \Q and \E is messy. */
3322    
3323        CHECK_RANGE:        CHECK_RANGE:
3324        while (ptr[1] == '\\' && ptr[2] == 'E')        while (ptr[1] == CHAR_BACKSLASH && ptr[2] == CHAR_E)
3325          {          {
3326          inescq = FALSE;          inescq = FALSE;
3327          ptr += 2;          ptr += 2;
# Line 3146  for (;; ptr++) Line 3331  for (;; ptr++)
3331    
3332        /* Remember \r or \n */        /* Remember \r or \n */
3333    
3334        if (c == '\r' || c == '\n') cd->external_flags |= PCRE_HASCRORLF;        if (c == CHAR_CR || c == CHAR_NL) cd->external_flags |= PCRE_HASCRORLF;
3335    
3336        /* Check for range */        /* Check for range */
3337    
3338        if (!inescq && ptr[1] == '-')        if (!inescq && ptr[1] == CHAR_MINUS)
3339          {          {
3340          int d;          int d;
3341          ptr += 2;          ptr += 2;
3342          while (*ptr == '\\' && ptr[1] == 'E') ptr += 2;          while (*ptr == CHAR_BACKSLASH && ptr[1] == CHAR_E) ptr += 2;
3343    
3344          /* If we hit \Q (not followed by \E) at this point, go into escaped          /* If we hit \Q (not followed by \E) at this point, go into escaped
3345          mode. */          mode. */
3346    
3347          while (*ptr == '\\' && ptr[1] == 'Q')          while (*ptr == CHAR_BACKSLASH && ptr[1] == CHAR_Q)
3348            {            {
3349            ptr += 2;            ptr += 2;
3350            if (*ptr == '\\' && ptr[1] == 'E') { ptr += 2; continue; }            if (*ptr == CHAR_BACKSLASH && ptr[1] == CHAR_E)
3351                { ptr += 2; continue; }
3352            inescq = TRUE;            inescq = TRUE;
3353            break;            break;
3354            }            }
3355    
3356          if (*ptr == 0 || (!inescq && *ptr == ']'))          if (*ptr == 0 || (!inescq && *ptr == CHAR_RIGHT_SQUARE_BRACKET))
3357            {            {
3358            ptr = oldptr;            ptr = oldptr;
3359            goto LONE_SINGLE_CHARACTER;            goto LONE_SINGLE_CHARACTER;
# Line 3186  for (;; ptr++) Line 3372  for (;; ptr++)
3372          not any of the other escapes. Perl 5.6 treats a hyphen as a literal          not any of the other escapes. Perl 5.6 treats a hyphen as a literal
3373          in such circumstances. */          in such circumstances. */
3374    
3375          if (!inescq && d == '\\')          if (!inescq && d == CHAR_BACKSLASH)
3376            {            {
3377            d = check_escape(&ptr, errorcodeptr, cd->bracount, options, TRUE);            d = check_escape(&ptr, errorcodeptr, cd->bracount, options, TRUE);
3378            if (*errorcodeptr != 0) goto FAILED;            if (*errorcodeptr != 0) goto FAILED;
# Line 3196  for (;; ptr++) Line 3382  for (;; ptr++)
3382    
3383            if (d < 0)            if (d < 0)
3384              {              {
3385              if (d == -ESC_b) d = '\b';              if (d == -ESC_b) d = CHAR_BS;
3386              else if (d == -ESC_X) d = 'X';              else if (d == -ESC_X) d = CHAR_X;
3387              else if (d == -ESC_R) d = 'R'; else              else if (d == -ESC_R) d = CHAR_R; else
3388                {                {
3389                ptr = oldptr;                ptr = oldptr;
3390                goto LONE_SINGLE_CHARACTER;  /* A few lines below */                goto LONE_SINGLE_CHARACTER;  /* A few lines below */
# Line 3219  for (;; ptr++) Line 3405  for (;; ptr++)
3405    
3406          /* Remember \r or \n */          /* Remember \r or \n */
3407    
3408          if (d == '\r' || d == '\n') cd->external_flags |= PCRE_HASCRORLF;          if (d == CHAR_CR || d == CHAR_NL) cd->external_flags |= PCRE_HASCRORLF;
3409    
3410          /* In UTF-8 mode, if the upper limit is > 255, or > 127 for caseless          /* In UTF-8 mode, if the upper limit is > 255, or > 127 for caseless
3411          matching, we have to use an XCLASS with extra data items. Caseless          matching, we have to use an XCLASS with extra data items. Caseless
# Line 3366  for (;; ptr++) Line 3552  for (;; ptr++)
3552    
3553      /* Loop until ']' reached. This "while" is the end of the "do" above. */      /* Loop until ']' reached. This "while" is the end of the "do" above. */
3554    
3555      while ((c = *(++ptr)) != 0 && (c != ']' || inescq));      while ((c = *(++ptr)) != 0 && (c != CHAR_RIGHT_SQUARE_BRACKET || inescq));
3556    
3557      if (c == 0)                          /* Missing terminating ']' */      if (c == 0)                          /* Missing terminating ']' */
3558        {        {
# Line 3511  we set the flag only if there is a liter Line 3697  we set the flag only if there is a liter
3697      /* Various kinds of repeat; '{' is not necessarily a quantifier, but this      /* Various kinds of repeat; '{' is not necessarily a quantifier, but this
3698      has been tested above. */      has been tested above. */
3699    
3700      case '{':      case CHAR_LEFT_CURLY_BRACKET:
3701      if (!is_quantifier) goto NORMAL_CHAR;      if (!is_quantifier) goto NORMAL_CHAR;
3702      ptr = read_repeat_counts(ptr+1, &repeat_min, &repeat_max, errorcodeptr);      ptr = read_repeat_counts(ptr+1, &repeat_min, &repeat_max, errorcodeptr);
3703      if (*errorcodeptr != 0) goto FAILED;      if (*errorcodeptr != 0) goto FAILED;
3704      goto REPEAT;      goto REPEAT;
3705    
3706      case '*':      case CHAR_ASTERISK:
3707      repeat_min = 0;      repeat_min = 0;
3708      repeat_max = -1;      repeat_max = -1;
3709      goto REPEAT;      goto REPEAT;
3710    
3711      case '+':      case CHAR_PLUS:
3712      repeat_min = 1;      repeat_min = 1;
3713      repeat_max = -1;      repeat_max = -1;
3714      goto REPEAT;      goto REPEAT;
3715    
3716      case '?':      case CHAR_QUESTION_MARK:
3717      repeat_min = 0;      repeat_min = 0;
3718      repeat_max = 1;      repeat_max = 1;
3719    
# Line 3562  we set the flag only if there is a liter Line 3748  we set the flag only if there is a liter
3748      but if PCRE_UNGREEDY is set, it works the other way round. We change the      but if PCRE_UNGREEDY is set, it works the other way round. We change the
3749      repeat type to the non-default. */      repeat type to the non-default. */
3750    
3751      if (ptr[1] == '+')      if (ptr[1] == CHAR_PLUS)
3752        {        {
3753        repeat_type = 0;                  /* Force greedy */        repeat_type = 0;                  /* Force greedy */
3754        possessive_quantifier = TRUE;        possessive_quantifier = TRUE;
3755        ptr++;        ptr++;
3756        }        }
3757      else if (ptr[1] == '?')      else if (ptr[1] == CHAR_QUESTION_MARK)
3758        {        {
3759        repeat_type = greedy_non_default;        repeat_type = greedy_non_default;
3760        ptr++;        ptr++;
# Line 4201  we set the flag only if there is a liter Line 4387  we set the flag only if there is a liter
4387      lookbehind or option setting or condition or all the other extended      lookbehind or option setting or condition or all the other extended
4388      parenthesis forms.  */      parenthesis forms.  */
4389    
4390      case '(':      case CHAR_LEFT_PARENTHESIS:
4391      newoptions = options;      newoptions = options;
4392      skipbytes = 0;      skipbytes = 0;
4393      bravalue = OP_CBRA;      bravalue = OP_CBRA;
# Line 4210  we set the flag only if there is a liter Line 4396  we set the flag only if there is a liter
4396    
4397      /* First deal with various "verbs" that can be introduced by '*'. */      /* First deal with various "verbs" that can be introduced by '*'. */
4398    
4399      if (*(++ptr) == '*' && (cd->ctypes[ptr[1]] & ctype_letter) != 0)      if (*(++ptr) == CHAR_ASTERISK && (cd->ctypes[ptr[1]] & ctype_letter) != 0)
4400        {        {
4401        int i, namelen;        int i, namelen;
4402        const char *vn = verbnames;        const char *vn = verbnames;
4403        const uschar *name = ++ptr;        const uschar *name = ++ptr;
4404        previous = NULL;        previous = NULL;
4405        while ((cd->ctypes[*++ptr] & ctype_letter) != 0);        while ((cd->ctypes[*++ptr] & ctype_letter) != 0) {};
4406        if (*ptr == ':')        if (*ptr == CHAR_COLON)
4407          {          {
4408          *errorcodeptr = ERR59;   /* Not supported */          *errorcodeptr = ERR59;   /* Not supported */
4409          goto FAILED;          goto FAILED;
4410          }          }
4411        if (*ptr != ')')        if (*ptr != CHAR_RIGHT_PARENTHESIS)
4412          {          {
4413          *errorcodeptr = ERR60;          *errorcodeptr = ERR60;
4414          goto FAILED;          goto FAILED;
# Line 4247  we set the flag only if there is a liter Line 4433  we set the flag only if there is a liter
4433      /* Deal with the extended parentheses; all are introduced by '?', and the      /* Deal with the extended parentheses; all are introduced by '?', and the
4434      appearance of any of them means that this is not a capturing group. */      appearance of any of them means that this is not a capturing group. */
4435    
4436      else if (*ptr == '?')      else if (*ptr == CHAR_QUESTION_MARK)
4437        {        {
4438        int i, set, unset, namelen;        int i, set, unset, namelen;
4439        int *optset;        int *optset;
# Line 4256  we set the flag only if there is a liter Line 4442  we set the flag only if there is a liter
4442    
4443        switch (*(++ptr))        switch (*(++ptr))
4444          {          {
4445          case '#':                 /* Comment; skip to ket */          case CHAR_NUMBER_SIGN:                 /* Comment; skip to ket */
4446          ptr++;          ptr++;
4447          while (*ptr != 0 && *ptr != ')') ptr++;          while (*ptr != 0 && *ptr != CHAR_RIGHT_PARENTHESIS) ptr++;
4448          if (*ptr == 0)          if (*ptr == 0)
4449            {            {
4450            *errorcodeptr = ERR18;            *errorcodeptr = ERR18;
# Line 4268  we set the flag only if there is a liter Line 4454  we set the flag only if there is a liter
4454    
4455    
4456          /* ------------------------------------------------------------ */          /* ------------------------------------------------------------ */
4457          case '|':                 /* Reset capture count for each branch */          case CHAR_VERTICAL_LINE:  /* Reset capture count for each branch */
4458          reset_bracount = TRUE;          reset_bracount = TRUE;
4459          /* Fall through */          /* Fall through */
4460    
4461          /* ------------------------------------------------------------ */          /* ------------------------------------------------------------ */
4462          case ':':                 /* Non-capturing bracket */          case CHAR_COLON:          /* Non-capturing bracket */
4463          bravalue = OP_BRA;          bravalue = OP_BRA;
4464          ptr++;          ptr++;
4465          break;          break;
4466    
4467    
4468          /* ------------------------------------------------------------ */          /* ------------------------------------------------------------ */
4469          case '(':          case CHAR_LEFT_PARENTHESIS:
4470          bravalue = OP_COND;       /* Conditional group */          bravalue = OP_COND;       /* Conditional group */
4471    
4472          /* A condition can be an assertion, a number (referring to a numbered          /* A condition can be an assertion, a number (referring to a numbered
# Line 4300  we set the flag only if there is a liter Line 4486  we set the flag only if there is a liter
4486          the switch. This will take control down to where bracketed groups,          the switch. This will take control down to where bracketed groups,
4487          including assertions, are processed. */          including assertions, are processed. */
4488    
4489          if (ptr[1] == '?' && (ptr[2] == '=' || ptr[2] == '!' || ptr[2] == '<'))          if (ptr[1] == CHAR_QUESTION_MARK && (ptr[2] == CHAR_EQUALS_SIGN ||
4490                ptr[2] == CHAR_EXCLAMATION_MARK || ptr[2] == CHAR_LESS_THAN_SIGN))
4491            break;            break;
4492    
4493          /* Most other conditions use OP_CREF (a couple change to OP_RREF          /* Most other conditions use OP_CREF (a couple change to OP_RREF
# Line 4312  we set the flag only if there is a liter Line 4499  we set the flag only if there is a liter
4499    
4500          /* Check for a test for recursion in a named group. */          /* Check for a test for recursion in a named group. */
4501    
4502          if (ptr[1] == 'R' && ptr[2] == '&')          if (ptr[1] == CHAR_R && ptr[2] == CHAR_AMPERSAND)
4503            {            {
4504            terminator = -1;            terminator = -1;
4505            ptr += 2;            ptr += 2;
# Line 4322  we set the flag only if there is a liter Line 4509  we set the flag only if there is a liter
4509          /* Check for a test for a named group's having been set, using the Perl          /* Check for a test for a named group's having been set, using the Perl
4510          syntax (?(<name>) or (?('name') */          syntax (?(<name>) or (?('name') */
4511    
4512          else if (ptr[1] == '<')          else if (ptr[1] == CHAR_LESS_THAN_SIGN)
4513            {            {
4514            terminator = '>';            terminator = CHAR_GREATER_THAN_SIGN;
4515            ptr++;            ptr++;
4516            }            }
4517          else if (ptr[1] == '\'')          else if (ptr[1] == CHAR_APOSTROPHE)
4518            {            {
4519            terminator = '\'';            terminator = CHAR_APOSTROPHE;
4520            ptr++;            ptr++;
4521            }            }
4522          else          else
4523            {            {
4524            terminator = 0;            terminator = 0;
4525            if (ptr[1] == '-' || ptr[1] == '+') refsign = *(++ptr);            if (ptr[1] == CHAR_MINUS || ptr[1] == CHAR_PLUS) refsign = *(++ptr);
4526            }            }
4527    
4528          /* We now expect to read a name; anything else is an error */          /* We now expect to read a name; anything else is an error */
# Line 4355  we set the flag only if there is a liter Line 4542  we set the flag only if there is a liter
4542            {            {
4543            if (recno >= 0)            if (recno >= 0)
4544              recno = ((digitab[*ptr] & ctype_digit) != 0)?              recno = ((digitab[*ptr] & ctype_digit) != 0)?
4545                recno * 10 + *ptr - '0' : -1;                recno * 10 + *ptr - CHAR_0 : -1;
4546            ptr++;            ptr++;
4547            }            }
4548          namelen = ptr - name;          namelen = ptr - name;
4549    
4550          if ((terminator > 0 && *ptr++ != terminator) || *ptr++ != ')')          if ((terminator > 0 && *ptr++ != terminator) ||
4551                *ptr++ != CHAR_RIGHT_PARENTHESIS)
4552            {            {
4553            ptr--;      /* Error offset */            ptr--;      /* Error offset */
4554            *errorcodeptr = ERR26;            *errorcodeptr = ERR26;
# Line 4382  we set the flag only if there is a liter Line 4570  we set the flag only if there is a liter
4570              *errorcodeptr = ERR58;              *errorcodeptr = ERR58;
4571              goto FAILED;              goto FAILED;
4572              }              }
4573            recno = (refsign == '-')?            recno = (refsign == CHAR_MINUS)?
4574              cd->bracount - recno + 1 : recno +cd->bracount;              cd->bracount - recno + 1 : recno +cd->bracount;
4575            if (recno <= 0 || recno > cd->final_bracount)            if (recno <= 0 || recno > cd->final_bracount)
4576              {              {
# Line 4413  we set the flag only if there is a liter Line 4601  we set the flag only if there is a liter
4601    
4602          /* Search the pattern for a forward reference */          /* Search the pattern for a forward reference */
4603    
4604          else if ((i = find_parens(ptr, cd, name, namelen,          else if ((i = find_parens(cd, name, namelen,
4605                          (options & PCRE_EXTENDED) != 0)) > 0)                          (options & PCRE_EXTENDED) != 0)) > 0)
4606            {            {
4607            PUT2(code, 2+LINK_SIZE, i);            PUT2(code, 2+LINK_SIZE, i);
# Line 4434  we set the flag only if there is a liter Line 4622  we set the flag only if there is a liter
4622          /* Check for (?(R) for recursion. Allow digits after R to specify a          /* Check for (?(R) for recursion. Allow digits after R to specify a
4623          specific group number. */          specific group number. */
4624    
4625          else if (*name == 'R')          else if (*name == CHAR_R)
4626            {            {
4627            recno = 0;            recno = 0;
4628            for (i = 1; i < namelen; i++)            for (i = 1; i < namelen; i++)
# Line 4444  we set the flag only if there is a liter Line 4632  we set the flag only if there is a liter
4632                *errorcodeptr = ERR15;                *errorcodeptr = ERR15;
4633                goto FAILED;                goto FAILED;
4634                }                }
4635              recno = recno * 10 + name[i] - '0';              recno = recno * 10 + name[i] - CHAR_0;
4636              }              }
4637            if (recno == 0) recno = RREF_ANY;            if (recno == 0) recno = RREF_ANY;
4638            code[1+LINK_SIZE] = OP_RREF;      /* Change test type */            code[1+LINK_SIZE] = OP_RREF;      /* Change test type */
# Line 4454  we set the flag only if there is a liter Line 4642  we set the flag only if there is a liter
4642          /* Similarly, check for the (?(DEFINE) "condition", which is always          /* Similarly, check for the (?(DEFINE) "condition", which is always
4643          false. */          false. */
4644    
4645          else if (namelen == 6 && strncmp((char *)name, "DEFINE", 6) == 0)          else if (namelen == 6 && strncmp((char *)name, STRING_DEFINE, 6) == 0)
4646            {            {
4647            code[1+LINK_SIZE] = OP_DEF;            code[1+LINK_SIZE] = OP_DEF;
4648            skipbytes = 1;            skipbytes = 1;
# Line 4479  we set the flag only if there is a liter Line 4667  we set the flag only if there is a liter
4667    
4668    
4669          /* ------------------------------------------------------------ */          /* ------------------------------------------------------------ */
4670          case '=':                 /* Positive lookahead */          case CHAR_EQUALS_SIGN:                 /* Positive lookahead */
4671          bravalue = OP_ASSERT;          bravalue = OP_ASSERT;
4672          ptr++;          ptr++;
4673          break;          break;
4674    
4675    
4676          /* ------------------------------------------------------------ */          /* ------------------------------------------------------------ */
4677          case '!':                 /* Negative lookahead */          case CHAR_EXCLAMATION_MARK:            /* Negative lookahead */
4678          ptr++;          ptr++;
4679          if (*ptr == ')')          /* Optimize (?!) */          if (*ptr == CHAR_RIGHT_PARENTHESIS)    /* Optimize (?!) */
4680            {            {
4681            *code++ = OP_FAIL;            *code++ = OP_FAIL;
4682            previous = NULL;            previous = NULL;
# Line 4499  we set the flag only if there is a liter Line 4687  we set the flag only if there is a liter
4687    
4688    
4689          /* ------------------------------------------------------------ */          /* ------------------------------------------------------------ */
4690          case '<':                 /* Lookbehind or named define */          case CHAR_LESS_THAN_SIGN:              /* Lookbehind or named define */
4691          switch (ptr[1])          switch (ptr[1])
4692            {            {
4693            case '=':               /* Positive lookbehind */            case CHAR_EQUALS_SIGN:               /* Positive lookbehind */
4694            bravalue = OP_ASSERTBACK;            bravalue = OP_ASSERTBACK;
4695            ptr += 2;            ptr += 2;
4696            break;            break;
4697    
4698            case '!':               /* Negative lookbehind */            case CHAR_EXCLAMATION_MARK:          /* Negative lookbehind */
4699            bravalue = OP_ASSERTBACK_NOT;            bravalue = OP_ASSERTBACK_NOT;
4700            ptr += 2;            ptr += 2;
4701            break;            break;
# Line 4522  we set the flag only if there is a liter Line 4710  we set the flag only if there is a liter
4710    
4711    
4712          /* ------------------------------------------------------------ */          /* ------------------------------------------------------------ */
4713          case '>':                 /* One-time brackets */          case CHAR_GREATER_THAN_SIGN:           /* One-time brackets */
4714          bravalue = OP_ONCE;          bravalue = OP_ONCE;
4715          ptr++;          ptr++;
4716          break;          break;
4717    
4718    
4719          /* ------------------------------------------------------------ */          /* ------------------------------------------------------------ */
4720          case 'C':                 /* Callout - may be followed by digits; */          case CHAR_C:                 /* Callout - may be followed by digits; */
4721          previous_callout = code;  /* Save for later completion */          previous_callout = code;  /* Save for later completion */
4722          after_manual_callout = 1; /* Skip one item before completing */          after_manual_callout = 1; /* Skip one item before completing */
4723          *code++ = OP_CALLOUT;          *code++ = OP_CALLOUT;
4724            {            {
4725            int n = 0;            int n = 0;
4726            while ((digitab[*(++ptr)] & ctype_digit) != 0)            while ((digitab[*(++ptr)] & ctype_digit) != 0)
4727              n = n * 10 + *ptr - '0';              n = n * 10 + *ptr - CHAR_0;
4728            if (*ptr != ')')            if (*ptr != CHAR_RIGHT_PARENTHESIS)
4729              {              {
4730              *errorcodeptr = ERR39;              *errorcodeptr = ERR39;
4731              goto FAILED;              goto FAILED;
# Line 4557  we set the flag only if there is a liter Line 4745  we set the flag only if there is a liter
4745    
4746    
4747          /* ------------------------------------------------------------ */          /* ------------------------------------------------------------ */
4748          case 'P':                 /* Python-style named subpattern handling */          case CHAR_P:              /* Python-style named subpattern handling */
4749          if (*(++ptr) == '=' || *ptr == '>')  /* Reference or recursion */          if (*(++ptr) == CHAR_EQUALS_SIGN ||
4750                *ptr == CHAR_GREATER_THAN_SIGN)  /* Reference or recursion */
4751            {            {
4752            is_recurse = *ptr == '>';            is_recurse = *ptr == CHAR_GREATER_THAN_SIGN;
4753            terminator = ')';            terminator = CHAR_RIGHT_PARENTHESIS;
4754            goto NAMED_REF_OR_RECURSE;            goto NAMED_REF_OR_RECURSE;
4755            }            }
4756          else if (*ptr != '<')    /* Test for Python-style definition */          else if (*ptr != CHAR_LESS_THAN_SIGN)  /* Test for Python-style defn */
4757            {            {
4758            *errorcodeptr = ERR41;            *errorcodeptr = ERR41;
4759            goto FAILED;            goto FAILED;
# Line 4574  we set the flag only if there is a liter Line 4763  we set the flag only if there is a liter
4763    
4764          /* ------------------------------------------------------------ */          /* ------------------------------------------------------------ */
4765          DEFINE_NAME:    /* Come here from (?< handling */          DEFINE_NAME:    /* Come here from (?< handling */
4766          case '\'':          case CHAR_APOSTROPHE:
4767            {            {
4768            terminator = (*ptr == '<')? '>' : '\'';            terminator = (*ptr == CHAR_LESS_THAN_SIGN)?
4769                CHAR_GREATER_THAN_SIGN : CHAR_APOSTROPHE;
4770            name = ++ptr;            name = ++ptr;
4771    
4772            while ((cd->ctypes[*ptr] & ctype_word) != 0) ptr++;            while ((cd->ctypes[*ptr] & ctype_word) != 0) ptr++;
# Line 4650  we set the flag only if there is a liter Line 4840  we set the flag only if there is a liter
4840    
4841    
4842          /* ------------------------------------------------------------ */          /* ------------------------------------------------------------ */
4843          case '&':                 /* Perl recursion/subroutine syntax */          case CHAR_AMPERSAND:            /* Perl recursion/subroutine syntax */
4844          terminator = ')';          terminator = CHAR_RIGHT_PARENTHESIS;
4845          is_recurse = TRUE;          is_recurse = TRUE;
4846          /* Fall through */          /* Fall through */
4847    
# Line 4710  we set the flag only if there is a liter Line 4900  we set the flag only if there is a liter
4900              recno = GET2(slot, 0);              recno = GET2(slot, 0);
4901              }              }
4902            else if ((recno =                /* Forward back reference */            else if ((recno =                /* Forward back reference */
4903                      find_parens(ptr, cd, name, namelen,                      find_parens(cd, name, namelen,
4904                        (options & PCRE_EXTENDED) != 0)) <= 0)                        (options & PCRE_EXTENDED) != 0)) <= 0)
4905              {              {
4906              *errorcodeptr = ERR15;              *errorcodeptr = ERR15;
# Line 4726  we set the flag only if there is a liter Line 4916  we set the flag only if there is a liter
4916    
4917    
4918          /* ------------------------------------------------------------ */          /* ------------------------------------------------------------ */
4919          case 'R':                 /* Recursion */          case CHAR_R:              /* Recursion */
4920          ptr++;                    /* Same as (?0)      */          ptr++;                    /* Same as (?0)      */
4921          /* Fall through */          /* Fall through */
4922    
4923    
4924          /* ------------------------------------------------------------ */          /* ------------------------------------------------------------ */
4925          case '-': case '+':          case CHAR_MINUS: case CHAR_PLUS:  /* Recursion or subroutine */
4926          case '0': case '1': case '2': case '3': case '4':   /* Recursion or */          case CHAR_0: case CHAR_1: case CHAR_2: case CHAR_3: case CHAR_4:
4927          case '5': case '6': case '7': case '8': case '9':   /* subroutine */          case CHAR_5: case CHAR_6: case CHAR_7: case CHAR_8: case CHAR_9:
4928            {            {
4929            const uschar *called;            const uschar *called;
4930            terminator = ')';            terminator = CHAR_RIGHT_PARENTHESIS;
4931    
4932            /* Come here from the \g<...> and \g'...' code (Oniguruma            /* Come here from the \g<...> and \g'...' code (Oniguruma
4933            compatibility). However, the syntax has been checked to ensure that            compatibility). However, the syntax has been checked to ensure that
# Line 4747  we set the flag only if there is a liter Line 4937  we set the flag only if there is a liter
4937    
4938            HANDLE_NUMERICAL_RECURSION:            HANDLE_NUMERICAL_RECURSION:
4939    
4940            if ((refsign = *ptr) == '+')            if ((refsign = *ptr) == CHAR_PLUS)
4941              {              {
4942              ptr++;              ptr++;
4943              if ((digitab[*ptr] & ctype_digit) == 0)              if ((digitab[*ptr] & ctype_digit) == 0)
# Line 4756  we set the flag only if there is a liter Line 4946  we set the flag only if there is a liter
4946                goto FAILED;                goto FAILED;
4947                }                }
4948              }              }
4949            else if (refsign == '-')            else if (refsign == CHAR_MINUS)
4950              {              {
4951              if ((digitab[ptr[1]] & ctype_digit) == 0)              if ((digitab[ptr[1]] & ctype_digit) == 0)
4952                goto OTHER_CHAR_AFTER_QUERY;                goto OTHER_CHAR_AFTER_QUERY;
# Line 4765  we set the flag only if there is a liter Line 4955  we set the flag only if there is a liter
4955    
4956            recno = 0;            recno = 0;
4957            while((digitab[*ptr] & ctype_digit) != 0)            while((digitab[*ptr] & ctype_digit) != 0)
4958              recno = recno * 10 + *ptr++ - '0';              recno = recno * 10 + *ptr++ - CHAR_0;
4959    
4960            if (*ptr != terminator)            if (*ptr != terminator)
4961              {              {
# Line 4773  we set the flag only if there is a liter Line 4963  we set the flag only if there is a liter
4963              goto FAILED;              goto FAILED;
4964              }              }
4965    
4966            if (refsign == '-')            if (refsign == CHAR_MINUS)
4967              {              {
4968              if (recno == 0)              if (recno == 0)
4969                {                {
# Line 4787  we set the flag only if there is a liter Line 4977  we set the flag only if there is a liter
4977                goto FAILED;                goto FAILED;
4978                }                }
4979              }              }
4980            else if (refsign == '+')            else if (refsign == CHAR_PLUS)
4981              {              {
4982              if (recno == 0)              if (recno == 0)
4983                {                {
# Line 4820  we set the flag only if there is a liter Line 5010  we set the flag only if there is a liter
5010    
5011              if (called == NULL)              if (called == NULL)
5012                {                {
5013                if (find_parens(ptr, cd, NULL, recno,                if (find_parens(cd, NULL, recno,
5014                      (options & PCRE_EXTENDED) != 0) < 0)                      (options & PCRE_EXTENDED) != 0) < 0)
5015                  {                  {
5016                  *errorcodeptr = ERR15;                  *errorcodeptr = ERR15;
# Line 4873  we set the flag only if there is a liter Line 5063  we set the flag only if there is a liter
5063          set = unset = 0;          set = unset = 0;
5064          optset = &set;          optset = &set;
5065    
5066          while (*ptr != ')' && *ptr != ':')          while (*ptr != CHAR_RIGHT_PARENTHESIS && *ptr != CHAR_COLON)
5067            {            {
5068            switch (*ptr++)            switch (*ptr++)
5069              {              {
5070              case '-': optset = &unset; break;              case CHAR_MINUS: optset = &unset; break;
5071    
5072              case 'J':    /* Record that it changed in the external options */              case CHAR_J:    /* Record that it changed in the external options */
5073              *optset |= PCRE_DUPNAMES;              *optset |= PCRE_DUPNAMES;
5074              cd->external_flags |= PCRE_JCHANGED;              cd->external_flags |= PCRE_JCHANGED;
5075              break;              break;
5076    
5077              case 'i': *optset |= PCRE_CASELESS; break;              case CHAR_i: *optset |= PCRE_CASELESS; break;
5078              case 'm': *optset |= PCRE_MULTILINE; break;              case CHAR_m: *optset |= PCRE_MULTILINE; break;
5079              case 's': *optset |= PCRE_DOTALL; break;              case CHAR_s: *optset |= PCRE_DOTALL; break;
5080              case 'x': *optset |= PCRE_EXTENDED; break;              case CHAR_x: *optset |= PCRE_EXTENDED; break;
5081              case 'U': *optset |= PCRE_UNGREEDY; break;              case CHAR_U: *optset |= PCRE_UNGREEDY; break;
5082              case 'X': *optset |= PCRE_EXTRA; break;              case CHAR_X: *optset |= PCRE_EXTRA; break;
5083    
5084              default:  *errorcodeptr = ERR12;              default:  *errorcodeptr = ERR12;
5085                        ptr--;    /* Correct the offset */                        ptr--;    /* Correct the offset */
# Line 4920  we set the flag only if there is a liter Line 5110  we set the flag only if there is a liter
5110          both phases.          both phases.
5111    
5112          If we are not at the pattern start, compile code to change the ims          If we are not at the pattern start, compile code to change the ims
5113          options if this setting actually changes any of them, and reset the          options if this setting actually changes any of them, and reset the
5114          greedy defaults and the case value for firstbyte and reqbyte. */          greedy defaults and the case value for firstbyte and reqbyte. */
5115    
5116          if (*ptr == ')')          if (*ptr == CHAR_RIGHT_PARENTHESIS)
5117            {            {
5118            if (code == cd->start_code + 1 + LINK_SIZE &&            if (code == cd->start_code + 1 + LINK_SIZE &&
5119                 (lengthptr == NULL || *lengthptr == 2 + 2*LINK_SIZE))                 (lengthptr == NULL || *lengthptr == 2 + 2*LINK_SIZE))
# Line 4944  we set the flag only if there is a liter Line 5134  we set the flag only if there is a liter
5134    
5135            /* Change options at this level, and pass them back for use            /* Change options at this level, and pass them back for use
5136            in subsequent branches. When not at the start of the pattern, this            in subsequent branches. When not at the start of the pattern, this
5137            information is also necessary so that a resetting item can be            information is also necessary so that a resetting item can be
5138            compiled at the end of a group (if we are in a group). */            compiled at the end of a group (if we are in a group). */
5139    
5140            *optionsptr = options = newoptions;            *optionsptr = options = newoptions;
# Line 5063  we set the flag only if there is a liter Line 5253  we set the flag only if there is a liter
5253    
5254      /* Error if hit end of pattern */      /* Error if hit end of pattern */
5255    
5256      if (*ptr != ')')      if (*ptr != CHAR_RIGHT_PARENTHESIS)
5257        {        {
5258        *errorcodeptr = ERR14;        *errorcodeptr = ERR14;
5259        goto FAILED;        goto FAILED;
# Line 5161  we set the flag only if there is a liter Line 5351  we set the flag only if there is a liter
5351      We can test for values between ESC_b and ESC_Z for the latter; this may      We can test for values between ESC_b and ESC_Z for the latter; this may
5352      have to change if any new ones are ever created. */      have to change if any new ones are ever created. */
5353    
5354      case '\\':      case CHAR_BACKSLASH:
5355      tempptr = ptr;      tempptr = ptr;
5356      c = check_escape(&ptr, errorcodeptr, cd->bracount, options, FALSE);      c = check_escape(&ptr, errorcodeptr, cd->bracount, options, FALSE);
5357      if (*errorcodeptr != 0) goto FAILED;      if (*errorcodeptr != 0) goto FAILED;
# Line 5170  we set the flag only if there is a liter Line 5360  we set the flag only if there is a liter
5360        {        {
5361        if (-c == ESC_Q)            /* Handle start of quoted string */        if (-c == ESC_Q)            /* Handle start of quoted string */
5362          {          {
5363          if (ptr[1] == '\\' && ptr[2] == 'E') ptr += 2; /* avoid empty string */          if (ptr[1] == CHAR_BACKSLASH && ptr[2] == CHAR_E)
5364            else inescq = TRUE;            ptr += 2;               /* avoid empty string */
5365                else inescq = TRUE;
5366          continue;          continue;
5367          }          }
5368    
# Line 5199  we set the flag only if there is a liter Line 5390  we set the flag only if there is a liter
5390          {          {
5391          const uschar *p;          const uschar *p;
5392          save_hwm = cd->hwm;   /* Normally this is set when '(' is read */          save_hwm = cd->hwm;   /* Normally this is set when '(' is read */
5393          terminator = (*(++ptr) == '<')? '>' : '\'';          terminator = (*(++ptr) == CHAR_LESS_THAN_SIGN)?
5394              CHAR_GREATER_THAN_SIGN : CHAR_APOSTROPHE;
5395    
5396          /* These two statements stop the compiler from warning about possibly          /* These two statements stop the compiler from warning about possibly
5397          unset variables caused by the jump to HANDLE_NUMERICAL_RECURSION. In          unset variables caused by the jump to HANDLE_NUMERICAL_RECURSION. In
# Line 5211  we set the flag only if there is a liter Line 5403  we set the flag only if there is a liter
5403    
5404          /* Test for a name */          /* Test for a name */
5405    
5406          if (ptr[1] != '+' && ptr[1] != '-')          if (ptr[1] != CHAR_PLUS && ptr[1] != CHAR_MINUS)
5407            {            {
5408            BOOL isnumber = TRUE;            BOOL isnumber = TRUE;
5409            for (p = ptr + 1; *p != 0 && *p != terminator; p++)            for (p = ptr + 1; *p != 0 && *p != terminator; p++)
# Line 5249  we set the flag only if there is a liter Line 5441  we set the flag only if there is a liter
5441        /* \k<name> or \k'name' is a back reference by name (Perl syntax).        /* \k<name> or \k'name' is a back reference by name (Perl syntax).
5442        We also support \k{name} (.NET syntax) */        We also support \k{name} (.NET syntax) */
5443    
5444        if (-c == ESC_k && (ptr[1] == '<' || ptr[1] == '\'' || ptr[1] == '{'))        if (-c == ESC_k && (ptr[1] == CHAR_LESS_THAN_SIGN ||
5445              ptr[1] == CHAR_APOSTROPHE || ptr[1] == CHAR_LEFT_CURLY_BRACKET))
5446          {          {
5447          is_recurse = FALSE;          is_recurse = FALSE;
5448          terminator = (*(++ptr) == '<')? '>' : (*ptr == '\'')? '\'' : '}';          terminator = (*(++ptr) == CHAR_LESS_THAN_SIGN)?
5449              CHAR_GREATER_THAN_SIGN : (*ptr == CHAR_APOSTROPHE)?
5450              CHAR_APOSTROPHE : CHAR_RIGHT_CURLY_BRACKET;
5451          goto NAMED_REF_OR_RECURSE;          goto NAMED_REF_OR_RECURSE;
5452          }          }
5453    
# Line 5355  we set the flag only if there is a liter Line 5550  we set the flag only if there is a liter
5550    
5551      /* Remember if \r or \n were seen */      /* Remember if \r or \n were seen */
5552    
5553      if (mcbuffer[0] == '\r' || mcbuffer[0] == '\n')      if (mcbuffer[0] == CHAR_CR || mcbuffer[0] == CHAR_NL)
5554        cd->external_flags |= PCRE_HASCRORLF;        cd->external_flags |= PCRE_HASCRORLF;
5555    
5556      /* Set the first and required bytes appropriately. If no previous first      /* Set the first and required bytes appropriately. If no previous first
# Line 5600  for (;;) Line 5795  for (;;)
5795    compile a resetting op-code following, except at the very end of the pattern.    compile a resetting op-code following, except at the very end of the pattern.
5796    Return leaving the pointer at the terminating char. */    Return leaving the pointer at the terminating char. */
5797    
5798    if (*ptr != '|')    if (*ptr != CHAR_VERTICAL_LINE)
5799      {      {
5800      if (lengthptr == NULL)      if (lengthptr == NULL)
5801        {        {
# Line 5623  for (;;) Line 5818  for (;;)
5818    
5819      /* Resetting option if needed */      /* Resetting option if needed */
5820    
5821      if ((options & PCRE_IMS) != oldims && *ptr == ')')      if ((options & PCRE_IMS) != oldims && *ptr == CHAR_RIGHT_PARENTHESIS)
5822        {        {
5823        *code++ = OP_OPT;        *code++ = OP_OPT;
5824        *code++ = oldims;        *code++ = oldims;
# Line 5805  do { Line 6000  do {
6000       NULL, 0, FALSE);       NULL, 0, FALSE);
6001     register int op = *scode;     register int op = *scode;
6002    
6003       /* If we are at the start of a conditional assertion group, *both* the
6004       conditional assertion *and* what follows the condition must satisfy the test
6005       for start of line. Other kinds of condition fail. Note that there may be an
6006       auto-callout at the start of a condition. */
6007    
6008       if (op == OP_COND)
6009         {
6010         scode += 1 + LINK_SIZE;
6011         if (*scode == OP_CALLOUT) scode += _pcre_OP_lengths[OP_CALLOUT];
6012         switch (*scode)
6013           {
6014           case OP_CREF:
6015           case OP_RREF:
6016           case OP_DEF:
6017           return FALSE;
6018    
6019           default:     /* Assertion */
6020           if (!is_startline(scode, bracket_map, backref_map)) return FALSE;
6021           do scode += GET(scode, 1); while (*scode == OP_ALT);
6022           scode += 1 + LINK_SIZE;
6023           break;
6024           }
6025         scode = first_significant_code(scode, NULL, 0, FALSE);
6026         op = *scode;
6027         }
6028    
6029     /* Non-capturing brackets */     /* Non-capturing brackets */
6030    
6031     if (op == OP_BRA)     if (op == OP_BRA)
# Line 5823  do { Line 6044  do {
6044    
6045     /* Other brackets */     /* Other brackets */
6046    
6047     else if (op == OP_ASSERT || op == OP_ONCE || op == OP_COND)     else if (op == OP_ASSERT || op == OP_ONCE)
6048       { if (!is_startline(scode, bracket_map, backref_map)) return FALSE; }       {
6049         if (!is_startline(scode, bracket_map, backref_map)) return FALSE;
6050         }
6051    
6052     /* .* means "start at start or after \n" if it isn't in brackets that     /* .* means "start at start or after \n" if it isn't in brackets that
6053     may be referenced. */     may be referenced. */
# Line 6021  if ((options & PCRE_UTF8) != 0) Line 6244  if ((options & PCRE_UTF8) != 0)
6244    }    }
6245  #endif  #endif
6246    
6247  if ((options & ~PUBLIC_OPTIONS) != 0)  if ((options & ~PUBLIC_COMPILE_OPTIONS) != 0)
6248    {    {
6249    errorcode = ERR17;    errorcode = ERR17;
6250    goto PCRE_EARLY_ERROR_RETURN;    goto PCRE_EARLY_ERROR_RETURN;
# Line 6038  cd->ctypes = tables + ctypes_offset; Line 6261  cd->ctypes = tables + ctypes_offset;
6261  /* Check for global one-time settings at the start of the pattern, and remember  /* Check for global one-time settings at the start of the pattern, and remember
6262  the offset for later. */  the offset for later. */
6263    
6264  while (ptr[skipatstart] == '(' && ptr[skipatstart+1] == '*')  while (ptr[skipatstart] == CHAR_LEFT_PARENTHESIS &&
6265           ptr[skipatstart+1] == CHAR_ASTERISK)
6266    {    {
6267    int newnl = 0;    int newnl = 0;
6268    int newbsr = 0;    int newbsr = 0;
6269    
6270    if (strncmp((char *)(ptr+skipatstart+2), "CR)", 3) == 0)    if (strncmp((char *)(ptr+skipatstart+2), STRING_CR_RIGHTPAR, 3) == 0)
6271      { skipatstart += 5; newnl = PCRE_NEWLINE_CR; }      { skipatstart += 5; newnl = PCRE_NEWLINE_CR; }
6272    else if (strncmp((char *)(ptr+skipatstart+2), "LF)", 3)  == 0)    else if (strncmp((char *)(ptr+skipatstart+2), STRING_LF_RIGHTPAR, 3)  == 0)
6273      { skipatstart += 5; newnl = PCRE_NEWLINE_LF; }      { skipatstart += 5; newnl = PCRE_NEWLINE_LF; }
6274    else if (strncmp((char *)(ptr+skipatstart+2), "CRLF)", 5)  == 0)    else if (strncmp((char *)(ptr+skipatstart+2), STRING_CRLF_RIGHTPAR, 5)  == 0)
6275      { skipatstart += 7; newnl = PCRE_NEWLINE_CR + PCRE_NEWLINE_LF; }      { skipatstart += 7; newnl = PCRE_NEWLINE_CR + PCRE_NEWLINE_LF; }
6276    else if (strncmp((char *)(ptr+skipatstart+2), "ANY)", 4) == 0)    else if (strncmp((char *)(ptr+skipatstart+2), STRING_ANY_RIGHTPAR, 4) == 0)
6277      { skipatstart += 6; newnl = PCRE_NEWLINE_ANY; }      { skipatstart += 6; newnl = PCRE_NEWLINE_ANY; }
6278    else if (strncmp((char *)(ptr+skipatstart+2), "ANYCRLF)", 8)  == 0)    else if (strncmp((char *)(ptr+skipatstart+2), STRING_ANYCRLF_RIGHTPAR, 8) == 0)
6279      { skipatstart += 10; newnl = PCRE_NEWLINE_ANYCRLF; }      { skipatstart += 10; newnl = PCRE_NEWLINE_ANYCRLF; }
6280    
6281    else if (strncmp((char *)(ptr+skipatstart+2), "BSR_ANYCRLF)", 12) == 0)    else if (strncmp((char *)(ptr+skipatstart+2), STRING_BSR_ANYCRLF_RIGHTPAR, 12) == 0)
6282      { skipatstart += 14; newbsr = PCRE_BSR_ANYCRLF; }      { skipatstart += 14; newbsr = PCRE_BSR_ANYCRLF; }
6283    else if (strncmp((char *)(ptr+skipatstart+2), "BSR_UNICODE)", 12) == 0)    else if (strncmp((char *)(ptr+skipatstart+2), STRING_BSR_UNICODE_RIGHTPAR, 12) == 0)
6284      { skipatstart += 14; newbsr = PCRE_BSR_UNICODE; }      { skipatstart += 14; newbsr = PCRE_BSR_UNICODE; }
6285    
6286    if (newnl != 0)    if (newnl != 0)
# Line 6084  current code allows for fixed one- or tw Line 6308  current code allows for fixed one- or tw
6308  switch (options & PCRE_NEWLINE_BITS)  switch (options & PCRE_NEWLINE_BITS)
6309    {    {
6310    case 0: newline = NEWLINE; break;   /* Build-time default */    case 0: newline = NEWLINE; break;   /* Build-time default */
6311    case PCRE_NEWLINE_CR: newline = '\r'; break;    case PCRE_NEWLINE_CR: newline = CHAR_CR; break;
6312    case PCRE_NEWLINE_LF: newline = '\n'; break;    case PCRE_NEWLINE_LF: newline = CHAR_NL; break;
6313    case PCRE_NEWLINE_CR+    case PCRE_NEWLINE_CR+
6314         PCRE_NEWLINE_LF: newline = ('\r' << 8) | '\n'; break;         PCRE_NEWLINE_LF: newline = (CHAR_CR << 8) | CHAR_NL; break;
6315    case PCRE_NEWLINE_ANY: newline = -1; break;    case PCRE_NEWLINE_ANY: newline = -1; break;
6316    case PCRE_NEWLINE_ANYCRLF: newline = -2; break;    case PCRE_NEWLINE_ANYCRLF: newline = -2; break;
6317    default: errorcode = ERR56; goto PCRE_EARLY_ERROR_RETURN;    default: errorcode = ERR56; goto PCRE_EARLY_ERROR_RETURN;

Legend:
Removed from v.360  
changed lines
  Added in v.411

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12