/[pcre]/code/tags/pcre-7.8/pcre_compile.c
ViewVC logotype

Diff of /code/tags/pcre-7.8/pcre_compile.c

Parent Directory | Revision Log | View Patch

revision 96 by nigel, Fri Mar 2 13:10:43 2007 UTC revision 178 by ph10, Wed Jun 13 08:44:34 2007 UTC
# Line 6  Line 6 
6  and semantics are as close as possible to those of the Perl 5 language.  and semantics are as close as possible to those of the Perl 5 language.
7    
8                         Written by Philip Hazel                         Written by Philip Hazel
9             Copyright (c) 1997-2006 University of Cambridge             Copyright (c) 1997-2007 University of Cambridge
10    
11  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
12  Redistribution and use in source and binary forms, with or without  Redistribution and use in source and binary forms, with or without
# Line 58  used by pcretest. DEBUG is not defined w Line 58  used by pcretest. DEBUG is not defined w
58  #endif  #endif
59    
60    
61    /* Macro for setting individual bits in class bitmaps. */
62    
63    #define SETBIT(a,b) a[b/8] |= (1 << (b%8))
64    
65    
66  /*************************************************  /*************************************************
67  *      Code parameters and static tables         *  *      Code parameters and static tables         *
68  *************************************************/  *************************************************/
# Line 82  are simple data values; negative values Line 87  are simple data values; negative values
87  on. Zero means further processing is needed (for things like \x), or the escape  on. Zero means further processing is needed (for things like \x), or the escape
88  is invalid. */  is invalid. */
89    
90  #if !EBCDIC   /* This is the "normal" table for ASCII systems */  #ifndef EBCDIC  /* This is the "normal" table for ASCII systems */
91  static const short int escapes[] = {  static const short int escapes[] = {
92       0,      0,      0,      0,      0,      0,      0,      0,   /* 0 - 7 */       0,      0,      0,      0,      0,      0,      0,      0,   /* 0 - 7 */
93       0,      0,    ':',    ';',    '<',    '=',    '>',    '?',   /* 8 - ? */       0,      0,    ':',    ';',    '<',    '=',    '>',    '?',   /* 8 - ? */
94     '@', -ESC_A, -ESC_B, -ESC_C, -ESC_D, -ESC_E,      0, -ESC_G,   /* @ - G */     '@', -ESC_A, -ESC_B, -ESC_C, -ESC_D, -ESC_E,      0, -ESC_G,   /* @ - G */
95       0,      0,      0,      0,      0,      0,      0,      0,   /* H - O */  -ESC_H,      0,      0, -ESC_K,      0,      0,      0,      0,   /* H - O */
96  -ESC_P, -ESC_Q, -ESC_R, -ESC_S,      0,      0,      0, -ESC_W,   /* P - W */  -ESC_P, -ESC_Q, -ESC_R, -ESC_S,      0,      0, -ESC_V, -ESC_W,   /* P - W */
97  -ESC_X,      0, -ESC_Z,    '[',   '\\',    ']',    '^',    '_',   /* X - _ */  -ESC_X,      0, -ESC_Z,    '[',   '\\',    ']',    '^',    '_',   /* X - _ */
98     '`',      7, -ESC_b,      0, -ESC_d,  ESC_e,  ESC_f,      0,   /* ` - g */     '`',      7, -ESC_b,      0, -ESC_d,  ESC_e,  ESC_f,      0,   /* ` - g */
99       0,      0,      0, -ESC_k,      0,      0,  ESC_n,      0,   /* h - o */  -ESC_h,      0,      0, -ESC_k,      0,      0,  ESC_n,      0,   /* h - o */
100  -ESC_p,      0,  ESC_r, -ESC_s,  ESC_tee,    0,      0, -ESC_w,   /* p - w */  -ESC_p,      0,  ESC_r, -ESC_s,  ESC_tee,    0, -ESC_v, -ESC_w,   /* p - w */
101       0,      0, -ESC_z                                            /* x - z */       0,      0, -ESC_z                                            /* x - z */
102  };  };
103    
104  #else         /* This is the "abnormal" table for EBCDIC systems */  #else           /* This is the "abnormal" table for EBCDIC systems */
105  static const short int escapes[] = {  static const short int escapes[] = {
106  /*  48 */     0,     0,      0,     '.',    '<',   '(',    '+',    '|',  /*  48 */     0,     0,      0,     '.',    '<',   '(',    '+',    '|',
107  /*  50 */   '&',     0,      0,       0,      0,     0,      0,      0,  /*  50 */   '&',     0,      0,       0,      0,     0,      0,      0,
# Line 106  static const short int escapes[] = { Line 111  static const short int escapes[] = {
111  /*  70 */     0,     0,      0,       0,      0,     0,      0,      0,  /*  70 */     0,     0,      0,       0,      0,     0,      0,      0,
112  /*  78 */     0,   '`',    ':',     '#',    '@',  '\'',    '=',    '"',  /*  78 */     0,   '`',    ':',     '#',    '@',  '\'',    '=',    '"',
113  /*  80 */     0,     7, -ESC_b,       0, -ESC_d, ESC_e,  ESC_f,      0,  /*  80 */     0,     7, -ESC_b,       0, -ESC_d, ESC_e,  ESC_f,      0,
114  /*  88 */     0,     0,      0,     '{',      0,     0,      0,      0,  /*  88 */-ESC_h,     0,      0,     '{',      0,     0,      0,      0,
115  /*  90 */     0,     0, -ESC_k,     'l',      0, ESC_n,      0, -ESC_p,  /*  90 */     0,     0, -ESC_k,     'l',      0, ESC_n,      0, -ESC_p,
116  /*  98 */     0, ESC_r,      0,     '}',      0,     0,      0,      0,  /*  98 */     0, ESC_r,      0,     '}',      0,     0,      0,      0,
117  /*  A0 */     0,   '~', -ESC_s, ESC_tee,      0,     0, -ESC_w,      0,  /*  A0 */     0,   '~', -ESC_s, ESC_tee,      0,-ESC_v, -ESC_w,      0,
118  /*  A8 */     0,-ESC_z,      0,       0,      0,   '[',      0,      0,  /*  A8 */     0,-ESC_z,      0,       0,      0,   '[',      0,      0,
119  /*  B0 */     0,     0,      0,       0,      0,     0,      0,      0,  /*  B0 */     0,     0,      0,       0,      0,     0,      0,      0,
120  /*  B8 */     0,     0,      0,       0,      0,   ']',    '=',    '-',  /*  B8 */     0,     0,      0,       0,      0,   ']',    '=',    '-',
121  /*  C0 */   '{',-ESC_A, -ESC_B,  -ESC_C, -ESC_D,-ESC_E,      0, -ESC_G,  /*  C0 */   '{',-ESC_A, -ESC_B,  -ESC_C, -ESC_D,-ESC_E,      0, -ESC_G,
122  /*  C8 */     0,     0,      0,       0,      0,     0,      0,      0,  /*  C8 */-ESC_H,     0,      0,       0,      0,     0,      0,      0,
123  /*  D0 */   '}',     0,      0,       0,      0,     0,      0, -ESC_P,  /*  D0 */   '}',     0,      0,       0,      0,     0,      0, -ESC_P,
124  /*  D8 */-ESC_Q,-ESC_R,      0,       0,      0,     0,      0,      0,  /*  D8 */-ESC_Q,-ESC_R,      0,       0,      0,     0,      0,      0,
125  /*  E0 */  '\\',     0, -ESC_S,       0,      0,     0, -ESC_W, -ESC_X,  /*  E0 */  '\\',     0, -ESC_S,       0,      0,-ESC_V, -ESC_W, -ESC_X,
126  /*  E8 */     0,-ESC_Z,      0,       0,      0,     0,      0,      0,  /*  E8 */     0,-ESC_Z,      0,       0,      0,     0,      0,      0,
127  /*  F0 */     0,     0,      0,       0,      0,     0,      0,      0,  /*  F0 */     0,     0,      0,       0,      0,     0,      0,      0,
128  /*  F8 */     0,     0,      0,       0,      0,     0,      0,      0  /*  F8 */     0,     0,      0,       0,      0,     0,      0,      0
# Line 208  static const char *error_texts[] = { Line 213  static const char *error_texts[] = {
213    "malformed number or name after (?(",    "malformed number or name after (?(",
214    "conditional group contains more than two branches",    "conditional group contains more than two branches",
215    "assertion expected after (?(",    "assertion expected after (?(",
216    "(?R or (?digits must be followed by )",    "(?R or (?[+-]digits must be followed by )",
217    /* 30 */    /* 30 */
218    "unknown POSIX class name",    "unknown POSIX class name",
219    "POSIX collating elements are not supported",    "POSIX collating elements are not supported",
# Line 242  static const char *error_texts[] = { Line 247  static const char *error_texts[] = {
247    /* 55 */    /* 55 */
248    "repeating a DEFINE group is not allowed",    "repeating a DEFINE group is not allowed",
249    "inconsistent NEWLINE options",    "inconsistent NEWLINE options",
250    "\\g is not followed by an (optionally braced) non-zero number"    "\\g is not followed by a braced name or an optionally braced non-zero number",
251      "(?+ or (?- or (?(+ or (?(- must be followed by a non-zero number"
252  };  };
253    
254    
# Line 262  For convenience, we use the same bit def Line 268  For convenience, we use the same bit def
268    
269  Then we can use ctype_digit and ctype_xdigit in the code. */  Then we can use ctype_digit and ctype_xdigit in the code. */
270    
271  #if !EBCDIC    /* This is the "normal" case, for ASCII systems */  #ifndef EBCDIC  /* This is the "normal" case, for ASCII systems */
272  static const unsigned char digitab[] =  static const unsigned char digitab[] =
273    {    {
274    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*   0-  7 */    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*   0-  7 */
# Line 298  static const unsigned char digitab[] = Line 304  static const unsigned char digitab[] =
304    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 240-247 */    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 240-247 */
305    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};/* 248-255 */    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};/* 248-255 */
306    
307  #else          /* This is the "abnormal" case, for EBCDIC systems */  #else           /* This is the "abnormal" case, for EBCDIC systems */
308  static const unsigned char digitab[] =  static const unsigned char digitab[] =
309    {    {
310    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*   0-  7  0 */    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*   0-  7  0 */
# Line 312  static const unsigned char digitab[] = Line 318  static const unsigned char digitab[] =
318    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*    - 71 40 */    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*    - 71 40 */
319    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  72- |     */    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  72- |     */
320    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  & - 87 50 */    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  & - 87 50 */
321    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  88-     */    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  88- 95    */
322    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  - -103 60 */    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  - -103 60 */
323    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 104- ?     */    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 104- ?     */
324    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 112-119 70 */    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 112-119 70 */
# Line 346  static const unsigned char ebcdic_charta Line 352  static const unsigned char ebcdic_charta
352    0x01,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*    - 71 */    0x01,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*    - 71 */
353    0x00,0x00,0x00,0x80,0x00,0x80,0x80,0x80, /*  72- |  */    0x00,0x00,0x00,0x80,0x00,0x80,0x80,0x80, /*  72- |  */
354    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  & - 87 */    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  & - 87 */
355    0x00,0x00,0x00,0x80,0x80,0x80,0x00,0x00, /*  88-  */    0x00,0x00,0x00,0x80,0x80,0x80,0x00,0x00, /*  88- 95 */
356    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  - -103 */    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  - -103 */
357    0x00,0x00,0x00,0x00,0x00,0x10,0x00,0x80, /* 104- ?  */    0x00,0x00,0x00,0x00,0x00,0x10,0x00,0x80, /* 104- ?  */
358    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 112-119 */    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 112-119 */
# Line 373  static const unsigned char ebcdic_charta Line 379  static const unsigned char ebcdic_charta
379  /* Definition to allow mutual recursion */  /* Definition to allow mutual recursion */
380    
381  static BOOL  static BOOL
382    compile_regex(int, int, uschar **, const uschar **, int *, BOOL, int, int *,    compile_regex(int, int, uschar **, const uschar **, int *, BOOL, BOOL, int,
383      int *, branch_chain *, compile_data *, int *);      int *, int *, branch_chain *, compile_data *, int *);
384    
385    
386    
# Line 421  if (c == 0) *errorcodeptr = ERR1; Line 427  if (c == 0) *errorcodeptr = ERR1;
427  a table. A non-zero result is something that can be returned immediately.  a table. A non-zero result is something that can be returned immediately.
428  Otherwise further processing may be required. */  Otherwise further processing may be required. */
429    
430  #if !EBCDIC    /* ASCII coding */  #ifndef EBCDIC  /* ASCII coding */
431  else if (c < '0' || c > 'z') {}                           /* Not alphameric */  else if (c < '0' || c > 'z') {}                           /* Not alphameric */
432  else if ((i = escapes[c - '0']) != 0) c = i;  else if ((i = escapes[c - '0']) != 0) c = i;
433    
434  #else          /* EBCDIC coding */  #else           /* EBCDIC coding */
435  else if (c < 'a' || (ebcdic_chartab[c] & 0x0E) == 0) {}   /* Not alphameric */  else if (c < 'a' || (ebcdic_chartab[c] & 0x0E) == 0) {}   /* Not alphameric */
436  else if ((i = escapes[c - 0x48]) != 0)  c = i;  else if ((i = escapes[c - 0x48]) != 0)  c = i;
437  #endif  #endif
# Line 452  else Line 458  else
458    
459      /* \g must be followed by a number, either plain or braced. If positive, it      /* \g must be followed by a number, either plain or braced. If positive, it
460      is an absolute backreference. If negative, it is a relative backreference.      is an absolute backreference. If negative, it is a relative backreference.
461      This is a Perl 5.10 feature. */      This is a Perl 5.10 feature. Perl 5.10 also supports \g{name} as a
462        reference to a named group. This is part of Perl's movement towards a
463        unified syntax for back references. As this is synonymous with \k{name}, we
464        fudge it up by pretending it really was \k. */
465    
466      case 'g':      case 'g':
467      if (ptr[1] == '{')      if (ptr[1] == '{')
468        {        {
469          const uschar *p;
470          for (p = ptr+2; *p != 0 && *p != '}'; p++)
471            if (*p != '-' && (digitab[*p] & ctype_digit) == 0) break;
472          if (*p != 0 && *p != '}')
473            {
474            c = -ESC_k;
475            break;
476            }
477        braced = TRUE;        braced = TRUE;
478        ptr++;        ptr++;
479        }        }
# Line 562  else Line 579  else
579          if (c == 0 && cc == '0') continue;     /* Leading zeroes */          if (c == 0 && cc == '0') continue;     /* Leading zeroes */
580          count++;          count++;
581    
582  #if !EBCDIC    /* ASCII coding */  #ifndef EBCDIC  /* ASCII coding */
583          if (cc >= 'a') cc -= 32;               /* Convert to upper case */          if (cc >= 'a') cc -= 32;               /* Convert to upper case */
584          c = (c << 4) + cc - ((cc < 'A')? '0' : ('A' - 10));          c = (c << 4) + cc - ((cc < 'A')? '0' : ('A' - 10));
585  #else          /* EBCDIC coding */  #else           /* EBCDIC coding */
586          if (cc >= 'a' && cc <= 'z') cc += 64;  /* Convert to upper case */          if (cc >= 'a' && cc <= 'z') cc += 64;  /* Convert to upper case */
587          c = (c << 4) + cc - ((cc >= '0')? '0' : ('A' - 10));          c = (c << 4) + cc - ((cc >= '0')? '0' : ('A' - 10));
588  #endif  #endif
# Line 589  else Line 606  else
606        {        {
607        int cc;                               /* Some compilers don't like ++ */        int cc;                               /* Some compilers don't like ++ */
608        cc = *(++ptr);                        /* in initializers */        cc = *(++ptr);                        /* in initializers */
609  #if !EBCDIC    /* ASCII coding */  #ifndef EBCDIC  /* ASCII coding */
610        if (cc >= 'a') cc -= 32;              /* Convert to upper case */        if (cc >= 'a') cc -= 32;              /* Convert to upper case */
611        c = c * 16 + cc - ((cc < 'A')? '0' : ('A' - 10));        c = c * 16 + cc - ((cc < 'A')? '0' : ('A' - 10));
612  #else          /* EBCDIC coding */  #else           /* EBCDIC coding */
613        if (cc <= 'z') cc += 64;              /* Convert to upper case */        if (cc <= 'z') cc += 64;              /* Convert to upper case */
614        c = c * 16 + cc - ((cc >= '0')? '0' : ('A' - 10));        c = c * 16 + cc - ((cc >= '0')? '0' : ('A' - 10));
615  #endif  #endif
# Line 611  else Line 628  else
628        return 0;        return 0;
629        }        }
630    
631  #if !EBCDIC    /* ASCII coding */  #ifndef EBCDIC  /* ASCII coding */
632      if (c >= 'a' && c <= 'z') c -= 32;      if (c >= 'a' && c <= 'z') c -= 32;
633      c ^= 0x40;      c ^= 0x40;
634  #else          /* EBCDIC coding */  #else           /* EBCDIC coding */
635      if (c >= 'a' && c <= 'z') c += 64;      if (c >= 'a' && c <= 'z') c += 64;
636      c ^= 0xC0;      c ^= 0xC0;
637  #endif  #endif
# Line 1246  for (;;) Line 1263  for (;;)
1263    else    else
1264      {      {
1265      code += _pcre_OP_lengths[c];      code += _pcre_OP_lengths[c];
1266    #ifdef SUPPORT_UTF8
1267      if (utf8) switch(c)      if (utf8) switch(c)
1268        {        {
1269        case OP_CHAR:        case OP_CHAR:
# Line 1266  for (;;) Line 1284  for (;;)
1284        if (code[-1] >= 0xc0) code += _pcre_utf8_table4[code[-1] & 0x3f];        if (code[-1] >= 0xc0) code += _pcre_utf8_table4[code[-1] & 0x3f];
1285        break;        break;
1286        }        }
1287    #endif
1288      }      }
1289    }    }
1290  }  }
# Line 1309  for (;;) Line 1328  for (;;)
1328    else    else
1329      {      {
1330      code += _pcre_OP_lengths[c];      code += _pcre_OP_lengths[c];
1331    #ifdef SUPPORT_UTF8
1332      if (utf8) switch(c)      if (utf8) switch(c)
1333        {        {
1334        case OP_CHAR:        case OP_CHAR:
# Line 1329  for (;;) Line 1349  for (;;)
1349        if (code[-1] >= 0xc0) code += _pcre_utf8_table4[code[-1] & 0x3f];        if (code[-1] >= 0xc0) code += _pcre_utf8_table4[code[-1] & 0x3f];
1350        break;        break;
1351        }        }
1352    #endif
1353      }      }
1354    }    }
1355  }  }
# Line 1366  for (code = first_significant_code(code Line 1387  for (code = first_significant_code(code
1387    
1388    c = *code;    c = *code;
1389    
1390      /* Groups with zero repeats can of course be empty; skip them. */
1391    
1392      if (c == OP_BRAZERO || c == OP_BRAMINZERO)
1393        {
1394        code += _pcre_OP_lengths[c];
1395        do code += GET(code, 1); while (*code == OP_ALT);
1396        c = *code;
1397        continue;
1398        }
1399    
1400      /* For other groups, scan the branches. */
1401    
1402    if (c == OP_BRA || c == OP_CBRA || c == OP_ONCE)    if (c == OP_BRA || c == OP_CBRA || c == OP_ONCE)
1403      {      {
1404      BOOL empty_branch;      BOOL empty_branch;
# Line 1382  for (code = first_significant_code(code Line 1415  for (code = first_significant_code(code
1415        }        }
1416      while (*code == OP_ALT);      while (*code == OP_ALT);
1417      if (!empty_branch) return FALSE;   /* All branches are non-empty */      if (!empty_branch) return FALSE;   /* All branches are non-empty */
1418        c = *code;
     /* Move past the KET and fudge things so that the increment in the "for"  
     above has no effect. */  
   
     c = OP_END;  
     code += 1 + LINK_SIZE - _pcre_OP_lengths[c];  
1419      continue;      continue;
1420      }      }
1421    
# Line 2087  for (;; ptr++) Line 2115  for (;; ptr++)
2115    BOOL possessive_quantifier;    BOOL possessive_quantifier;
2116    BOOL is_quantifier;    BOOL is_quantifier;
2117    BOOL is_recurse;    BOOL is_recurse;
2118      BOOL reset_bracount;
2119    int class_charcount;    int class_charcount;
2120    int class_lastchar;    int class_lastchar;
2121    int newoptions;    int newoptions;
2122    int recno;    int recno;
2123      int refsign;
2124    int skipbytes;    int skipbytes;
2125    int subreqbyte;    int subreqbyte;
2126    int subfirstbyte;    int subfirstbyte;
# Line 2505  for (;; ptr++) Line 2535  for (;; ptr++)
2535    
2536              case ESC_E: /* Perl ignores an orphan \E */              case ESC_E: /* Perl ignores an orphan \E */
2537              continue;              continue;
2538    
2539              default:    /* Not recognized; fall through */              default:    /* Not recognized; fall through */
2540              break;      /* Need "default" setting to stop compiler warning. */              break;      /* Need "default" setting to stop compiler warning. */
2541              }              }
# Line 2514  for (;; ptr++) Line 2544  for (;; ptr++)
2544    
2545            else if (c == -ESC_d || c == -ESC_D || c == -ESC_w ||            else if (c == -ESC_d || c == -ESC_D || c == -ESC_w ||
2546                     c == -ESC_W || c == -ESC_s || c == -ESC_S) continue;                     c == -ESC_W || c == -ESC_s || c == -ESC_S) continue;
2547    
2548              /* We need to deal with \H, \h, \V, and \v in both phases because
2549              they use extra memory. */
2550    
2551              if (-c == ESC_h)
2552                {
2553                SETBIT(classbits, 0x09); /* HT */
2554                SETBIT(classbits, 0x20); /* SPACE */
2555                SETBIT(classbits, 0xa0); /* NBSP */
2556    #ifdef SUPPORT_UTF8
2557                if (utf8)
2558                  {
2559                  class_utf8 = TRUE;
2560                  *class_utf8data++ = XCL_SINGLE;
2561                  class_utf8data += _pcre_ord2utf8(0x1680, class_utf8data);
2562                  *class_utf8data++ = XCL_SINGLE;
2563                  class_utf8data += _pcre_ord2utf8(0x180e, class_utf8data);
2564                  *class_utf8data++ = XCL_RANGE;
2565                  class_utf8data += _pcre_ord2utf8(0x2000, class_utf8data);
2566                  class_utf8data += _pcre_ord2utf8(0x200A, class_utf8data);
2567                  *class_utf8data++ = XCL_SINGLE;
2568                  class_utf8data += _pcre_ord2utf8(0x202f, class_utf8data);
2569                  *class_utf8data++ = XCL_SINGLE;
2570                  class_utf8data += _pcre_ord2utf8(0x205f, class_utf8data);
2571                  *class_utf8data++ = XCL_SINGLE;
2572                  class_utf8data += _pcre_ord2utf8(0x3000, class_utf8data);
2573                  }
2574    #endif
2575                continue;
2576                }
2577    
2578              if (-c == ESC_H)
2579                {
2580                for (c = 0; c < 32; c++)
2581                  {
2582                  int x = 0xff;
2583                  switch (c)
2584                    {
2585                    case 0x09/8: x ^= 1 << (0x09%8); break;
2586                    case 0x20/8: x ^= 1 << (0x20%8); break;
2587                    case 0xa0/8: x ^= 1 << (0xa0%8); break;
2588                    default: break;
2589                    }
2590                  classbits[c] |= x;
2591                  }
2592    
2593    #ifdef SUPPORT_UTF8
2594                if (utf8)
2595                  {
2596                  class_utf8 = TRUE;
2597                  *class_utf8data++ = XCL_RANGE;
2598                  class_utf8data += _pcre_ord2utf8(0x0100, class_utf8data);
2599                  class_utf8data += _pcre_ord2utf8(0x167f, class_utf8data);
2600                  *class_utf8data++ = XCL_RANGE;
2601                  class_utf8data += _pcre_ord2utf8(0x1681, class_utf8data);
2602                  class_utf8data += _pcre_ord2utf8(0x180d, class_utf8data);
2603                  *class_utf8data++ = XCL_RANGE;
2604                  class_utf8data += _pcre_ord2utf8(0x180f, class_utf8data);
2605                  class_utf8data += _pcre_ord2utf8(0x1fff, class_utf8data);
2606                  *class_utf8data++ = XCL_RANGE;
2607                  class_utf8data += _pcre_ord2utf8(0x200B, class_utf8data);
2608                  class_utf8data += _pcre_ord2utf8(0x202e, class_utf8data);
2609                  *class_utf8data++ = XCL_RANGE;
2610                  class_utf8data += _pcre_ord2utf8(0x2030, class_utf8data);
2611                  class_utf8data += _pcre_ord2utf8(0x205e, class_utf8data);
2612                  *class_utf8data++ = XCL_RANGE;
2613                  class_utf8data += _pcre_ord2utf8(0x2060, class_utf8data);
2614                  class_utf8data += _pcre_ord2utf8(0x2fff, class_utf8data);
2615                  *class_utf8data++ = XCL_RANGE;
2616                  class_utf8data += _pcre_ord2utf8(0x3001, class_utf8data);
2617                  class_utf8data += _pcre_ord2utf8(0x7fffffff, class_utf8data);
2618                  }
2619    #endif
2620                continue;
2621                }
2622    
2623              if (-c == ESC_v)
2624                {
2625                SETBIT(classbits, 0x0a); /* LF */
2626                SETBIT(classbits, 0x0b); /* VT */
2627                SETBIT(classbits, 0x0c); /* FF */
2628                SETBIT(classbits, 0x0d); /* CR */
2629                SETBIT(classbits, 0x85); /* NEL */
2630    #ifdef SUPPORT_UTF8
2631                if (utf8)
2632                  {
2633                  class_utf8 = TRUE;
2634                  *class_utf8data++ = XCL_RANGE;
2635                  class_utf8data += _pcre_ord2utf8(0x2028, class_utf8data);
2636                  class_utf8data += _pcre_ord2utf8(0x2029, class_utf8data);
2637                  }
2638    #endif
2639                continue;
2640                }
2641    
2642              if (-c == ESC_V)
2643                {
2644                for (c = 0; c < 32; c++)
2645                  {
2646                  int x = 0xff;
2647                  switch (c)
2648                    {
2649                    case 0x0a/8: x ^= 1 << (0x0a%8);
2650                                 x ^= 1 << (0x0b%8);
2651                                 x ^= 1 << (0x0c%8);
2652                                 x ^= 1 << (0x0d%8);
2653                                 break;
2654                    case 0x85/8: x ^= 1 << (0x85%8); break;
2655                    default: break;
2656                    }
2657                  classbits[c] |= x;
2658                  }
2659    
2660    #ifdef SUPPORT_UTF8
2661                if (utf8)
2662                  {
2663                  class_utf8 = TRUE;
2664                  *class_utf8data++ = XCL_RANGE;
2665                  class_utf8data += _pcre_ord2utf8(0x0100, class_utf8data);
2666                  class_utf8data += _pcre_ord2utf8(0x2027, class_utf8data);
2667                  *class_utf8data++ = XCL_RANGE;
2668                  class_utf8data += _pcre_ord2utf8(0x2029, class_utf8data);
2669                  class_utf8data += _pcre_ord2utf8(0x7fffffff, class_utf8data);
2670                  }
2671    #endif
2672                continue;
2673                }
2674    
2675            /* We need to deal with \P and \p in both phases. */            /* We need to deal with \P and \p in both phases. */
2676    
# Line 2655  for (;; ptr++) Line 2812  for (;; ptr++)
2812              unsigned int origd = d;              unsigned int origd = d;
2813              while (get_othercase_range(&cc, origd, &occ, &ocd))              while (get_othercase_range(&cc, origd, &occ, &ocd))
2814                {                {
2815                if (occ >= c && ocd <= d) continue;  /* Skip embedded ranges */                if (occ >= (unsigned int)c &&
2816                      ocd <= (unsigned int)d)
2817                    continue;                          /* Skip embedded ranges */
2818    
2819                if (occ < c  && ocd >= c - 1)        /* Extend the basic range */                if (occ < (unsigned int)c  &&
2820                      ocd >= (unsigned int)c - 1)      /* Extend the basic range */
2821                  {                                  /* if there is overlap,   */                  {                                  /* if there is overlap,   */
2822                  c = occ;                           /* noting that if occ < c */                  c = occ;                           /* noting that if occ < c */
2823                  continue;                          /* we can't have ocd > d  */                  continue;                          /* we can't have ocd > d  */
2824                  }                                  /* because a subrange is  */                  }                                  /* because a subrange is  */
2825                if (ocd > d && occ <= d + 1)         /* always shorter than    */                if (ocd > (unsigned int)d &&
2826                      occ <= (unsigned int)d + 1)      /* always shorter than    */
2827                  {                                  /* the basic range.       */                  {                                  /* the basic range.       */
2828                  d = ocd;                  d = ocd;
2829                  continue;                  continue;
# Line 3560  for (;; ptr++) Line 3721  for (;; ptr++)
3721      skipbytes = 0;      skipbytes = 0;
3722      bravalue = OP_CBRA;      bravalue = OP_CBRA;
3723      save_hwm = cd->hwm;      save_hwm = cd->hwm;
3724        reset_bracount = FALSE;
3725    
3726      if (*(++ptr) == '?')      if (*(++ptr) == '?')
3727        {        {
# Line 3582  for (;; ptr++) Line 3744  for (;; ptr++)
3744    
3745    
3746          /* ------------------------------------------------------------ */          /* ------------------------------------------------------------ */
3747            case '|':                 /* Reset capture count for each branch */
3748            reset_bracount = TRUE;
3749            /* Fall through */
3750    
3751            /* ------------------------------------------------------------ */
3752          case ':':                 /* Non-capturing bracket */          case ':':                 /* Non-capturing bracket */
3753          bravalue = OP_BRA;          bravalue = OP_BRA;
3754          ptr++;          ptr++;
# Line 3617  for (;; ptr++) Line 3784  for (;; ptr++)
3784    
3785          code[1+LINK_SIZE] = OP_CREF;          code[1+LINK_SIZE] = OP_CREF;
3786          skipbytes = 3;          skipbytes = 3;
3787            refsign = -1;
3788    
3789          /* Check for a test for recursion in a named group. */          /* Check for a test for recursion in a named group. */
3790    
# Line 3640  for (;; ptr++) Line 3808  for (;; ptr++)
3808            terminator = '\'';            terminator = '\'';
3809            ptr++;            ptr++;
3810            }            }
3811          else terminator = 0;          else
3812              {
3813              terminator = 0;
3814              if (ptr[1] == '-' || ptr[1] == '+') refsign = *(++ptr);
3815              }
3816    
3817          /* We now expect to read a name; anything else is an error */          /* We now expect to read a name; anything else is an error */
3818    
# Line 3676  for (;; ptr++) Line 3848  for (;; ptr++)
3848          if (lengthptr != NULL) break;          if (lengthptr != NULL) break;
3849    
3850          /* In the real compile we do the work of looking for the actual          /* In the real compile we do the work of looking for the actual
3851          reference. */          reference. If the string started with "+" or "-" we require the rest to
3852            be digits, in which case recno will be set. */
3853    
3854            if (refsign > 0)
3855              {
3856              if (recno <= 0)
3857                {
3858                *errorcodeptr = ERR58;
3859                goto FAILED;
3860                }
3861              if (refsign == '-')
3862                {
3863                recno = cd->bracount - recno + 1;
3864                if (recno <= 0)
3865                  {
3866                  *errorcodeptr = ERR15;
3867                  goto FAILED;
3868                  }
3869                }
3870              else recno += cd->bracount;
3871              PUT2(code, 2+LINK_SIZE, recno);
3872              break;
3873              }
3874    
3875            /* Otherwise (did not start with "+" or "-"), start by looking for the
3876            name. */
3877    
3878          slot = cd->name_table;          slot = cd->name_table;
3879          for (i = 0; i < cd->names_found; i++)          for (i = 0; i < cd->names_found; i++)
# Line 3995  for (;; ptr++) Line 4192  for (;; ptr++)
4192    
4193    
4194          /* ------------------------------------------------------------ */          /* ------------------------------------------------------------ */
4195            case '-': case '+':
4196          case '0': case '1': case '2': case '3': case '4':   /* Recursion or */          case '0': case '1': case '2': case '3': case '4':   /* Recursion or */
4197          case '5': case '6': case '7': case '8': case '9':   /* subroutine */          case '5': case '6': case '7': case '8': case '9':   /* subroutine */
4198            {            {
4199            const uschar *called;            const uschar *called;
4200    
4201              if ((refsign = *ptr) == '+') ptr++;
4202              else if (refsign == '-')
4203                {
4204                if ((digitab[ptr[1]] & ctype_digit) == 0)
4205                  goto OTHER_CHAR_AFTER_QUERY;
4206                ptr++;
4207                }
4208    
4209            recno = 0;            recno = 0;
4210            while((digitab[*ptr] & ctype_digit) != 0)            while((digitab[*ptr] & ctype_digit) != 0)
4211              recno = recno * 10 + *ptr++ - '0';              recno = recno * 10 + *ptr++ - '0';
4212    
4213            if (*ptr != ')')            if (*ptr != ')')
4214              {              {
4215              *errorcodeptr = ERR29;              *errorcodeptr = ERR29;
4216              goto FAILED;              goto FAILED;
4217              }              }
4218    
4219              if (refsign == '-')
4220                {
4221                if (recno == 0)
4222                  {
4223                  *errorcodeptr = ERR58;
4224                  goto FAILED;
4225                  }
4226                recno = cd->bracount - recno + 1;
4227                if (recno <= 0)
4228                  {
4229                  *errorcodeptr = ERR15;
4230                  goto FAILED;
4231                  }
4232                }
4233              else if (refsign == '+')
4234                {
4235                if (recno == 0)
4236                  {
4237                  *errorcodeptr = ERR58;
4238                  goto FAILED;
4239                  }
4240                recno += cd->bracount;
4241                }
4242    
4243            /* Come here from code above that handles a named recursion */            /* Come here from code above that handles a named recursion */
4244    
4245            HANDLE_RECURSION:            HANDLE_RECURSION:
# Line 4080  for (;; ptr++) Line 4312  for (;; ptr++)
4312    
4313          /* ------------------------------------------------------------ */          /* ------------------------------------------------------------ */
4314          default:              /* Other characters: check option setting */          default:              /* Other characters: check option setting */
4315            OTHER_CHAR_AFTER_QUERY:
4316          set = unset = 0;          set = unset = 0;
4317          optset = &set;          optset = &set;
4318    
# Line 4214  for (;; ptr++) Line 4447  for (;; ptr++)
4447           errorcodeptr,                 /* Where to put an error message */           errorcodeptr,                 /* Where to put an error message */
4448           (bravalue == OP_ASSERTBACK ||           (bravalue == OP_ASSERTBACK ||
4449            bravalue == OP_ASSERTBACK_NOT), /* TRUE if back assert */            bravalue == OP_ASSERTBACK_NOT), /* TRUE if back assert */
4450             reset_bracount,               /* True if (?| group */
4451           skipbytes,                    /* Skip over bracket number */           skipbytes,                    /* Skip over bracket number */
4452           &subfirstbyte,                /* For possible first char */           &subfirstbyte,                /* For possible first char */
4453           &subreqbyte,                  /* For possible last char */           &subreqbyte,                  /* For possible last char */
# Line 4230  for (;; ptr++) Line 4464  for (;; ptr++)
4464      is on the bracket. */      is on the bracket. */
4465    
4466      /* If this is a conditional bracket, check that there are no more than      /* If this is a conditional bracket, check that there are no more than
4467      two branches in the group, or just one if it's a DEFINE group. */      two branches in the group, or just one if it's a DEFINE group. We do this
4468        in the real compile phase, not in the pre-pass, where the whole group may
4469        not be available. */
4470    
4471      if (bravalue == OP_COND)      if (bravalue == OP_COND && lengthptr == NULL)
4472        {        {
4473        uschar *tc = code;        uschar *tc = code;
4474        int condcount = 0;        int condcount = 0;
# Line 4392  for (;; ptr++) Line 4628  for (;; ptr++)
4628        zerofirstbyte = firstbyte;        zerofirstbyte = firstbyte;
4629        zeroreqbyte = reqbyte;        zeroreqbyte = reqbyte;
4630    
4631        /* \k<name> or \k'name' is a back reference by name (Perl syntax) */        /* \k<name> or \k'name' is a back reference by name (Perl syntax).
4632          We also support \k{name} (.NET syntax) */
4633    
4634        if (-c == ESC_k && (ptr[1] == '<' || ptr[1] == '\''))        if (-c == ESC_k && (ptr[1] == '<' || ptr[1] == '\'' || ptr[1] == '{'))
4635          {          {
4636          is_recurse = FALSE;          is_recurse = FALSE;
4637          terminator = (*(++ptr) == '<')? '>' : '\'';          terminator = (*(++ptr) == '<')? '>' : (*ptr == '\'')? '\'' : '}';
4638          goto NAMED_REF_OR_RECURSE;          goto NAMED_REF_OR_RECURSE;
4639          }          }
4640    
# Line 4563  This function is used during the pre-com Line 4800  This function is used during the pre-com
4800  out the amount of memory needed, as well as during the real compile phase. The  out the amount of memory needed, as well as during the real compile phase. The
4801  value of lengthptr distinguishes the two phases.  value of lengthptr distinguishes the two phases.
4802    
4803  Argument:  Arguments:
4804    options        option bits, including any changes for this subpattern    options        option bits, including any changes for this subpattern
4805    oldims         previous settings of ims option bits    oldims         previous settings of ims option bits
4806    codeptr        -> the address of the current code pointer    codeptr        -> the address of the current code pointer
4807    ptrptr         -> the address of the current pattern pointer    ptrptr         -> the address of the current pattern pointer
4808    errorcodeptr   -> pointer to error code variable    errorcodeptr   -> pointer to error code variable
4809    lookbehind     TRUE if this is a lookbehind assertion    lookbehind     TRUE if this is a lookbehind assertion
4810      reset_bracount TRUE to reset the count for each branch
4811    skipbytes      skip this many bytes at start (for brackets and OP_COND)    skipbytes      skip this many bytes at start (for brackets and OP_COND)
4812    firstbyteptr   place to put the first required character, or a negative number    firstbyteptr   place to put the first required character, or a negative number
4813    reqbyteptr     place to put the last required character, or a negative number    reqbyteptr     place to put the last required character, or a negative number
# Line 4583  Returns: TRUE on success Line 4821  Returns: TRUE on success
4821    
4822  static BOOL  static BOOL
4823  compile_regex(int options, int oldims, uschar **codeptr, const uschar **ptrptr,  compile_regex(int options, int oldims, uschar **codeptr, const uschar **ptrptr,
4824    int *errorcodeptr, BOOL lookbehind, int skipbytes, int *firstbyteptr,    int *errorcodeptr, BOOL lookbehind, BOOL reset_bracount, int skipbytes,
4825    int *reqbyteptr, branch_chain *bcptr, compile_data *cd, int *lengthptr)    int *firstbyteptr, int *reqbyteptr, branch_chain *bcptr, compile_data *cd,
4826      int *lengthptr)
4827  {  {
4828  const uschar *ptr = *ptrptr;  const uschar *ptr = *ptrptr;
4829  uschar *code = *codeptr;  uschar *code = *codeptr;
# Line 4594  uschar *reverse_count = NULL; Line 4833  uschar *reverse_count = NULL;
4833  int firstbyte, reqbyte;  int firstbyte, reqbyte;
4834  int branchfirstbyte, branchreqbyte;  int branchfirstbyte, branchreqbyte;
4835  int length;  int length;
4836    int orig_bracount;
4837    int max_bracount;
4838  branch_chain bc;  branch_chain bc;
4839    
4840  bc.outer = bcptr;  bc.outer = bcptr;
# Line 4622  code += 1 + LINK_SIZE + skipbytes; Line 4863  code += 1 + LINK_SIZE + skipbytes;
4863    
4864  /* Loop for each alternative branch */  /* Loop for each alternative branch */
4865    
4866    orig_bracount = max_bracount = cd->bracount;
4867  for (;;)  for (;;)
4868    {    {
4869      /* For a (?| group, reset the capturing bracket count so that each branch
4870      uses the same numbers. */
4871    
4872      if (reset_bracount) cd->bracount = orig_bracount;
4873    
4874    /* Handle a change of ims options at the start of the branch */    /* Handle a change of ims options at the start of the branch */
4875    
4876    if ((options & PCRE_IMS) != oldims)    if ((options & PCRE_IMS) != oldims)
# Line 4652  for (;;) Line 4899  for (;;)
4899      *ptrptr = ptr;      *ptrptr = ptr;
4900      return FALSE;      return FALSE;
4901      }      }
4902    
4903      /* Keep the highest bracket count in case (?| was used and some branch
4904      has fewer than the rest. */
4905    
4906      if (cd->bracount > max_bracount) max_bracount = cd->bracount;
4907    
4908    /* In the real compile phase, there is some post-processing to be done. */    /* In the real compile phase, there is some post-processing to be done. */
4909    
# Line 4716  for (;;) Line 4968  for (;;)
4968        }        }
4969      }      }
4970    
4971    /* Reached end of expression, either ')' or end of pattern. Go back through    /* Reached end of expression, either ')' or end of pattern. In the real
4972    the alternative branches and reverse the chain of offsets, with the field in    compile phase, go back through the alternative branches and reverse the chain
4973    the BRA item now becoming an offset to the first alternative. If there are    of offsets, with the field in the BRA item now becoming an offset to the
4974    no alternatives, it points to the end of the group. The length in the    first alternative. If there are no alternatives, it points to the end of the
4975    terminating ket is always the length of the whole bracketed item. If any of    group. The length in the terminating ket is always the length of the whole
4976    the ims options were changed inside the group, compile a resetting op-code    bracketed item. If any of the ims options were changed inside the group,
4977    following, except at the very end of the pattern. Return leaving the pointer    compile a resetting op-code following, except at the very end of the pattern.
4978    at the terminating char. */    Return leaving the pointer at the terminating char. */
4979    
4980    if (*ptr != '|')    if (*ptr != '|')
4981      {      {
4982      int branch_length = code - last_branch;      if (lengthptr == NULL)
     do  
4983        {        {
4984        int prev_length = GET(last_branch, 1);        int branch_length = code - last_branch;
4985        PUT(last_branch, 1, branch_length);        do
4986        branch_length = prev_length;          {
4987        last_branch -= branch_length;          int prev_length = GET(last_branch, 1);
4988            PUT(last_branch, 1, branch_length);
4989            branch_length = prev_length;
4990            last_branch -= branch_length;
4991            }
4992          while (branch_length > 0);
4993        }        }
     while (branch_length > 0);  
4994    
4995      /* Fill in the ket */      /* Fill in the ket */
4996    
# Line 4751  for (;;) Line 5006  for (;;)
5006        *code++ = oldims;        *code++ = oldims;
5007        length += 2;        length += 2;
5008        }        }
5009    
5010        /* Retain the highest bracket number, in case resetting was used. */
5011    
5012        cd->bracount = max_bracount;
5013    
5014      /* Set values to pass back */      /* Set values to pass back */
5015    
# Line 4762  for (;;) Line 5021  for (;;)
5021      return TRUE;      return TRUE;
5022      }      }
5023    
5024    /* Another branch follows; insert an "or" node. Its length field points back    /* Another branch follows. In the pre-compile phase, we can move the code
5025      pointer back to where it was for the start of the first branch. (That is,
5026      pretend that each branch is the only one.)
5027    
5028      In the real compile phase, insert an ALT node. Its length field points back
5029    to the previous branch while the bracket remains open. At the end the chain    to the previous branch while the bracket remains open. At the end the chain
5030    is reversed. It's done like this so that the start of the bracket has a    is reversed. It's done like this so that the start of the bracket has a
5031    zero offset until it is closed, making it possible to detect recursion. */    zero offset until it is closed, making it possible to detect recursion. */
5032    
5033    *code = OP_ALT;    if (lengthptr != NULL)
5034    PUT(code, 1, code - last_branch);      {
5035    bc.current = last_branch = code;      code = *codeptr + 1 + LINK_SIZE + skipbytes;
5036    code += 1 + LINK_SIZE;      length += 1 + LINK_SIZE;
5037        }
5038      else
5039        {
5040        *code = OP_ALT;
5041        PUT(code, 1, code - last_branch);
5042        bc.current = last_branch = code;
5043        code += 1 + LINK_SIZE;
5044        }
5045    
5046    ptr++;    ptr++;
   length += 1 + LINK_SIZE;  
5047    }    }
5048  /* Control never reaches here */  /* Control never reaches here */
5049  }  }
# Line 5039  Returns: pointer to compiled data Line 5310  Returns: pointer to compiled data
5310                  with errorptr and erroroffset set                  with errorptr and erroroffset set
5311  */  */
5312    
5313  PCRE_DATA_SCOPE pcre *  PCRE_EXP_DEFN pcre *
5314  pcre_compile(const char *pattern, int options, const char **errorptr,  pcre_compile(const char *pattern, int options, const char **errorptr,
5315    int *erroroffset, const unsigned char *tables)    int *erroroffset, const unsigned char *tables)
5316  {  {
# Line 5047  return pcre_compile2(pattern, options, N Line 5318  return pcre_compile2(pattern, options, N
5318  }  }
5319    
5320    
5321  PCRE_DATA_SCOPE pcre *  PCRE_EXP_DEFN pcre *
5322  pcre_compile2(const char *pattern, int options, int *errorcodeptr,  pcre_compile2(const char *pattern, int options, int *errorcodeptr,
5323    const char **errorptr, int *erroroffset, const unsigned char *tables)    const char **errorptr, int *erroroffset, const unsigned char *tables)
5324  {  {
# Line 5096  if (errorcodeptr != NULL) *errorcodeptr Line 5367  if (errorcodeptr != NULL) *errorcodeptr
5367  if (erroroffset == NULL)  if (erroroffset == NULL)
5368    {    {
5369    errorcode = ERR16;    errorcode = ERR16;
5370    goto PCRE_EARLY_ERROR_RETURN;    goto PCRE_EARLY_ERROR_RETURN2;
5371    }    }
5372    
5373  *erroroffset = 0;  *erroroffset = 0;
# Line 5109  if (utf8 && (options & PCRE_NO_UTF8_CHEC Line 5380  if (utf8 && (options & PCRE_NO_UTF8_CHEC
5380       (*erroroffset = _pcre_valid_utf8((uschar *)pattern, -1)) >= 0)       (*erroroffset = _pcre_valid_utf8((uschar *)pattern, -1)) >= 0)
5381    {    {
5382    errorcode = ERR44;    errorcode = ERR44;
5383    goto PCRE_UTF8_ERROR_RETURN;    goto PCRE_EARLY_ERROR_RETURN2;
5384    }    }
5385  #else  #else
5386  if ((options & PCRE_UTF8) != 0)  if ((options & PCRE_UTF8) != 0)
# Line 5134  cd->cbits = tables + cbits_offset; Line 5405  cd->cbits = tables + cbits_offset;
5405  cd->ctypes = tables + ctypes_offset;  cd->ctypes = tables + ctypes_offset;
5406    
5407  /* Handle different types of newline. The three bits give seven cases. The  /* Handle different types of newline. The three bits give seven cases. The
5408  current code allows for fixed one- or two-byte sequences, plus "any". */  current code allows for fixed one- or two-byte sequences, plus "any" and
5409    "anycrlf". */
5410    
5411  switch (options & (PCRE_NEWLINE_CRLF | PCRE_NEWLINE_ANY))  switch (options & (PCRE_NEWLINE_CRLF | PCRE_NEWLINE_ANY))
5412    {    {
# Line 5144  switch (options & (PCRE_NEWLINE_CRLF | P Line 5416  switch (options & (PCRE_NEWLINE_CRLF | P
5416    case PCRE_NEWLINE_CR+    case PCRE_NEWLINE_CR+
5417         PCRE_NEWLINE_LF: newline = ('\r' << 8) | '\n'; break;         PCRE_NEWLINE_LF: newline = ('\r' << 8) | '\n'; break;
5418    case PCRE_NEWLINE_ANY: newline = -1; break;    case PCRE_NEWLINE_ANY: newline = -1; break;
5419      case PCRE_NEWLINE_ANYCRLF: newline = -2; break;
5420    default: errorcode = ERR56; goto PCRE_EARLY_ERROR_RETURN;    default: errorcode = ERR56; goto PCRE_EARLY_ERROR_RETURN;
5421    }    }
5422    
5423  if (newline < 0)  if (newline == -2)
5424      {
5425      cd->nltype = NLTYPE_ANYCRLF;
5426      }
5427    else if (newline < 0)
5428    {    {
5429    cd->nltype = NLTYPE_ANY;    cd->nltype = NLTYPE_ANY;
5430    }    }
# Line 5208  outside can help speed up starting point Line 5485  outside can help speed up starting point
5485  code = cworkspace;  code = cworkspace;
5486  *code = OP_BRA;  *code = OP_BRA;
5487  (void)compile_regex(cd->external_options, cd->external_options & PCRE_IMS,  (void)compile_regex(cd->external_options, cd->external_options & PCRE_IMS,
5488    &code, &ptr, &errorcode, FALSE, 0, &firstbyte, &reqbyte, NULL, cd, &length);    &code, &ptr, &errorcode, FALSE, FALSE, 0, &firstbyte, &reqbyte, NULL, cd,
5489      &length);
5490  if (errorcode != 0) goto PCRE_EARLY_ERROR_RETURN;  if (errorcode != 0) goto PCRE_EARLY_ERROR_RETURN;
5491    
5492  DPRINTF(("end pre-compile: length=%d workspace=%d\n", length,  DPRINTF(("end pre-compile: length=%d workspace=%d\n", length,
# Line 5276  ptr = (const uschar *)pattern; Line 5554  ptr = (const uschar *)pattern;
5554  code = (uschar *)codestart;  code = (uschar *)codestart;
5555  *code = OP_BRA;  *code = OP_BRA;
5556  (void)compile_regex(re->options, re->options & PCRE_IMS, &code, &ptr,  (void)compile_regex(re->options, re->options & PCRE_IMS, &code, &ptr,
5557    &errorcode, FALSE, 0, &firstbyte, &reqbyte, NULL, cd, NULL);    &errorcode, FALSE, FALSE, 0, &firstbyte, &reqbyte, NULL, cd, NULL);
5558  re->top_bracket = cd->bracount;  re->top_bracket = cd->bracount;
5559  re->top_backref = cd->top_backref;  re->top_backref = cd->top_backref;
5560    
# Line 5321  if (errorcode != 0) Line 5599  if (errorcode != 0)
5599    (pcre_free)(re);    (pcre_free)(re);
5600    PCRE_EARLY_ERROR_RETURN:    PCRE_EARLY_ERROR_RETURN:
5601    *erroroffset = ptr - (const uschar *)pattern;    *erroroffset = ptr - (const uschar *)pattern;
5602  #ifdef SUPPORT_UTF8    PCRE_EARLY_ERROR_RETURN2:
   PCRE_UTF8_ERROR_RETURN:  
 #endif  
5603    *errorptr = error_texts[errorcode];    *errorptr = error_texts[errorcode];
5604    if (errorcodeptr != NULL) *errorcodeptr = errorcode;    if (errorcodeptr != NULL) *errorcodeptr = errorcode;
5605    return NULL;    return NULL;
# Line 5413  if ((re->options & PCRE_REQCHSET) != 0) Line 5689  if ((re->options & PCRE_REQCHSET) != 0)
5689      else printf("Req char = \\x%02x%s\n", ch, caseless);      else printf("Req char = \\x%02x%s\n", ch, caseless);
5690    }    }
5691    
5692  pcre_printint(re, stdout);  pcre_printint(re, stdout, TRUE);
5693    
5694  /* This check is done here in the debugging case so that the code that  /* This check is done here in the debugging case so that the code that
5695  was compiled can be seen. */  was compiled can be seen. */

Legend:
Removed from v.96  
changed lines
  Added in v.178

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12