/[pcre]/code/trunk/pcre_compile.c
ViewVC logotype

Diff of /code/trunk/pcre_compile.c

Parent Directory | Revision Log | View Patch

revision 107 by ph10, Wed Mar 7 11:02:28 2007 UTC | revision 175 by ph10, Mon Jun 11 13:38:38 2007 UTC
# Line 6  Line 6 
6  and semantics are as close as possible to those of the Perl 5 language.  and semantics are as close as possible to those of the Perl 5 language.
7    
8                         Written by Philip Hazel                         Written by Philip Hazel
9             Copyright (c) 1997-2006 University of Cambridge             Copyright (c) 1997-2007 University of Cambridge
10    
11  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
12  Redistribution and use in source and binary forms, with or without  Redistribution and use in source and binary forms, with or without
# Line 87  static const short int escapes[] = { Line 87  static const short int escapes[] = {
87       0,      0,      0,      0,      0,      0,      0,      0,   /* 0 - 7 */       0,      0,      0,      0,      0,      0,      0,      0,   /* 0 - 7 */
88       0,      0,    ':',    ';',    '<',    '=',    '>',    '?',   /* 8 - ? */       0,      0,    ':',    ';',    '<',    '=',    '>',    '?',   /* 8 - ? */
89     '@', -ESC_A, -ESC_B, -ESC_C, -ESC_D, -ESC_E,      0, -ESC_G,   /* @ - G */     '@', -ESC_A, -ESC_B, -ESC_C, -ESC_D, -ESC_E,      0, -ESC_G,   /* @ - G */
90       0,      0,      0,      0,      0,      0,      0,      0,   /* H - O */       0,      0,      0, -ESC_K,      0,      0,      0,      0,   /* H - O */
91  -ESC_P, -ESC_Q, -ESC_R, -ESC_S,      0,      0,      0, -ESC_W,   /* P - W */  -ESC_P, -ESC_Q, -ESC_R, -ESC_S,      0,      0,      0, -ESC_W,   /* P - W */
92  -ESC_X,      0, -ESC_Z,    '[',   '\\',    ']',    '^',    '_',   /* X - _ */  -ESC_X,      0, -ESC_Z,    '[',   '\\',    ']',    '^',    '_',   /* X - _ */
93     '`',      7, -ESC_b,      0, -ESC_d,  ESC_e,  ESC_f,      0,   /* ` - g */     '`',      7, -ESC_b,      0, -ESC_d,  ESC_e,  ESC_f,      0,   /* ` - g */
# Line 208  static const char *error_texts[] = { Line 208  static const char *error_texts[] = {
208    "malformed number or name after (?(",    "malformed number or name after (?(",
209    "conditional group contains more than two branches",    "conditional group contains more than two branches",
210    "assertion expected after (?(",    "assertion expected after (?(",
211    "(?R or (?digits must be followed by )",    "(?R or (?[+-]digits must be followed by )",
212    /* 30 */    /* 30 */
213    "unknown POSIX class name",    "unknown POSIX class name",
214    "POSIX collating elements are not supported",    "POSIX collating elements are not supported",
# Line 242  static const char *error_texts[] = { Line 242  static const char *error_texts[] = {
242    /* 55 */    /* 55 */
243    "repeating a DEFINE group is not allowed",    "repeating a DEFINE group is not allowed",
244    "inconsistent NEWLINE options",    "inconsistent NEWLINE options",
245    "\\g is not followed by an (optionally braced) non-zero number"    "\\g is not followed by a braced name or an optionally braced non-zero number",
246      "(?+ or (?- or (?(+ or (?(- must be followed by a non-zero number"
247  };  };
248    
249    
# Line 373  static const unsigned char ebcdic_charta Line 374  static const unsigned char ebcdic_charta
374  /* Definition to allow mutual recursion */  /* Definition to allow mutual recursion */
375    
376  static BOOL  static BOOL
377    compile_regex(int, int, uschar **, const uschar **, int *, BOOL, int, int *,    compile_regex(int, int, uschar **, const uschar **, int *, BOOL, BOOL, int,
378      int *, branch_chain *, compile_data *, int *);      int *, int *, branch_chain *, compile_data *, int *);
379    
380    
381    
# Line 452  else Line 453  else
453    
454      /* \g must be followed by a number, either plain or braced. If positive, it      /* \g must be followed by a number, either plain or braced. If positive, it
455      is an absolute backreference. If negative, it is a relative backreference.      is an absolute backreference. If negative, it is a relative backreference.
456      This is a Perl 5.10 feature. */      This is a Perl 5.10 feature. Perl 5.10 also supports \g{name} as a
457        reference to a named group. This is part of Perl's movement towards a
458        unified syntax for back references. As this is synonymous with \k{name}, we
459        fudge it up by pretending it really was \k. */
460    
461      case 'g':      case 'g':
462      if (ptr[1] == '{')      if (ptr[1] == '{')
463        {        {
464          const uschar *p;
465          for (p = ptr+2; *p != 0 && *p != '}'; p++)
466            if (*p != '-' && (digitab[*p] & ctype_digit) == 0) break;
467          if (*p != 0 && *p != '}')
468            {
469            c = -ESC_k;
470            break;
471            }
472        braced = TRUE;        braced = TRUE;
473        ptr++;        ptr++;
474        }        }
# Line 1267  for (;;) Line 1279  for (;;)
1279        if (code[-1] >= 0xc0) code += _pcre_utf8_table4[code[-1] & 0x3f];        if (code[-1] >= 0xc0) code += _pcre_utf8_table4[code[-1] & 0x3f];
1280        break;        break;
1281        }        }
1282  #endif  #endif
1283      }      }
1284    }    }
1285  }  }
# Line 1332  for (;;) Line 1344  for (;;)
1344        if (code[-1] >= 0xc0) code += _pcre_utf8_table4[code[-1] & 0x3f];        if (code[-1] >= 0xc0) code += _pcre_utf8_table4[code[-1] & 0x3f];
1345        break;        break;
1346        }        }
1347  #endif  #endif
1348      }      }
1349    }    }
1350  }  }
# Line 1370  for (code = first_significant_code(code Line 1382  for (code = first_significant_code(code
1382    
1383    c = *code;    c = *code;
1384    
1385      /* Groups with zero repeats can of course be empty; skip them. */
1386    
1387      if (c == OP_BRAZERO || c == OP_BRAMINZERO)
1388        {
1389        code += _pcre_OP_lengths[c];
1390        do code += GET(code, 1); while (*code == OP_ALT);
1391        c = *code;
1392        continue;
1393        }
1394    
1395      /* For other groups, scan the branches. */
1396    
1397    if (c == OP_BRA || c == OP_CBRA || c == OP_ONCE)    if (c == OP_BRA || c == OP_CBRA || c == OP_ONCE)
1398      {      {
1399      BOOL empty_branch;      BOOL empty_branch;
# Line 1386  for (code = first_significant_code(code Line 1410  for (code = first_significant_code(code
1410        }        }
1411      while (*code == OP_ALT);      while (*code == OP_ALT);
1412      if (!empty_branch) return FALSE;   /* All branches are non-empty */      if (!empty_branch) return FALSE;   /* All branches are non-empty */
1413        c = *code;
     /* Move past the KET and fudge things so that the increment in the "for"  
     above has no effect. */  
   
     c = OP_END;  
     code += 1 + LINK_SIZE - _pcre_OP_lengths[c];  
1414      continue;      continue;
1415      }      }
1416    
# Line 2091  for (;; ptr++) Line 2110  for (;; ptr++)
2110    BOOL possessive_quantifier;    BOOL possessive_quantifier;
2111    BOOL is_quantifier;    BOOL is_quantifier;
2112    BOOL is_recurse;    BOOL is_recurse;
2113      BOOL reset_bracount;
2114    int class_charcount;    int class_charcount;
2115    int class_lastchar;    int class_lastchar;
2116    int newoptions;    int newoptions;
2117    int recno;    int recno;
2118      int refsign;
2119    int skipbytes;    int skipbytes;
2120    int subreqbyte;    int subreqbyte;
2121    int subfirstbyte;    int subfirstbyte;
# Line 3564  for (;; ptr++) Line 3585  for (;; ptr++)
3585      skipbytes = 0;      skipbytes = 0;
3586      bravalue = OP_CBRA;      bravalue = OP_CBRA;
3587      save_hwm = cd->hwm;      save_hwm = cd->hwm;
3588        reset_bracount = FALSE;
3589    
3590      if (*(++ptr) == '?')      if (*(++ptr) == '?')
3591        {        {
# Line 3586  for (;; ptr++) Line 3608  for (;; ptr++)
3608    
3609    
3610          /* ------------------------------------------------------------ */          /* ------------------------------------------------------------ */
3611            case '|':                 /* Reset capture count for each branch */
3612            reset_bracount = TRUE;
3613            /* Fall through */
3614    
3615            /* ------------------------------------------------------------ */
3616          case ':':                 /* Non-capturing bracket */          case ':':                 /* Non-capturing bracket */
3617          bravalue = OP_BRA;          bravalue = OP_BRA;
3618          ptr++;          ptr++;
# Line 3621  for (;; ptr++) Line 3648  for (;; ptr++)
3648    
3649          code[1+LINK_SIZE] = OP_CREF;          code[1+LINK_SIZE] = OP_CREF;
3650          skipbytes = 3;          skipbytes = 3;
3651            refsign = -1;
3652    
3653          /* Check for a test for recursion in a named group. */          /* Check for a test for recursion in a named group. */
3654    
# Line 3644  for (;; ptr++) Line 3672  for (;; ptr++)
3672            terminator = '\'';            terminator = '\'';
3673            ptr++;            ptr++;
3674            }            }
3675          else terminator = 0;          else
3676              {
3677              terminator = 0;
3678              if (ptr[1] == '-' || ptr[1] == '+') refsign = *(++ptr);
3679              }
3680    
3681          /* We now expect to read a name; anything else is an error */          /* We now expect to read a name; anything else is an error */
3682    
# Line 3680  for (;; ptr++) Line 3712  for (;; ptr++)
3712          if (lengthptr != NULL) break;          if (lengthptr != NULL) break;
3713    
3714          /* In the real compile we do the work of looking for the actual          /* In the real compile we do the work of looking for the actual
3715          reference. */          reference. If the string started with "+" or "-" we require the rest to
3716            be digits, in which case recno will be set. */
3717    
3718            if (refsign > 0)
3719              {
3720              if (recno <= 0)
3721                {
3722                *errorcodeptr = ERR58;
3723                goto FAILED;
3724                }
3725              if (refsign == '-')
3726                {
3727                recno = cd->bracount - recno + 1;
3728                if (recno <= 0)
3729                  {
3730                  *errorcodeptr = ERR15;
3731                  goto FAILED;
3732                  }
3733                }
3734              else recno += cd->bracount;
3735              PUT2(code, 2+LINK_SIZE, recno);
3736              break;
3737              }
3738    
3739            /* Otherwise (did not start with "+" or "-"), start by looking for the
3740            name. */
3741    
3742          slot = cd->name_table;          slot = cd->name_table;
3743          for (i = 0; i < cd->names_found; i++)          for (i = 0; i < cd->names_found; i++)
# Line 3999  for (;; ptr++) Line 4056  for (;; ptr++)
4056    
4057    
4058          /* ------------------------------------------------------------ */          /* ------------------------------------------------------------ */
4059            case '-': case '+':
4060          case '0': case '1': case '2': case '3': case '4':   /* Recursion or */          case '0': case '1': case '2': case '3': case '4':   /* Recursion or */
4061          case '5': case '6': case '7': case '8': case '9':   /* subroutine */          case '5': case '6': case '7': case '8': case '9':   /* subroutine */
4062            {            {
4063            const uschar *called;            const uschar *called;
4064    
4065              if ((refsign = *ptr) == '+') ptr++;
4066              else if (refsign == '-')
4067                {
4068                if ((digitab[ptr[1]] & ctype_digit) == 0)
4069                  goto OTHER_CHAR_AFTER_QUERY;
4070                ptr++;
4071                }
4072    
4073            recno = 0;            recno = 0;
4074            while((digitab[*ptr] & ctype_digit) != 0)            while((digitab[*ptr] & ctype_digit) != 0)
4075              recno = recno * 10 + *ptr++ - '0';              recno = recno * 10 + *ptr++ - '0';
4076    
4077            if (*ptr != ')')            if (*ptr != ')')
4078              {              {
4079              *errorcodeptr = ERR29;              *errorcodeptr = ERR29;
4080              goto FAILED;              goto FAILED;
4081              }              }
4082    
4083              if (refsign == '-')
4084                {
4085                if (recno == 0)
4086                  {
4087                  *errorcodeptr = ERR58;
4088                  goto FAILED;
4089                  }
4090                recno = cd->bracount - recno + 1;
4091                if (recno <= 0)
4092                  {
4093                  *errorcodeptr = ERR15;
4094                  goto FAILED;
4095                  }
4096                }
4097              else if (refsign == '+')
4098                {
4099                if (recno == 0)
4100                  {
4101                  *errorcodeptr = ERR58;
4102                  goto FAILED;
4103                  }
4104                recno += cd->bracount;
4105                }
4106    
4107            /* Come here from code above that handles a named recursion */            /* Come here from code above that handles a named recursion */
4108    
4109            HANDLE_RECURSION:            HANDLE_RECURSION:
# Line 4084  for (;; ptr++) Line 4176  for (;; ptr++)
4176    
4177          /* ------------------------------------------------------------ */          /* ------------------------------------------------------------ */
4178          default:              /* Other characters: check option setting */          default:              /* Other characters: check option setting */
4179            OTHER_CHAR_AFTER_QUERY:
4180          set = unset = 0;          set = unset = 0;
4181          optset = &set;          optset = &set;
4182    
# Line 4218  for (;; ptr++) Line 4311  for (;; ptr++)
4311           errorcodeptr,                 /* Where to put an error message */           errorcodeptr,                 /* Where to put an error message */
4312           (bravalue == OP_ASSERTBACK ||           (bravalue == OP_ASSERTBACK ||
4313            bravalue == OP_ASSERTBACK_NOT), /* TRUE if back assert */            bravalue == OP_ASSERTBACK_NOT), /* TRUE if back assert */
4314             reset_bracount,               /* True if (?| group */
4315           skipbytes,                    /* Skip over bracket number */           skipbytes,                    /* Skip over bracket number */
4316           &subfirstbyte,                /* For possible first char */           &subfirstbyte,                /* For possible first char */
4317           &subreqbyte,                  /* For possible last char */           &subreqbyte,                  /* For possible last char */
# Line 4234  for (;; ptr++) Line 4328  for (;; ptr++)
4328      is on the bracket. */      is on the bracket. */
4329    
4330      /* If this is a conditional bracket, check that there are no more than      /* If this is a conditional bracket, check that there are no more than
4331      two branches in the group, or just one if it's a DEFINE group. */      two branches in the group, or just one if it's a DEFINE group. We do this
4332        in the real compile phase, not in the pre-pass, where the whole group may
4333        not be available. */
4334    
4335      if (bravalue == OP_COND)      if (bravalue == OP_COND && lengthptr == NULL)
4336        {        {
4337        uschar *tc = code;        uschar *tc = code;
4338        int condcount = 0;        int condcount = 0;
# Line 4396  for (;; ptr++) Line 4492  for (;; ptr++)
4492        zerofirstbyte = firstbyte;        zerofirstbyte = firstbyte;
4493        zeroreqbyte = reqbyte;        zeroreqbyte = reqbyte;
4494    
4495        /* \k<name> or \k'name' is a back reference by name (Perl syntax) */        /* \k<name> or \k'name' is a back reference by name (Perl syntax).
4496          We also support \k{name} (.NET syntax) */
4497    
4498        if (-c == ESC_k && (ptr[1] == '<' || ptr[1] == '\''))        if (-c == ESC_k && (ptr[1] == '<' || ptr[1] == '\'' || ptr[1] == '{'))
4499          {          {
4500          is_recurse = FALSE;          is_recurse = FALSE;
4501          terminator = (*(++ptr) == '<')? '>' : '\'';          terminator = (*(++ptr) == '<')? '>' : (*ptr == '\'')? '\'' : '}';
4502          goto NAMED_REF_OR_RECURSE;          goto NAMED_REF_OR_RECURSE;
4503          }          }
4504    
# Line 4567  This function is used during the pre-com Line 4664  This function is used during the pre-com
4664  out the amount of memory needed, as well as during the real compile phase. The  out the amount of memory needed, as well as during the real compile phase. The
4665  value of lengthptr distinguishes the two phases.  value of lengthptr distinguishes the two phases.
4666    
4667  Argument:  Arguments:
4668    options        option bits, including any changes for this subpattern    options        option bits, including any changes for this subpattern
4669    oldims         previous settings of ims option bits    oldims         previous settings of ims option bits
4670    codeptr        -> the address of the current code pointer    codeptr        -> the address of the current code pointer
4671    ptrptr         -> the address of the current pattern pointer    ptrptr         -> the address of the current pattern pointer
4672    errorcodeptr   -> pointer to error code variable    errorcodeptr   -> pointer to error code variable
4673    lookbehind     TRUE if this is a lookbehind assertion    lookbehind     TRUE if this is a lookbehind assertion
4674      reset_bracount TRUE to reset the count for each branch
4675    skipbytes      skip this many bytes at start (for brackets and OP_COND)    skipbytes      skip this many bytes at start (for brackets and OP_COND)
4676    firstbyteptr   place to put the first required character, or a negative number    firstbyteptr   place to put the first required character, or a negative number
4677    reqbyteptr     place to put the last required character, or a negative number    reqbyteptr     place to put the last required character, or a negative number
# Line 4587  Returns: TRUE on success Line 4685  Returns: TRUE on success
4685    
4686  static BOOL  static BOOL
4687  compile_regex(int options, int oldims, uschar **codeptr, const uschar **ptrptr,  compile_regex(int options, int oldims, uschar **codeptr, const uschar **ptrptr,
4688    int *errorcodeptr, BOOL lookbehind, int skipbytes, int *firstbyteptr,    int *errorcodeptr, BOOL lookbehind, BOOL reset_bracount, int skipbytes,
4689    int *reqbyteptr, branch_chain *bcptr, compile_data *cd, int *lengthptr)    int *firstbyteptr, int *reqbyteptr, branch_chain *bcptr, compile_data *cd,
4690      int *lengthptr)
4691  {  {
4692  const uschar *ptr = *ptrptr;  const uschar *ptr = *ptrptr;
4693  uschar *code = *codeptr;  uschar *code = *codeptr;
# Line 4598  uschar *reverse_count = NULL; Line 4697  uschar *reverse_count = NULL;
4697  int firstbyte, reqbyte;  int firstbyte, reqbyte;
4698  int branchfirstbyte, branchreqbyte;  int branchfirstbyte, branchreqbyte;
4699  int length;  int length;
4700    int orig_bracount;
4701    int max_bracount;
4702  branch_chain bc;  branch_chain bc;
4703    
4704  bc.outer = bcptr;  bc.outer = bcptr;
# Line 4626  code += 1 + LINK_SIZE + skipbytes; Line 4727  code += 1 + LINK_SIZE + skipbytes;
4727    
4728  /* Loop for each alternative branch */  /* Loop for each alternative branch */
4729    
4730    orig_bracount = max_bracount = cd->bracount;
4731  for (;;)  for (;;)
4732    {    {
4733      /* For a (?| group, reset the capturing bracket count so that each branch
4734      uses the same numbers. */
4735    
4736      if (reset_bracount) cd->bracount = orig_bracount;
4737    
4738    /* Handle a change of ims options at the start of the branch */    /* Handle a change of ims options at the start of the branch */
4739    
4740    if ((options & PCRE_IMS) != oldims)    if ((options & PCRE_IMS) != oldims)
# Line 4656  for (;;) Line 4763  for (;;)
4763      *ptrptr = ptr;      *ptrptr = ptr;
4764      return FALSE;      return FALSE;
4765      }      }
4766    
4767      /* Keep the highest bracket count in case (?| was used and some branch
4768      has fewer than the rest. */
4769    
4770      if (cd->bracount > max_bracount) max_bracount = cd->bracount;
4771    
4772    /* In the real compile phase, there is some post-processing to be done. */    /* In the real compile phase, there is some post-processing to be done. */
4773    
# Line 4720  for (;;) Line 4832  for (;;)
4832        }        }
4833      }      }
4834    
4835    /* Reached end of expression, either ')' or end of pattern. Go back through    /* Reached end of expression, either ')' or end of pattern. In the real
4836    the alternative branches and reverse the chain of offsets, with the field in    compile phase, go back through the alternative branches and reverse the chain
4837    the BRA item now becoming an offset to the first alternative. If there are    of offsets, with the field in the BRA item now becoming an offset to the
4838    no alternatives, it points to the end of the group. The length in the    first alternative. If there are no alternatives, it points to the end of the
4839    terminating ket is always the length of the whole bracketed item. If any of    group. The length in the terminating ket is always the length of the whole
4840    the ims options were changed inside the group, compile a resetting op-code    bracketed item. If any of the ims options were changed inside the group,
4841    following, except at the very end of the pattern. Return leaving the pointer    compile a resetting op-code following, except at the very end of the pattern.
4842    at the terminating char. */    Return leaving the pointer at the terminating char. */
4843    
4844    if (*ptr != '|')    if (*ptr != '|')
4845      {      {
4846      int branch_length = code - last_branch;      if (lengthptr == NULL)
     do  
4847        {        {
4848        int prev_length = GET(last_branch, 1);        int branch_length = code - last_branch;
4849        PUT(last_branch, 1, branch_length);        do
4850        branch_length = prev_length;          {
4851        last_branch -= branch_length;          int prev_length = GET(last_branch, 1);
4852            PUT(last_branch, 1, branch_length);
4853            branch_length = prev_length;
4854            last_branch -= branch_length;
4855            }
4856          while (branch_length > 0);
4857        }        }
     while (branch_length > 0);  
4858    
4859      /* Fill in the ket */      /* Fill in the ket */
4860    
# Line 4755  for (;;) Line 4870  for (;;)
4870        *code++ = oldims;        *code++ = oldims;
4871        length += 2;        length += 2;
4872        }        }
4873    
4874        /* Retain the highest bracket number, in case resetting was used. */
4875    
4876        cd->bracount = max_bracount;
4877    
4878      /* Set values to pass back */      /* Set values to pass back */
4879    
# Line 4766  for (;;) Line 4885  for (;;)
4885      return TRUE;      return TRUE;
4886      }      }
4887    
4888    /* Another branch follows; insert an "or" node. Its length field points back    /* Another branch follows. In the pre-compile phase, we can move the code
4889      pointer back to where it was for the start of the first branch. (That is,
4890      pretend that each branch is the only one.)
4891    
4892      In the real compile phase, insert an ALT node. Its length field points back
4893    to the previous branch while the bracket remains open. At the end the chain    to the previous branch while the bracket remains open. At the end the chain
4894    is reversed. It's done like this so that the start of the bracket has a    is reversed. It's done like this so that the start of the bracket has a
4895    zero offset until it is closed, making it possible to detect recursion. */    zero offset until it is closed, making it possible to detect recursion. */
4896    
4897    *code = OP_ALT;    if (lengthptr != NULL)
4898    PUT(code, 1, code - last_branch);      {
4899    bc.current = last_branch = code;      code = *codeptr + 1 + LINK_SIZE + skipbytes;
4900    code += 1 + LINK_SIZE;      length += 1 + LINK_SIZE;
4901        }
4902      else
4903        {
4904        *code = OP_ALT;
4905        PUT(code, 1, code - last_branch);
4906        bc.current = last_branch = code;
4907        code += 1 + LINK_SIZE;
4908        }
4909    
4910    ptr++;    ptr++;
   length += 1 + LINK_SIZE;  
4911    }    }
4912  /* Control never reaches here */  /* Control never reaches here */
4913  }  }
# Line 5043  Returns: pointer to compiled data Line 5174  Returns: pointer to compiled data
5174                  with errorptr and erroroffset set                  with errorptr and erroroffset set
5175  */  */
5176    
5177  PCRE_DATA_SCOPE pcre *  PCRE_EXP_DEFN pcre *
5178  pcre_compile(const char *pattern, int options, const char **errorptr,  pcre_compile(const char *pattern, int options, const char **errorptr,
5179    int *erroroffset, const unsigned char *tables)    int *erroroffset, const unsigned char *tables)
5180  {  {
# Line 5051  return pcre_compile2(pattern, options, N Line 5182  return pcre_compile2(pattern, options, N
5182  }  }
5183    
5184    
5185  PCRE_DATA_SCOPE pcre *  PCRE_EXP_DEFN pcre *
5186  pcre_compile2(const char *pattern, int options, int *errorcodeptr,  pcre_compile2(const char *pattern, int options, int *errorcodeptr,
5187    const char **errorptr, int *erroroffset, const unsigned char *tables)    const char **errorptr, int *erroroffset, const unsigned char *tables)
5188  {  {
# Line 5100  if (errorcodeptr != NULL) *errorcodeptr Line 5231  if (errorcodeptr != NULL) *errorcodeptr
5231  if (erroroffset == NULL)  if (erroroffset == NULL)
5232    {    {
5233    errorcode = ERR16;    errorcode = ERR16;
5234    goto PCRE_EARLY_ERROR_RETURN;    goto PCRE_EARLY_ERROR_RETURN2;
5235    }    }
5236    
5237  *erroroffset = 0;  *erroroffset = 0;
# Line 5113  if (utf8 && (options & PCRE_NO_UTF8_CHEC Line 5244  if (utf8 && (options & PCRE_NO_UTF8_CHEC
5244       (*erroroffset = _pcre_valid_utf8((uschar *)pattern, -1)) >= 0)       (*erroroffset = _pcre_valid_utf8((uschar *)pattern, -1)) >= 0)
5245    {    {
5246    errorcode = ERR44;    errorcode = ERR44;
5247    goto PCRE_UTF8_ERROR_RETURN;    goto PCRE_EARLY_ERROR_RETURN2;
5248    }    }
5249  #else  #else
5250  if ((options & PCRE_UTF8) != 0)  if ((options & PCRE_UTF8) != 0)
# Line 5138  cd->cbits = tables + cbits_offset; Line 5269  cd->cbits = tables + cbits_offset;
5269  cd->ctypes = tables + ctypes_offset;  cd->ctypes = tables + ctypes_offset;
5270    
5271  /* Handle different types of newline. The three bits give seven cases. The  /* Handle different types of newline. The three bits give seven cases. The
5272  current code allows for fixed one- or two-byte sequences, plus "any". */  current code allows for fixed one- or two-byte sequences, plus "any" and
5273    "anycrlf". */
5274    
5275  switch (options & (PCRE_NEWLINE_CRLF | PCRE_NEWLINE_ANY))  switch (options & (PCRE_NEWLINE_CRLF | PCRE_NEWLINE_ANY))
5276    {    {
# Line 5148  switch (options & (PCRE_NEWLINE_CRLF | P Line 5280  switch (options & (PCRE_NEWLINE_CRLF | P
5280    case PCRE_NEWLINE_CR+    case PCRE_NEWLINE_CR+
5281         PCRE_NEWLINE_LF: newline = ('\r' << 8) | '\n'; break;         PCRE_NEWLINE_LF: newline = ('\r' << 8) | '\n'; break;
5282    case PCRE_NEWLINE_ANY: newline = -1; break;    case PCRE_NEWLINE_ANY: newline = -1; break;
5283      case PCRE_NEWLINE_ANYCRLF: newline = -2; break;
5284    default: errorcode = ERR56; goto PCRE_EARLY_ERROR_RETURN;    default: errorcode = ERR56; goto PCRE_EARLY_ERROR_RETURN;
5285    }    }
5286    
5287  if (newline < 0)  if (newline == -2)
5288      {
5289      cd->nltype = NLTYPE_ANYCRLF;
5290      }
5291    else if (newline < 0)
5292    {    {
5293    cd->nltype = NLTYPE_ANY;    cd->nltype = NLTYPE_ANY;
5294    }    }
# Line 5212  outside can help speed up starting point Line 5349  outside can help speed up starting point
5349  code = cworkspace;  code = cworkspace;
5350  *code = OP_BRA;  *code = OP_BRA;
5351  (void)compile_regex(cd->external_options, cd->external_options & PCRE_IMS,  (void)compile_regex(cd->external_options, cd->external_options & PCRE_IMS,
5352    &code, &ptr, &errorcode, FALSE, 0, &firstbyte, &reqbyte, NULL, cd, &length);    &code, &ptr, &errorcode, FALSE, FALSE, 0, &firstbyte, &reqbyte, NULL, cd,
5353      &length);
5354  if (errorcode != 0) goto PCRE_EARLY_ERROR_RETURN;  if (errorcode != 0) goto PCRE_EARLY_ERROR_RETURN;
5355    
5356  DPRINTF(("end pre-compile: length=%d workspace=%d\n", length,  DPRINTF(("end pre-compile: length=%d workspace=%d\n", length,
# Line 5280  ptr = (const uschar *)pattern; Line 5418  ptr = (const uschar *)pattern;
5418  code = (uschar *)codestart;  code = (uschar *)codestart;
5419  *code = OP_BRA;  *code = OP_BRA;
5420  (void)compile_regex(re->options, re->options & PCRE_IMS, &code, &ptr,  (void)compile_regex(re->options, re->options & PCRE_IMS, &code, &ptr,
5421    &errorcode, FALSE, 0, &firstbyte, &reqbyte, NULL, cd, NULL);    &errorcode, FALSE, FALSE, 0, &firstbyte, &reqbyte, NULL, cd, NULL);
5422  re->top_bracket = cd->bracount;  re->top_bracket = cd->bracount;
5423  re->top_backref = cd->top_backref;  re->top_backref = cd->top_backref;
5424    
# Line 5325  if (errorcode != 0) Line 5463  if (errorcode != 0)
5463    (pcre_free)(re);    (pcre_free)(re);
5464    PCRE_EARLY_ERROR_RETURN:    PCRE_EARLY_ERROR_RETURN:
5465    *erroroffset = ptr - (const uschar *)pattern;    *erroroffset = ptr - (const uschar *)pattern;
5466  #ifdef SUPPORT_UTF8    PCRE_EARLY_ERROR_RETURN2:
   PCRE_UTF8_ERROR_RETURN:  
 #endif  
5467    *errorptr = error_texts[errorcode];    *errorptr = error_texts[errorcode];
5468    if (errorcodeptr != NULL) *errorcodeptr = errorcode;    if (errorcodeptr != NULL) *errorcodeptr = errorcode;
5469    return NULL;    return NULL;
# Line 5417  if ((re->options & PCRE_REQCHSET) != 0) Line 5553  if ((re->options & PCRE_REQCHSET) != 0)
5553      else printf("Req char = \\x%02x%s\n", ch, caseless);      else printf("Req char = \\x%02x%s\n", ch, caseless);
5554    }    }
5555    
5556  pcre_printint(re, stdout);  pcre_printint(re, stdout, TRUE);
5557    
5558  /* This check is done here in the debugging case so that the code that  /* This check is done here in the debugging case so that the code that
5559  was compiled can be seen. */  was compiled can be seen. */

Legend:
Removed from v.107  
changed lines
  Added in v.175

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12