/[pcre]/code/trunk/pcre_compile.c
ViewVC logotype

Diff of /code/trunk/pcre_compile.c

Parent Directory | Revision Log | View Patch

revision 116 by ph10, Fri Mar 9 15:23:02 2007 UTC | revision 170 by ph10, Mon Jun 4 11:21:13 2007 UTC
# Line 6  Line 6 
6  and semantics are as close as possible to those of the Perl 5 language.  and semantics are as close as possible to those of the Perl 5 language.
7    
8                         Written by Philip Hazel                         Written by Philip Hazel
9             Copyright (c) 1997-2006 University of Cambridge             Copyright (c) 1997-2007 University of Cambridge
10    
11  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
12  Redistribution and use in source and binary forms, with or without  Redistribution and use in source and binary forms, with or without
# Line 87  static const short int escapes[] = { Line 87  static const short int escapes[] = {
87       0,      0,      0,      0,      0,      0,      0,      0,   /* 0 - 7 */       0,      0,      0,      0,      0,      0,      0,      0,   /* 0 - 7 */
88       0,      0,    ':',    ';',    '<',    '=',    '>',    '?',   /* 8 - ? */       0,      0,    ':',    ';',    '<',    '=',    '>',    '?',   /* 8 - ? */
89     '@', -ESC_A, -ESC_B, -ESC_C, -ESC_D, -ESC_E,      0, -ESC_G,   /* @ - G */     '@', -ESC_A, -ESC_B, -ESC_C, -ESC_D, -ESC_E,      0, -ESC_G,   /* @ - G */
90       0,      0,      0,      0,      0,      0,      0,      0,   /* H - O */       0,      0,      0, -ESC_K,      0,      0,      0,      0,   /* H - O */
91  -ESC_P, -ESC_Q, -ESC_R, -ESC_S,      0,      0,      0, -ESC_W,   /* P - W */  -ESC_P, -ESC_Q, -ESC_R, -ESC_S,      0,      0,      0, -ESC_W,   /* P - W */
92  -ESC_X,      0, -ESC_Z,    '[',   '\\',    ']',    '^',    '_',   /* X - _ */  -ESC_X,      0, -ESC_Z,    '[',   '\\',    ']',    '^',    '_',   /* X - _ */
93     '`',      7, -ESC_b,      0, -ESC_d,  ESC_e,  ESC_f,      0,   /* ` - g */     '`',      7, -ESC_b,      0, -ESC_d,  ESC_e,  ESC_f,      0,   /* ` - g */
# Line 208  static const char *error_texts[] = { Line 208  static const char *error_texts[] = {
208    "malformed number or name after (?(",    "malformed number or name after (?(",
209    "conditional group contains more than two branches",    "conditional group contains more than two branches",
210    "assertion expected after (?(",    "assertion expected after (?(",
211    "(?R or (?digits must be followed by )",    "(?R or (?[+-]digits must be followed by )",
212    /* 30 */    /* 30 */
213    "unknown POSIX class name",    "unknown POSIX class name",
214    "POSIX collating elements are not supported",    "POSIX collating elements are not supported",
# Line 242  static const char *error_texts[] = { Line 242  static const char *error_texts[] = {
242    /* 55 */    /* 55 */
243    "repeating a DEFINE group is not allowed",    "repeating a DEFINE group is not allowed",
244    "inconsistent NEWLINE options",    "inconsistent NEWLINE options",
245    "\\g is not followed by an (optionally braced) non-zero number"    "\\g is not followed by an (optionally braced) non-zero number",
246      "(?+ or (?- or (?(+ or (?(- must be followed by a non-zero number"
247  };  };
248    
249    
# Line 1369  for (code = first_significant_code(code Line 1370  for (code = first_significant_code(code
1370    const uschar *ccode;    const uschar *ccode;
1371    
1372    c = *code;    c = *code;
1373    
1374      /* Groups with zero repeats can of course be empty; skip them. */
1375    
1376      if (c == OP_BRAZERO || c == OP_BRAMINZERO)
1377        {
1378        do code += GET(code, 1); while (*code == OP_ALT);
1379        c = *code;
1380        continue;
1381        }
1382    
1383      /* For other groups, scan the branches. */
1384    
1385    if (c == OP_BRA || c == OP_CBRA || c == OP_ONCE)    if (c == OP_BRA || c == OP_CBRA || c == OP_ONCE)
1386      {      {
1387      BOOL empty_branch;      BOOL empty_branch;
# Line 1386  for (code = first_significant_code(code Line 1398  for (code = first_significant_code(code
1398        }        }
1399      while (*code == OP_ALT);      while (*code == OP_ALT);
1400      if (!empty_branch) return FALSE;   /* All branches are non-empty */      if (!empty_branch) return FALSE;   /* All branches are non-empty */
1401        c = *code;
     /* Move past the KET and fudge things so that the increment in the "for"  
     above has no effect. */  
   
     c = OP_END;  
     code += 1 + LINK_SIZE - _pcre_OP_lengths[c];  
1402      continue;      continue;
1403      }      }
1404    
# Line 2095  for (;; ptr++) Line 2102  for (;; ptr++)
2102    int class_lastchar;    int class_lastchar;
2103    int newoptions;    int newoptions;
2104    int recno;    int recno;
2105      int refsign;
2106    int skipbytes;    int skipbytes;
2107    int subreqbyte;    int subreqbyte;
2108    int subfirstbyte;    int subfirstbyte;
# Line 3621  for (;; ptr++) Line 3629  for (;; ptr++)
3629    
3630          code[1+LINK_SIZE] = OP_CREF;          code[1+LINK_SIZE] = OP_CREF;
3631          skipbytes = 3;          skipbytes = 3;
3632            refsign = -1;
3633    
3634          /* Check for a test for recursion in a named group. */          /* Check for a test for recursion in a named group. */
3635    
# Line 3644  for (;; ptr++) Line 3653  for (;; ptr++)
3653            terminator = '\'';            terminator = '\'';
3654            ptr++;            ptr++;
3655            }            }
3656          else terminator = 0;          else
3657              {
3658              terminator = 0;
3659              if (ptr[1] == '-' || ptr[1] == '+') refsign = *(++ptr);
3660              }
3661    
3662          /* We now expect to read a name; anything else is an error */          /* We now expect to read a name; anything else is an error */
3663    
# Line 3680  for (;; ptr++) Line 3693  for (;; ptr++)
3693          if (lengthptr != NULL) break;          if (lengthptr != NULL) break;
3694    
3695          /* In the real compile we do the work of looking for the actual          /* In the real compile we do the work of looking for the actual
3696          reference. */          reference. If the string started with "+" or "-" we require the rest to
3697            be digits, in which case recno will be set. */
3698    
3699            if (refsign > 0)
3700              {
3701              if (recno <= 0)
3702                {
3703                *errorcodeptr = ERR58;
3704                goto FAILED;
3705                }
3706              if (refsign == '-')
3707                {
3708                recno = cd->bracount - recno + 1;
3709                if (recno <= 0)
3710                  {
3711                  *errorcodeptr = ERR15;
3712                  goto FAILED;
3713                  }
3714                }
3715              else recno += cd->bracount;
3716              PUT2(code, 2+LINK_SIZE, recno);
3717              break;
3718              }
3719    
3720            /* Otherwise (did not start with "+" or "-"), start by looking for the
3721            name. */
3722    
3723          slot = cd->name_table;          slot = cd->name_table;
3724          for (i = 0; i < cd->names_found; i++)          for (i = 0; i < cd->names_found; i++)
3725            {            {
# Line 3999  for (;; ptr++) Line 4037  for (;; ptr++)
4037    
4038    
4039          /* ------------------------------------------------------------ */          /* ------------------------------------------------------------ */
4040            case '-': case '+':
4041          case '0': case '1': case '2': case '3': case '4':   /* Recursion or */          case '0': case '1': case '2': case '3': case '4':   /* Recursion or */
4042          case '5': case '6': case '7': case '8': case '9':   /* subroutine */          case '5': case '6': case '7': case '8': case '9':   /* subroutine */
4043            {            {
4044            const uschar *called;            const uschar *called;
4045    
4046              if ((refsign = *ptr) == '+') ptr++;
4047              else if (refsign == '-')
4048                {
4049                if ((digitab[ptr[1]] & ctype_digit) == 0)
4050                  goto OTHER_CHAR_AFTER_QUERY;
4051                ptr++;
4052                }
4053    
4054            recno = 0;            recno = 0;
4055            while((digitab[*ptr] & ctype_digit) != 0)            while((digitab[*ptr] & ctype_digit) != 0)
4056              recno = recno * 10 + *ptr++ - '0';              recno = recno * 10 + *ptr++ - '0';
4057    
4058            if (*ptr != ')')            if (*ptr != ')')
4059              {              {
4060              *errorcodeptr = ERR29;              *errorcodeptr = ERR29;
4061              goto FAILED;              goto FAILED;
4062              }              }
4063    
4064              if (refsign == '-')
4065                {
4066                if (recno == 0)
4067                  {
4068                  *errorcodeptr = ERR58;
4069                  goto FAILED;
4070                  }
4071                recno = cd->bracount - recno + 1;
4072                if (recno <= 0)
4073                  {
4074                  *errorcodeptr = ERR15;
4075                  goto FAILED;
4076                  }
4077                }
4078              else if (refsign == '+')
4079                {
4080                if (recno == 0)
4081                  {
4082                  *errorcodeptr = ERR58;
4083                  goto FAILED;
4084                  }
4085                recno += cd->bracount;
4086                }
4087    
4088            /* Come here from code above that handles a named recursion */            /* Come here from code above that handles a named recursion */
4089    
# Line 4084  for (;; ptr++) Line 4157  for (;; ptr++)
4157    
4158          /* ------------------------------------------------------------ */          /* ------------------------------------------------------------ */
4159          default:              /* Other characters: check option setting */          default:              /* Other characters: check option setting */
4160            OTHER_CHAR_AFTER_QUERY:
4161          set = unset = 0;          set = unset = 0;
4162          optset = &set;          optset = &set;
4163    
# Line 5043  Returns: pointer to compiled data Line 5117  Returns: pointer to compiled data
5117                  with errorptr and erroroffset set                  with errorptr and erroroffset set
5118  */  */
5119    
5120  PCRE_DATA_SCOPE pcre *  PCRE_EXP_DEFN pcre *
5121  pcre_compile(const char *pattern, int options, const char **errorptr,  pcre_compile(const char *pattern, int options, const char **errorptr,
5122    int *erroroffset, const unsigned char *tables)    int *erroroffset, const unsigned char *tables)
5123  {  {
# Line 5051  return pcre_compile2(pattern, options, N Line 5125  return pcre_compile2(pattern, options, N
5125  }  }
5126    
5127    
5128  PCRE_DATA_SCOPE pcre *  PCRE_EXP_DEFN pcre *
5129  pcre_compile2(const char *pattern, int options, int *errorcodeptr,  pcre_compile2(const char *pattern, int options, int *errorcodeptr,
5130    const char **errorptr, int *erroroffset, const unsigned char *tables)    const char **errorptr, int *erroroffset, const unsigned char *tables)
5131  {  {
# Line 5100  if (errorcodeptr != NULL) *errorcodeptr Line 5174  if (errorcodeptr != NULL) *errorcodeptr
5174  if (erroroffset == NULL)  if (erroroffset == NULL)
5175    {    {
5176    errorcode = ERR16;    errorcode = ERR16;
5177    goto PCRE_EARLY_ERROR_RETURN;    goto PCRE_EARLY_ERROR_RETURN2;
5178    }    }
5179    
5180  *erroroffset = 0;  *erroroffset = 0;
# Line 5113  if (utf8 && (options & PCRE_NO_UTF8_CHEC Line 5187  if (utf8 && (options & PCRE_NO_UTF8_CHEC
5187       (*erroroffset = _pcre_valid_utf8((uschar *)pattern, -1)) >= 0)       (*erroroffset = _pcre_valid_utf8((uschar *)pattern, -1)) >= 0)
5188    {    {
5189    errorcode = ERR44;    errorcode = ERR44;
5190    goto PCRE_UTF8_ERROR_RETURN;    goto PCRE_EARLY_ERROR_RETURN2;
5191    }    }
5192  #else  #else
5193  if ((options & PCRE_UTF8) != 0)  if ((options & PCRE_UTF8) != 0)
# Line 5138  cd->cbits = tables + cbits_offset; Line 5212  cd->cbits = tables + cbits_offset;
5212  cd->ctypes = tables + ctypes_offset;  cd->ctypes = tables + ctypes_offset;
5213    
5214  /* Handle different types of newline. The three bits give seven cases. The  /* Handle different types of newline. The three bits give seven cases. The
5215  current code allows for fixed one- or two-byte sequences, plus "any". */  current code allows for fixed one- or two-byte sequences, plus "any" and
5216    "anycrlf". */
5217    
5218  switch (options & (PCRE_NEWLINE_CRLF | PCRE_NEWLINE_ANY))  switch (options & (PCRE_NEWLINE_CRLF | PCRE_NEWLINE_ANY))
5219    {    {
# Line 5148  switch (options & (PCRE_NEWLINE_CRLF | P Line 5223  switch (options & (PCRE_NEWLINE_CRLF | P
5223    case PCRE_NEWLINE_CR+    case PCRE_NEWLINE_CR+
5224         PCRE_NEWLINE_LF: newline = ('\r' << 8) | '\n'; break;         PCRE_NEWLINE_LF: newline = ('\r' << 8) | '\n'; break;
5225    case PCRE_NEWLINE_ANY: newline = -1; break;    case PCRE_NEWLINE_ANY: newline = -1; break;
5226      case PCRE_NEWLINE_ANYCRLF: newline = -2; break;
5227    default: errorcode = ERR56; goto PCRE_EARLY_ERROR_RETURN;    default: errorcode = ERR56; goto PCRE_EARLY_ERROR_RETURN;
5228    }    }
5229    
5230  if (newline < 0)  if (newline == -2)
5231      {
5232      cd->nltype = NLTYPE_ANYCRLF;
5233      }
5234    else if (newline < 0)
5235    {    {
5236    cd->nltype = NLTYPE_ANY;    cd->nltype = NLTYPE_ANY;
5237    }    }
# Line 5325  if (errorcode != 0) Line 5405  if (errorcode != 0)
5405    (pcre_free)(re);    (pcre_free)(re);
5406    PCRE_EARLY_ERROR_RETURN:    PCRE_EARLY_ERROR_RETURN:
5407    *erroroffset = ptr - (const uschar *)pattern;    *erroroffset = ptr - (const uschar *)pattern;
5408  #ifdef SUPPORT_UTF8    PCRE_EARLY_ERROR_RETURN2:
   PCRE_UTF8_ERROR_RETURN:  
 #endif  
5409    *errorptr = error_texts[errorcode];    *errorptr = error_texts[errorcode];
5410    if (errorcodeptr != NULL) *errorcodeptr = errorcode;    if (errorcodeptr != NULL) *errorcodeptr = errorcode;
5411    return NULL;    return NULL;

Legend:
Removed from v.116  
changed lines
  Added in v.170

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12