/[pcre]/code/trunk/pcre_compile.c
ViewVC logotype

Diff of /code/trunk/pcre_compile.c

Parent Directory | Revision Log | View Patch

revision 171 by ph10, Mon Jun 4 14:28:58 2007 UTC revision 172 by ph10, Tue Jun 5 10:40:13 2007 UTC
# Line 243  static const char *error_texts[] = { Line 243  static const char *error_texts[] = {
243    "repeating a DEFINE group is not allowed",    "repeating a DEFINE group is not allowed",
244    "inconsistent NEWLINE options",    "inconsistent NEWLINE options",
245    "\\g is not followed by a braced name or an optionally braced non-zero number",    "\\g is not followed by a braced name or an optionally braced non-zero number",
246    "(?+ or (?- or (?(+ or (?(- must be followed by a non-zero number"    "(?+ or (?- or (?(+ or (?(- must be followed by a non-zero number"
247  };  };
248    
249    
# Line 453  else Line 453  else
453    
454      /* \g must be followed by a number, either plain or braced. If positive, it      /* \g must be followed by a number, either plain or braced. If positive, it
455      is an absolute backreference. If negative, it is a relative backreference.      is an absolute backreference. If negative, it is a relative backreference.
456      This is a Perl 5.10 feature. Perl 5.10 also supports \g{name} as a      This is a Perl 5.10 feature. Perl 5.10 also supports \g{name} as a
457      reference to a named group. This is part of Perl's movement towards a      reference to a named group. This is part of Perl's movement towards a
458      unified syntax for back references. As this is synonymous with \k{name}, we      unified syntax for back references. As this is synonymous with \k{name}, we
459      fudge it up by pretending it really was \k. */      fudge it up by pretending it really was \k. */
460    
461      case 'g':      case 'g':
# Line 464  else Line 464  else
464        const uschar *p;        const uschar *p;
465        for (p = ptr+2; *p != 0 && *p != '}'; p++)        for (p = ptr+2; *p != 0 && *p != '}'; p++)
466          if (*p != '-' && (digitab[*p] & ctype_digit) == 0) break;          if (*p != '-' && (digitab[*p] & ctype_digit) == 0) break;
467        if (*p != 0 && *p != '}')        if (*p != 0 && *p != '}')
468          {          {
469          c = -ESC_k;          c = -ESC_k;
470          break;          break;
471          }          }
472        braced = TRUE;        braced = TRUE;
473        ptr++;        ptr++;
474        }        }
# Line 1381  for (code = first_significant_code(code Line 1381  for (code = first_significant_code(code
1381    const uschar *ccode;    const uschar *ccode;
1382    
1383    c = *code;    c = *code;
1384    
1385    /* Groups with zero repeats can of course be empty; skip them. */    /* Groups with zero repeats can of course be empty; skip them. */
1386    
1387    if (c == OP_BRAZERO || c == OP_BRAMINZERO)    if (c == OP_BRAZERO || c == OP_BRAMINZERO)
1388      {      {
1389        code += _pcre_OP_lengths[c];
1390      do code += GET(code, 1); while (*code == OP_ALT);      do code += GET(code, 1); while (*code == OP_ALT);
1391      c = *code;      c = *code;
1392      continue;      continue;
1393      }      }
1394    
1395    /* For other groups, scan the branches. */    /* For other groups, scan the branches. */
1396    
1397    if (c == OP_BRA || c == OP_CBRA || c == OP_ONCE)    if (c == OP_BRA || c == OP_CBRA || c == OP_ONCE)
1398      {      {
1399      BOOL empty_branch;      BOOL empty_branch;
# Line 1409  for (code = first_significant_code(code Line 1410  for (code = first_significant_code(code
1410        }        }
1411      while (*code == OP_ALT);      while (*code == OP_ALT);
1412      if (!empty_branch) return FALSE;   /* All branches are non-empty */      if (!empty_branch) return FALSE;   /* All branches are non-empty */
1413      c = *code;      c = *code;
1414      continue;      continue;
1415      }      }
1416    
# Line 2113  for (;; ptr++) Line 2114  for (;; ptr++)
2114    int class_lastchar;    int class_lastchar;
2115    int newoptions;    int newoptions;
2116    int recno;    int recno;
2117    int refsign;    int refsign;
2118    int skipbytes;    int skipbytes;
2119    int subreqbyte;    int subreqbyte;
2120    int subfirstbyte;    int subfirstbyte;
# Line 3640  for (;; ptr++) Line 3641  for (;; ptr++)
3641    
3642          code[1+LINK_SIZE] = OP_CREF;          code[1+LINK_SIZE] = OP_CREF;
3643          skipbytes = 3;          skipbytes = 3;
3644          refsign = -1;          refsign = -1;
3645    
3646          /* Check for a test for recursion in a named group. */          /* Check for a test for recursion in a named group. */
3647    
# Line 3664  for (;; ptr++) Line 3665  for (;; ptr++)
3665            terminator = '\'';            terminator = '\'';
3666            ptr++;            ptr++;
3667            }            }
3668          else          else
3669            {            {
3670            terminator = 0;            terminator = 0;
3671            if (ptr[1] == '-' || ptr[1] == '+') refsign = *(++ptr);            if (ptr[1] == '-' || ptr[1] == '+') refsign = *(++ptr);
3672            }            }
3673    
3674          /* We now expect to read a name; anything else is an error */          /* We now expect to read a name; anything else is an error */
3675    
# Line 3706  for (;; ptr++) Line 3707  for (;; ptr++)
3707          /* In the real compile we do the work of looking for the actual          /* In the real compile we do the work of looking for the actual
3708          reference. If the string started with "+" or "-" we require the rest to          reference. If the string started with "+" or "-" we require the rest to
3709          be digits, in which case recno will be set. */          be digits, in which case recno will be set. */
3710    
3711          if (refsign > 0)          if (refsign > 0)
3712            {            {
3713            if (recno <= 0)            if (recno <= 0)
3714              {              {
3715              *errorcodeptr = ERR58;              *errorcodeptr = ERR58;
3716              goto FAILED;              goto FAILED;
3717              }              }
3718            if (refsign == '-')            if (refsign == '-')
3719              {              {
3720              recno = cd->bracount - recno + 1;              recno = cd->bracount - recno + 1;
3721              if (recno <= 0)              if (recno <= 0)
3722                {                {
3723                *errorcodeptr = ERR15;                *errorcodeptr = ERR15;
3724                goto FAILED;                goto FAILED;
3725                }                }
3726              }              }
3727            else recno += cd->bracount;            else recno += cd->bracount;
3728            PUT2(code, 2+LINK_SIZE, recno);            PUT2(code, 2+LINK_SIZE, recno);
3729            break;            break;
3730            }            }
3731    
3732          /* Otherwise (did not start with "+" or "-"), start by looking for the          /* Otherwise (did not start with "+" or "-"), start by looking for the
3733          name. */          name. */
3734    
3735          slot = cd->name_table;          slot = cd->name_table;
3736          for (i = 0; i < cd->names_found; i++)          for (i = 0; i < cd->names_found; i++)
3737            {            {
# Line 4055  for (;; ptr++) Line 4056  for (;; ptr++)
4056            const uschar *called;            const uschar *called;
4057    
4058            if ((refsign = *ptr) == '+') ptr++;            if ((refsign = *ptr) == '+') ptr++;
4059            else if (refsign == '-')            else if (refsign == '-')
4060              {              {
4061              if ((digitab[ptr[1]] & ctype_digit) == 0)              if ((digitab[ptr[1]] & ctype_digit) == 0)
4062                goto OTHER_CHAR_AFTER_QUERY;                goto OTHER_CHAR_AFTER_QUERY;
4063              ptr++;              ptr++;
4064              }              }
4065    
4066            recno = 0;            recno = 0;
4067            while((digitab[*ptr] & ctype_digit) != 0)            while((digitab[*ptr] & ctype_digit) != 0)
4068              recno = recno * 10 + *ptr++ - '0';              recno = recno * 10 + *ptr++ - '0';
# Line 4071  for (;; ptr++) Line 4072  for (;; ptr++)
4072              *errorcodeptr = ERR29;              *errorcodeptr = ERR29;
4073              goto FAILED;              goto FAILED;
4074              }              }
4075    
4076            if (refsign == '-')            if (refsign == '-')
4077              {              {
4078              if (recno == 0)              if (recno == 0)
4079                {                {
4080                *errorcodeptr = ERR58;                *errorcodeptr = ERR58;
4081                goto FAILED;                goto FAILED;
4082                }                }
4083              recno = cd->bracount - recno + 1;              recno = cd->bracount - recno + 1;
4084              if (recno <= 0)              if (recno <= 0)
4085                {                {
4086                *errorcodeptr = ERR15;                *errorcodeptr = ERR15;
4087                goto FAILED;                goto FAILED;
4088                }                }
4089              }              }
4090            else if (refsign == '+')            else if (refsign == '+')
4091              {              {
# Line 4092  for (;; ptr++) Line 4093  for (;; ptr++)
4093                {                {
4094                *errorcodeptr = ERR58;                *errorcodeptr = ERR58;
4095                goto FAILED;                goto FAILED;
4096                }                }
4097              recno += cd->bracount;              recno += cd->bracount;
4098              }              }
4099    
4100            /* Come here from code above that handles a named recursion */            /* Come here from code above that handles a named recursion */
4101    
# Line 4168  for (;; ptr++) Line 4169  for (;; ptr++)
4169    
4170          /* ------------------------------------------------------------ */          /* ------------------------------------------------------------ */
4171          default:              /* Other characters: check option setting */          default:              /* Other characters: check option setting */
4172          OTHER_CHAR_AFTER_QUERY:          OTHER_CHAR_AFTER_QUERY:
4173          set = unset = 0;          set = unset = 0;
4174          optset = &set;          optset = &set;
4175    
# Line 4319  for (;; ptr++) Line 4320  for (;; ptr++)
4320      is on the bracket. */      is on the bracket. */
4321    
4322      /* If this is a conditional bracket, check that there are no more than      /* If this is a conditional bracket, check that there are no more than
4323      two branches in the group, or just one if it's a DEFINE group. */      two branches in the group, or just one if it's a DEFINE group. We do this
4324        in the real compile phase, not in the pre-pass, where the whole group may
4325        not be available. */
4326    
4327      if (bravalue == OP_COND)      if (bravalue == OP_COND && lengthptr == NULL)
4328        {        {
4329        uschar *tc = code;        uschar *tc = code;
4330        int condcount = 0;        int condcount = 0;
# Line 4653  This function is used during the pre-com Line 4656  This function is used during the pre-com
4656  out the amount of memory needed, as well as during the real compile phase. The  out the amount of memory needed, as well as during the real compile phase. The
4657  value of lengthptr distinguishes the two phases.  value of lengthptr distinguishes the two phases.
4658    
4659  Argument:  Arguments:
4660    options        option bits, including any changes for this subpattern    options        option bits, including any changes for this subpattern
4661    oldims         previous settings of ims option bits    oldims         previous settings of ims option bits
4662    codeptr        -> the address of the current code pointer    codeptr        -> the address of the current code pointer
# Line 4806  for (;;) Line 4809  for (;;)
4809        }        }
4810      }      }
4811    
4812    /* Reached end of expression, either ')' or end of pattern. Go back through    /* Reached end of expression, either ')' or end of pattern. In the real
4813    the alternative branches and reverse the chain of offsets, with the field in    compile phase, go back through the alternative branches and reverse the chain
4814    the BRA item now becoming an offset to the first alternative. If there are    of offsets, with the field in the BRA item now becoming an offset to the
4815    no alternatives, it points to the end of the group. The length in the    first alternative. If there are no alternatives, it points to the end of the
4816    terminating ket is always the length of the whole bracketed item. If any of    group. The length in the terminating ket is always the length of the whole
4817    the ims options were changed inside the group, compile a resetting op-code    bracketed item. If any of the ims options were changed inside the group,
4818    following, except at the very end of the pattern. Return leaving the pointer    compile a resetting op-code following, except at the very end of the pattern.
4819    at the terminating char. */    Return leaving the pointer at the terminating char. */
4820    
4821    if (*ptr != '|')    if (*ptr != '|')
4822      {      {
4823      int branch_length = code - last_branch;      if (lengthptr == NULL)
     do  
4824        {        {
4825        int prev_length = GET(last_branch, 1);        int branch_length = code - last_branch;
4826        PUT(last_branch, 1, branch_length);        do
4827        branch_length = prev_length;          {
4828        last_branch -= branch_length;          int prev_length = GET(last_branch, 1);
4829            PUT(last_branch, 1, branch_length);
4830            branch_length = prev_length;
4831            last_branch -= branch_length;
4832            }
4833          while (branch_length > 0);
4834        }        }
     while (branch_length > 0);  
4835    
4836      /* Fill in the ket */      /* Fill in the ket */
4837    
# Line 4852  for (;;) Line 4858  for (;;)
4858      return TRUE;      return TRUE;
4859      }      }
4860    
4861    /* Another branch follows; insert an "or" node. Its length field points back    /* Another branch follows. In the pre-compile phase, we can move the code
4862      pointer back to where it was for the start of the first branch. (That is,
4863      pretend that each branch is the only one.)
4864    
4865      In the real compile phase, insert an ALT node. Its length field points back
4866    to the previous branch while the bracket remains open. At the end the chain    to the previous branch while the bracket remains open. At the end the chain
4867    is reversed. It's done like this so that the start of the bracket has a    is reversed. It's done like this so that the start of the bracket has a
4868    zero offset until it is closed, making it possible to detect recursion. */    zero offset until it is closed, making it possible to detect recursion. */
4869    
4870    *code = OP_ALT;    if (lengthptr != NULL)
4871    PUT(code, 1, code - last_branch);      {
4872    bc.current = last_branch = code;      code = *codeptr + 1 + LINK_SIZE + skipbytes;
4873    code += 1 + LINK_SIZE;      length += 1 + LINK_SIZE;
4874        }
4875      else
4876        {
4877        *code = OP_ALT;
4878        PUT(code, 1, code - last_branch);
4879        bc.current = last_branch = code;
4880        code += 1 + LINK_SIZE;
4881        }
4882    
4883    ptr++;    ptr++;
   length += 1 + LINK_SIZE;  
4884    }    }
4885  /* Control never reaches here */  /* Control never reaches here */
4886  }  }

Legend:
Removed from v.171  
changed lines
  Added in v.172

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12