/[pcre]/code/trunk/pcre_compile.c
ViewVC logotype

Diff of /code/trunk/pcre_compile.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 274 by ph10, Tue Nov 20 10:05:23 2007 UTC revision 295 by ph10, Mon Dec 31 17:00:24 2007 UTC
# Line 1508  for (;;) Line 1508  for (;;)
1508  can match the empty string or not. It is called from could_be_empty()  can match the empty string or not. It is called from could_be_empty()
1509  below and from compile_branch() when checking for an unlimited repeat of a  below and from compile_branch() when checking for an unlimited repeat of a
1510  group that can match nothing. Note that first_significant_code() skips over  group that can match nothing. Note that first_significant_code() skips over
1511  assertions. If we hit an unclosed bracket, we return "empty" - this means we've  backward and negative forward assertions when its final argument is TRUE. If we
1512  struck an inner bracket whose current branch will already have been scanned.  hit an unclosed bracket, we return "empty" - this means we've struck an inner
1513    bracket whose current branch will already have been scanned.
1514    
1515  Arguments:  Arguments:
1516    code        points to start of search    code        points to start of search
# Line 1531  for (code = first_significant_code(code Line 1532  for (code = first_significant_code(code
1532    
1533    c = *code;    c = *code;
1534    
1535      /* Skip over forward assertions; the other assertions are skipped by
1536      first_significant_code() with a TRUE final argument. */
1537    
1538      if (c == OP_ASSERT)
1539        {
1540        do code += GET(code, 1); while (*code == OP_ALT);
1541        c = *code;
1542        continue;
1543        }
1544    
1545    /* Groups with zero repeats can of course be empty; skip them. */    /* Groups with zero repeats can of course be empty; skip them. */
1546    
1547    if (c == OP_BRAZERO || c == OP_BRAMINZERO)    if (c == OP_BRAZERO || c == OP_BRAMINZERO)
# Line 1726  return TRUE; Line 1737  return TRUE;
1737  *************************************************/  *************************************************/
1738    
1739  /* This function is called when the sequence "[:" or "[." or "[=" is  /* This function is called when the sequence "[:" or "[." or "[=" is
1740  encountered in a character class. It checks whether this is followed by an  encountered in a character class. It checks whether this is followed by a
1741  optional ^ and then a sequence of letters, terminated by a matching ":]" or  sequence of characters terminated by a matching ":]" or ".]" or "=]". If we
1742  ".]" or "=]".  reach an unescaped ']' without the special preceding character, return FALSE.
1743    
1744    Originally, this function only recognized a sequence of letters between the
1745    terminators, but it seems that Perl recognizes any sequence of characters,
1746    though of course unknown POSIX names are subsequently rejected. Perl gives an
1747    "Unknown POSIX class" error for [:f\oo:] for example, where previously PCRE
1748    didn't consider this to be a POSIX class. Likewise for [:1234:].
1749    
1750    The problem in trying to be exactly like Perl is in the handling of escapes. We
1751    have to be sure that [abc[:x\]pqr] is *not* treated as containing a POSIX
1752    class, but [abc[:x\]pqr:]] is (so that an error can be generated). The code
1753    below handles the special case of \], but does not try to do any other escape
1754    processing. This makes it different from Perl for cases such as [:l\ower:]
1755    where Perl recognizes it as the POSIX class "lower" but PCRE does not recognize
1756    "l\ower". This is a lesser evil than not diagnosing bad classes when Perl does,
1757    I think.
1758    
1759  Argument:  Arguments:
1760    ptr      pointer to the initial [    ptr      pointer to the initial [
1761    endptr   where to return the end pointer    endptr   where to return the end pointer
   cd       pointer to compile data  
1762    
1763  Returns:   TRUE or FALSE  Returns:   TRUE or FALSE
1764  */  */
1765    
1766  static BOOL  static BOOL
1767  check_posix_syntax(const uschar *ptr, const uschar **endptr, compile_data *cd)  check_posix_syntax(const uschar *ptr, const uschar **endptr)
1768  {  {
1769  int terminator;          /* Don't combine these lines; the Solaris cc */  int terminator;          /* Don't combine these lines; the Solaris cc */
1770  terminator = *(++ptr);   /* compiler warns about "non-constant" initializer. */  terminator = *(++ptr);   /* compiler warns about "non-constant" initializer. */
1771  if (*(++ptr) == '^') ptr++;  for (++ptr; *ptr != 0; ptr++)
 while ((cd->ctypes[*ptr] & ctype_letter) != 0) ptr++;  
 if (*ptr == terminator && ptr[1] == ']')  
1772    {    {
1773    *endptr = ptr;    if (*ptr == '\\' && ptr[1] == ']') ptr++; else
1774    return TRUE;      {
1775    }      if (*ptr == ']') return FALSE;
1776        if (*ptr == terminator && ptr[1] == ']')
1777          {
1778          *endptr = ptr;
1779          return TRUE;
1780          }
1781        }
1782      }
1783  return FALSE;  return FALSE;
1784  }  }
1785    
# Line 2385  req_caseopt = ((options & PCRE_CASELESS) Line 2415  req_caseopt = ((options & PCRE_CASELESS)
2415  for (;; ptr++)  for (;; ptr++)
2416    {    {
2417    BOOL negate_class;    BOOL negate_class;
2418    BOOL should_flip_negation;    BOOL should_flip_negation;
2419    BOOL possessive_quantifier;    BOOL possessive_quantifier;
2420    BOOL is_quantifier;    BOOL is_quantifier;
2421    BOOL is_recurse;    BOOL is_recurse;
# Line 2609  for (;; ptr++) Line 2639  for (;; ptr++)
2639      they are encountered at the top level, so we'll do that too. */      they are encountered at the top level, so we'll do that too. */
2640    
2641      if ((ptr[1] == ':' || ptr[1] == '.' || ptr[1] == '=') &&      if ((ptr[1] == ':' || ptr[1] == '.' || ptr[1] == '=') &&
2642          check_posix_syntax(ptr, &tempptr, cd))          check_posix_syntax(ptr, &tempptr))
2643        {        {
2644        *errorcodeptr = (ptr[1] == ':')? ERR13 : ERR31;        *errorcodeptr = (ptr[1] == ':')? ERR13 : ERR31;
2645        goto FAILED;        goto FAILED;
# Line 2634  for (;; ptr++) Line 2664  for (;; ptr++)
2664        else break;        else break;
2665        }        }
2666    
2667      /* If a class contains a negative special such as \S, we need to flip the      /* If a class contains a negative special such as \S, we need to flip the
2668      negation flag at the end, so that support for characters > 255 works      negation flag at the end, so that support for characters > 255 works
2669      correctly (they are all included in the class). */      correctly (they are all included in the class). */
2670    
2671      should_flip_negation = FALSE;      should_flip_negation = FALSE;
# Line 2695  for (;; ptr++) Line 2725  for (;; ptr++)
2725    
2726        if (c == '[' &&        if (c == '[' &&
2727            (ptr[1] == ':' || ptr[1] == '.' || ptr[1] == '=') &&            (ptr[1] == ':' || ptr[1] == '.' || ptr[1] == '=') &&
2728            check_posix_syntax(ptr, &tempptr, cd))            check_posix_syntax(ptr, &tempptr))
2729          {          {
2730          BOOL local_negate = FALSE;          BOOL local_negate = FALSE;
2731          int posix_class, taboffset, tabopt;          int posix_class, taboffset, tabopt;
# Line 2712  for (;; ptr++) Line 2742  for (;; ptr++)
2742          if (*ptr == '^')          if (*ptr == '^')
2743            {            {
2744            local_negate = TRUE;            local_negate = TRUE;
2745            should_flip_negation = TRUE;  /* Note negative special */            should_flip_negation = TRUE;  /* Note negative special */
2746            ptr++;            ptr++;
2747            }            }
2748    
# Line 2787  for (;; ptr++) Line 2817  for (;; ptr++)
2817          c = check_escape(&ptr, errorcodeptr, cd->bracount, options, TRUE);          c = check_escape(&ptr, errorcodeptr, cd->bracount, options, TRUE);
2818          if (*errorcodeptr != 0) goto FAILED;          if (*errorcodeptr != 0) goto FAILED;
2819    
2820          if (-c == ESC_b) c = '\b';       /* \b is backslash in a class */          if (-c == ESC_b) c = '\b';       /* \b is backspace in a class */
2821          else if (-c == ESC_X) c = 'X';   /* \X is literal X in a class */          else if (-c == ESC_X) c = 'X';   /* \X is literal X in a class */
2822          else if (-c == ESC_R) c = 'R';   /* \R is literal R in a class */          else if (-c == ESC_R) c = 'R';   /* \R is literal R in a class */
2823          else if (-c == ESC_Q)            /* Handle start of quoted string */          else if (-c == ESC_Q)            /* Handle start of quoted string */
# Line 2815  for (;; ptr++) Line 2845  for (;; ptr++)
2845              continue;              continue;
2846    
2847              case ESC_D:              case ESC_D:
2848              should_flip_negation = TRUE;              should_flip_negation = TRUE;
2849              for (c = 0; c < 32; c++) classbits[c] |= ~cbits[c+cbit_digit];              for (c = 0; c < 32; c++) classbits[c] |= ~cbits[c+cbit_digit];
2850              continue;              continue;
2851    
# Line 2824  for (;; ptr++) Line 2854  for (;; ptr++)
2854              continue;              continue;
2855    
2856              case ESC_W:              case ESC_W:
2857              should_flip_negation = TRUE;              should_flip_negation = TRUE;
2858              for (c = 0; c < 32; c++) classbits[c] |= ~cbits[c+cbit_word];              for (c = 0; c < 32; c++) classbits[c] |= ~cbits[c+cbit_word];
2859              continue;              continue;
2860    
# Line 2834  for (;; ptr++) Line 2864  for (;; ptr++)
2864              continue;              continue;
2865    
2866              case ESC_S:              case ESC_S:
2867              should_flip_negation = TRUE;              should_flip_negation = TRUE;
2868              for (c = 0; c < 32; c++) classbits[c] |= ~cbits[c+cbit_space];              for (c = 0; c < 32; c++) classbits[c] |= ~cbits[c+cbit_space];
2869              classbits[1] |= 0x08;    /* Perl 5.004 onwards omits VT from \s */              classbits[1] |= 0x08;    /* Perl 5.004 onwards omits VT from \s */
2870              continue;              continue;
2871    
             case ESC_E: /* Perl ignores an orphan \E */  
             continue;  
   
2872              default:    /* Not recognized; fall through */              default:    /* Not recognized; fall through */
2873              break;      /* Need "default" setting to stop compiler warning. */              break;      /* Need "default" setting to stop compiler warning. */
2874              }              }
# Line 3076  for (;; ptr++) Line 3103  for (;; ptr++)
3103            d = check_escape(&ptr, errorcodeptr, cd->bracount, options, TRUE);            d = check_escape(&ptr, errorcodeptr, cd->bracount, options, TRUE);
3104            if (*errorcodeptr != 0) goto FAILED;            if (*errorcodeptr != 0) goto FAILED;
3105    
3106            /* \b is backslash; \X is literal X; \R is literal R; any other            /* \b is backspace; \X is literal X; \R is literal R; any other
3107            special means the '-' was literal */            special means the '-' was literal */
3108    
3109            if (d < 0)            if (d < 0)
# Line 3340  we set the flag only if there is a liter Line 3367  we set the flag only if there is a liter
3367      zeroreqbyte = reqbyte;      zeroreqbyte = reqbyte;
3368    
3369      /* If there are characters with values > 255, we have to compile an      /* If there are characters with values > 255, we have to compile an
3370      extended class, with its own opcode, unless there was a negated special      extended class, with its own opcode, unless there was a negated special
3371      such as \S in the class, because in that case all characters > 255 are in      such as \S in the class, because in that case all characters > 255 are in
3372      the class, so any that were explicitly given as well can be ignored. If      the class, so any that were explicitly given as well can be ignored. If
3373      (when there are explicit characters > 255 that must be listed) there are no      (when there are explicit characters > 255 that must be listed) there are no
3374      characters < 256, we can omit the bitmap in the actual compiled code. */      characters < 256, we can omit the bitmap in the actual compiled code. */
3375    
# Line 3373  we set the flag only if there is a liter Line 3400  we set the flag only if there is a liter
3400        }        }
3401  #endif  #endif
3402    
3403      /* If there are no characters > 255, set the opcode to OP_CLASS or      /* If there are no characters > 255, set the opcode to OP_CLASS or
3404      OP_NCLASS, depending on whether the whole class was negated and whether      OP_NCLASS, depending on whether the whole class was negated and whether
3405      there were negative specials such as \S in the class. Then copy the 32-byte      there were negative specials such as \S in the class. Then copy the 32-byte
3406      map into the code vector, negating it if necessary. */      map into the code vector, negating it if necessary. */
3407    
3408      *code++ = (negate_class == should_flip_negation) ? OP_CLASS : OP_NCLASS;      *code++ = (negate_class == should_flip_negation) ? OP_CLASS : OP_NCLASS;
3409      if (negate_class)      if (negate_class)
3410        {        {
# Line 4021  we set the flag only if there is a liter Line 4048  we set the flag only if there is a liter
4048        int len;        int len;
4049        if (*tempcode == OP_EXACT || *tempcode == OP_TYPEEXACT ||        if (*tempcode == OP_EXACT || *tempcode == OP_TYPEEXACT ||
4050            *tempcode == OP_NOTEXACT)            *tempcode == OP_NOTEXACT)
4051          tempcode += _pcre_OP_lengths[*tempcode];          tempcode += _pcre_OP_lengths[*tempcode] +
4052              ((*tempcode == OP_TYPEEXACT &&
4053                 (tempcode[3] == OP_PROP || tempcode[3] == OP_NOTPROP))? 2:0);
4054        len = code - tempcode;        len = code - tempcode;
4055        if (len > 0) switch (*tempcode)        if (len > 0) switch (*tempcode)
4056          {          {
# Line 4248  we set the flag only if there is a liter Line 4277  we set the flag only if there is a liter
4277              *errorcodeptr = ERR58;              *errorcodeptr = ERR58;
4278              goto FAILED;              goto FAILED;
4279              }              }
4280            recno = (refsign == '-')?            recno = (refsign == '-')?
4281              cd->bracount - recno + 1 : recno +cd->bracount;              cd->bracount - recno + 1 : recno +cd->bracount;
4282            if (recno <= 0 || recno > cd->final_bracount)            if (recno <= 0 || recno > cd->final_bracount)
4283              {              {
# Line 4327  we set the flag only if there is a liter Line 4356  we set the flag only if there is a liter
4356            }            }
4357    
4358          /* Check for the "name" actually being a subpattern number. We are          /* Check for the "name" actually being a subpattern number. We are
4359          in the second pass here, so final_bracount is set. */          in the second pass here, so final_bracount is set. */
4360    
4361          else if (recno > 0 && recno <= cd->final_bracount)          else if (recno > 0 && recno <= cd->final_bracount)
4362            {            {
# Line 4541  we set the flag only if there is a liter Line 4570  we set the flag only if there is a liter
4570              {              {
4571              *errorcodeptr = ERR62;              *errorcodeptr = ERR62;
4572              goto FAILED;              goto FAILED;
4573              }              }
4574            if (*ptr != terminator)            if (*ptr != terminator)
4575              {              {
4576              *errorcodeptr = ERR42;              *errorcodeptr = ERR42;
# Line 4555  we set the flag only if there is a liter Line 4584  we set the flag only if there is a liter
4584            recno = 0;            recno = 0;
4585            }            }
4586    
4587          /* In the real compile, seek the name in the table. We check the name          /* In the real compile, seek the name in the table. We check the name
4588          first, and then check that we have reached the end of the name in the          first, and then check that we have reached the end of the name in the
4589          table. That way, if the name is longer than any in the table,          table. That way, if the name is longer than any in the table,
4590          the comparison will fail without reading beyond the table entry. */          the comparison will fail without reading beyond the table entry. */
4591    
# Line 4566  we set the flag only if there is a liter Line 4595  we set the flag only if there is a liter
4595            for (i = 0; i < cd->names_found; i++)            for (i = 0; i < cd->names_found; i++)
4596              {              {
4597              if (strncmp((char *)name, (char *)slot+2, namelen) == 0 &&              if (strncmp((char *)name, (char *)slot+2, namelen) == 0 &&
4598                  slot[2+namelen] == 0)                  slot[2+namelen] == 0)
4599                break;                break;
4600              slot += cd->name_entry_size;              slot += cd->name_entry_size;
4601              }              }
# Line 4604  we set the flag only if there is a liter Line 4633  we set the flag only if there is a liter
4633            {            {
4634            const uschar *called;            const uschar *called;
4635    
4636            if ((refsign = *ptr) == '+')            if ((refsign = *ptr) == '+')
4637              {              {
4638              ptr++;              ptr++;
4639              if ((digitab[*ptr] & ctype_digit) == 0)              if ((digitab[*ptr] & ctype_digit) == 0)
4640                {                {
4641                *errorcodeptr = ERR63;                *errorcodeptr = ERR63;
4642                goto FAILED;                goto FAILED;
4643                }                }
4644              }              }
4645            else if (refsign == '-')            else if (refsign == '-')
4646              {              {
4647              if ((digitab[ptr[1]] & ctype_digit) == 0)              if ((digitab[ptr[1]] & ctype_digit) == 0)

Legend:
Removed from v.274  
changed lines
  Added in v.295

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12