/[pcre]/code/trunk/pcre_compile.c
ViewVC logotype

Diff of /code/trunk/pcre_compile.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 638 by ph10, Mon Jul 25 09:41:19 2011 UTC revision 640 by ph10, Mon Jul 25 10:50:28 2011 UTC
# Line 578  return s; Line 578  return s;
578    
579    
580  /*************************************************  /*************************************************
581    *            Check for counted repeat            *
582    *************************************************/
583    
584    /* This function is called when a '{' is encountered in a place where it might
585    start a quantifier. It looks ahead to see if it really is a quantifier or not.
586    It is only a quantifier if it is one of the forms {ddd} {ddd,} or {ddd,ddd}
587    where the ddds are digits.
588    
589    Arguments:
590      p         pointer to the first char after '{'
591    
592    Returns:    TRUE or FALSE
593    */
594    
595    static BOOL  /* Look-ahead test only: p is a local copy, so the caller's pointer is not moved. */
596    is_counted_repeat(const uschar *p)
597    {
598    if ((digitab[*p++] & ctype_digit) == 0) return FALSE;  /* first char after '{' must be a digit */
599    while ((digitab[*p] & ctype_digit) != 0) p++;  /* skip the rest of the first number */
600    if (*p == CHAR_RIGHT_CURLY_BRACKET) return TRUE;  /* form {ddd} */
601    
602    if (*p++ != CHAR_COMMA) return FALSE;  /* after the first number only '}' or ',' is acceptable */
603    if (*p == CHAR_RIGHT_CURLY_BRACKET) return TRUE;  /* form {ddd,} */
604    
605    if ((digitab[*p++] & ctype_digit) == 0) return FALSE;  /* a second number needs at least one digit */
606    while ((digitab[*p] & ctype_digit) != 0) p++;  /* skip the rest of the second number */
607    
608    return (*p == CHAR_RIGHT_CURLY_BRACKET);  /* form {ddd,ddd} only if '}' closes it */
609    }
610    
611    
612    
613    /*************************************************
614  *            Handle escapes                      *  *            Handle escapes                      *
615  *************************************************/  *************************************************/
616    
# Line 648  else Line 681  else
681      *errorcodeptr = ERR37;      *errorcodeptr = ERR37;
682      break;      break;
683    
684      /* \g must be followed by one of a number of specific things:      /* In a character class, \g is just a literal "g". Outside a character
685        class, \g must be followed by one of a number of specific things:
686    
687      (1) A number, either plain or braced. If positive, it is an absolute      (1) A number, either plain or braced. If positive, it is an absolute
688      backreference. If negative, it is a relative backreference. This is a Perl      backreference. If negative, it is a relative backreference. This is a Perl
# Line 665  else Line 699  else
699      the -ESC_g code (cf \k). */      the -ESC_g code (cf \k). */
700    
701      case CHAR_g:      case CHAR_g:
702        if (isclass) break;
703      if (ptr[1] == CHAR_LESS_THAN_SIGN || ptr[1] == CHAR_APOSTROPHE)      if (ptr[1] == CHAR_LESS_THAN_SIGN || ptr[1] == CHAR_APOSTROPHE)
704        {        {
705        c = -ESC_g;        c = -ESC_g;
# Line 886  else Line 921  else
921    }    }
922    
923  /* Perl supports \N{name} for character names, as well as plain \N for "not  /* Perl supports \N{name} for character names, as well as plain \N for "not
924  newline". PCRE does not support \N{name}. */  newline". PCRE does not support \N{name}. However, it does support
925    quantification such as \N{2,3}. */
926    
927  if (c == -ESC_N && ptr[1] == CHAR_LEFT_CURLY_BRACKET)  if (c == -ESC_N && ptr[1] == CHAR_LEFT_CURLY_BRACKET &&
928         !is_counted_repeat(ptr+2))
929    *errorcodeptr = ERR37;    *errorcodeptr = ERR37;
930    
931  /* If PCRE_UCP is set, we change the values for \d etc. */  /* If PCRE_UCP is set, we change the values for \d etc. */
# Line 998  return -1; Line 1035  return -1;
1035    
1036    
1037  /*************************************************  /*************************************************
 *            Check for counted repeat            *  
 *************************************************/  
   
 /* This function is called when a '{' is encountered in a place where it might  
 start a quantifier. It looks ahead to see if it really is a quantifier or not.  
 It is only a quantifier if it is one of the forms {ddd} {ddd,} or {ddd,ddd}  
 where the ddds are digits.  
   
 Arguments:  
   p         pointer to the first char after '{'  
   
 Returns:    TRUE or FALSE  
 */  
   
 static BOOL  
 is_counted_repeat(const uschar *p)  
 {  
 if ((digitab[*p++] & ctype_digit) == 0) return FALSE;  
 while ((digitab[*p] & ctype_digit) != 0) p++;  
 if (*p == CHAR_RIGHT_CURLY_BRACKET) return TRUE;  
   
 if (*p++ != CHAR_COMMA) return FALSE;  
 if (*p == CHAR_RIGHT_CURLY_BRACKET) return TRUE;  
   
 if ((digitab[*p++] & ctype_digit) == 0) return FALSE;  
 while ((digitab[*p] & ctype_digit) != 0) p++;  
   
 return (*p == CHAR_RIGHT_CURLY_BRACKET);  
 }  
   
   
   
 /*************************************************  
1038  *         Read repeat counts                     *  *         Read repeat counts                     *
1039  *************************************************/  *************************************************/
1040    
# Line 2288  where Perl recognizes it as the POSIX cl Line 2292  where Perl recognizes it as the POSIX cl
2292  "l\ower". This is a lesser evil than not diagnosing bad classes when Perl does,  "l\ower". This is a lesser evil than not diagnosing bad classes when Perl does,
2293  I think.  I think.
2294    
2295    A user pointed out that PCRE was rejecting [:a[:digit:]] whereas Perl was not.
2296    It seems that the appearance of a nested POSIX class supersedes an apparent
2297    external class. For example, [:a[:digit:]b:] matches "a", "b", ":", or
2298    a digit. Unescaped square brackets may also appear as part of class
2299    names. For example, [:a[:abc]b:] gives unknown class "[:abc]b:]" in Perl.
2300    
2301  Arguments:  Arguments:
2302    ptr      pointer to the initial [    ptr      pointer to the initial [
2303    endptr   where to return the end pointer    endptr   where to return the end pointer
# Line 2302  int terminator; /* Don't combin Line 2312  int terminator; /* Don't combin
2312  terminator = *(++ptr);   /* compiler warns about "non-constant" initializer. */  terminator = *(++ptr);   /* compiler warns about "non-constant" initializer. */
2313  for (++ptr; *ptr != 0; ptr++)  for (++ptr; *ptr != 0; ptr++)
2314    {    {
2315    if (*ptr == CHAR_BACKSLASH && ptr[1] == CHAR_RIGHT_SQUARE_BRACKET) ptr++; else    if (*ptr == CHAR_BACKSLASH && ptr[1] == CHAR_RIGHT_SQUARE_BRACKET)
2316        ptr++;
2317      else
2318      {      {
     if (*ptr == CHAR_RIGHT_SQUARE_BRACKET) return FALSE;  
2319      if (*ptr == terminator && ptr[1] == CHAR_RIGHT_SQUARE_BRACKET)      if (*ptr == terminator && ptr[1] == CHAR_RIGHT_SQUARE_BRACKET)
2320        {        {
2321        *endptr = ptr;        *endptr = ptr;
2322        return TRUE;        return TRUE;
2323        }        }
2324        if (*ptr == CHAR_LEFT_SQUARE_BRACKET &&
2325             (ptr[1] == CHAR_COLON || ptr[1] == CHAR_DOT ||
2326              ptr[1] == CHAR_EQUALS_SIGN) &&
2327            check_posix_syntax(ptr, endptr))
2328          return FALSE;
2329      }      }
2330    }    }
2331  return FALSE;  return FALSE;

Legend:
Removed from v.638  
changed lines
  Added in v.640

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12