/[pcre]/code/trunk/pcre.c
ViewVC logotype

Diff of /code/trunk/pcre.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 7 by nigel, Sat Feb 24 21:38:09 2007 UTC revision 9 by nigel, Sat Feb 24 21:38:13 2007 UTC
# Line 33  restrictions: Line 33  restrictions:
33    
34  /* #define DEBUG */  /* #define DEBUG */
35    
36    /* Use a macro for debugging printing, 'cause that eliminates the use
37    of #ifdef inline, and there are *still* stupid compilers about that don't like
38    indented pre-processor statements. I suppose it's only been 10 years... */
39    
40    #ifdef DEBUG
41    #define DPRINTF(p) printf p
42    #else
43    #define DPRINTF(p) /*nothing*/
44    #endif
45    
46  /* Include the internals header, which itself includes Standard C headers plus  /* Include the internals header, which itself includes Standard C headers plus
47  the external pcre header. */  the external pcre header. */
# Line 187  Arguments: Line 196  Arguments:
196  Returns:     nothing  Returns:     nothing
197  */  */
198    
199  static pchars(uschar *p, int length, BOOL is_subject, match_data *md)  static void
200    pchars(const uschar *p, int length, BOOL is_subject, match_data *md)
201  {  {
202  int c;  int c;
203  if (is_subject && length > md->end_subject - p) length = md->end_subject - p;  if (is_subject && length > md->end_subject - p) length = md->end_subject - p;
# Line 977  for (;; ptr++) Line 987  for (;; ptr++)
987            if (code == previous) code += 2; else previous[1]++;            if (code == previous) code += 2; else previous[1]++;
988            }            }
989    
990          /* Insert an UPTO if the max is greater than the min. */          /* If the maximum is unlimited, insert an OP_STAR. */
991    
992            if (repeat_max < 0)
993              {
994              *code++ = c;
995              *code++ = OP_STAR + repeat_type;
996              }
997    
998            /* Else insert an UPTO if the max is greater than the min. */
999    
1000          if (repeat_max != repeat_min)          else if (repeat_max != repeat_min)
1001            {            {
1002            *code++ = c;            *code++ = c;
1003            repeat_max -= repeat_min;            repeat_max -= repeat_min;
# Line 1023  for (;; ptr++) Line 1041  for (;; ptr++)
1041      else if ((int)*previous >= OP_BRA)      else if ((int)*previous >= OP_BRA)
1042        {        {
1043        int i;        int i;
1044        int length = code - previous;        int len = code - previous;
1045    
1046        if (repeat_max == -1 && could_be_empty(previous))        if (repeat_max == -1 && could_be_empty(previous))
1047          {          {
# Line 1040  for (;; ptr++) Line 1058  for (;; ptr++)
1058          {          {
1059          for (i = 1; i < repeat_min; i++)          for (i = 1; i < repeat_min; i++)
1060            {            {
1061            memcpy(code, previous, length);            memcpy(code, previous, len);
1062            code += length;            code += len;
1063            }            }
1064          }          }
1065    
# Line 1053  for (;; ptr++) Line 1071  for (;; ptr++)
1071          {          {
1072          if (repeat_min == 0)          if (repeat_min == 0)
1073            {            {
1074            memmove(previous+1, previous, length);            memmove(previous+1, previous, len);
1075            code++;            code++;
1076            *previous++ = OP_BRAZERO + repeat_type;            *previous++ = OP_BRAZERO + repeat_type;
1077            }            }
1078    
1079          for (i = 1; i < repeat_min; i++)          for (i = 1; i < repeat_min; i++)
1080            {            {
1081            memcpy(code, previous, length);            memcpy(code, previous, len);
1082            code += length;            code += len;
1083            }            }
1084    
1085          for (i = (repeat_min > 0)? repeat_min : 1; i < repeat_max; i++)          for (i = (repeat_min > 0)? repeat_min : 1; i < repeat_max; i++)
1086            {            {
1087            *code++ = OP_BRAZERO + repeat_type;            *code++ = OP_BRAZERO + repeat_type;
1088            memcpy(code, previous, length);            memcpy(code, previous, len);
1089            code += length;            code += len;
1090            }            }
1091          }          }
1092    
# Line 1529  if ((options & ~PUBLIC_OPTIONS) != 0) Line 1547  if ((options & ~PUBLIC_OPTIONS) != 0)
1547    return NULL;    return NULL;
1548    }    }
1549    
1550  #ifdef DEBUG  DPRINTF(("------------------------------------------------------------------\n"));
1551  printf("------------------------------------------------------------------\n");  DPRINTF(("%s\n", pattern));
 printf("%s\n", pattern);  
 #endif  
1552    
1553  /* The first thing to do is to make a pass over the pattern to compute the  /* The first thing to do is to make a pass over the pattern to compute the
1554  amount of store required to hold the compiled code. This does not have to be  amount of store required to hold the compiled code. This does not have to be
# Line 1647  while ((c = *(++ptr)) != 0) Line 1663  while ((c = *(++ptr)) != 0)
1663        {        {
1664        if (*ptr == '\\')        if (*ptr == '\\')
1665          {          {
1666          int c = check_escape(&ptr, errorptr, bracount, options, TRUE);          int ch = check_escape(&ptr, errorptr, bracount, options, TRUE);
1667          if (*errorptr != NULL) goto PCRE_ERROR_RETURN;          if (*errorptr != NULL) goto PCRE_ERROR_RETURN;
1668          if (-c == ESC_b) class_charcount++; else class_charcount = 10;          if (-ch == ESC_b) class_charcount++; else class_charcount = 10;
1669          }          }
1670        else class_charcount++;        else class_charcount++;
1671        ptr++;        ptr++;
# Line 1664  while ((c = *(++ptr)) != 0) Line 1680  while ((c = *(++ptr)) != 0)
1680    
1681        /* A repeat needs either 1 or 5 bytes. */        /* A repeat needs either 1 or 5 bytes. */
1682    
1683        if (ptr[1] == '{' && is_counted_repeat(ptr+2))        if (*ptr != 0 && ptr[1] == '{' && is_counted_repeat(ptr+2))
1684          {          {
1685          ptr = read_repeat_counts(ptr+2, &min, &max, errorptr);          ptr = read_repeat_counts(ptr+2, &min, &max, errorptr);
1686          if (*errorptr != NULL) goto PCRE_ERROR_RETURN;          if (*errorptr != NULL) goto PCRE_ERROR_RETURN;
# Line 1772  while ((c = *(++ptr)) != 0) Line 1788  while ((c = *(++ptr)) != 0)
1788      continue;      continue;
1789    
1790      /* Handle ket. Look for subsequent max/min; for certain sets of values we      /* Handle ket. Look for subsequent max/min; for certain sets of values we
1791      have to replicate this bracket up to that many times. */      have to replicate this bracket up to that many times. If brastackptr is
1792        0 this is an unmatched bracket which will generate an error, but take care
1793        not to try to access brastack[-1]. */
1794    
1795      case ')':      case ')':
1796      length += 3;      length += 3;
1797        {        {
1798        int min = 1;        int minval = 1;
1799        int max = 1;        int maxval = 1;
1800        int duplength = length - brastack[--brastackptr];        int duplength = (brastackptr > 0)? length - brastack[--brastackptr] : 0;
1801    
1802        /* Leave ptr at the final char; for read_repeat_counts this happens        /* Leave ptr at the final char; for read_repeat_counts this happens
1803        automatically; for the others we need an increment. */        automatically; for the others we need an increment. */
1804    
1805        if ((c = ptr[1]) == '{' && is_counted_repeat(ptr+2))        if ((c = ptr[1]) == '{' && is_counted_repeat(ptr+2))
1806          {          {
1807          ptr = read_repeat_counts(ptr+2, &min, &max, errorptr);          ptr = read_repeat_counts(ptr+2, &minval, &maxval, errorptr);
1808          if (*errorptr != NULL) goto PCRE_ERROR_RETURN;          if (*errorptr != NULL) goto PCRE_ERROR_RETURN;
1809          }          }
1810        else if (c == '*') { min = 0; max = -1; ptr++; }        else if (c == '*') { minval = 0; maxval = -1; ptr++; }
1811        else if (c == '+') { max = -1; ptr++; }        else if (c == '+') { maxval = -1; ptr++; }
1812        else if (c == '?') { min = 0; ptr++; }        else if (c == '?') { minval = 0; ptr++; }
1813    
1814        /* If there is a minimum > 1 we have to replicate up to min-1 times; if        /* If there is a minimum > 1 we have to replicate up to minval-1 times;
1815        there is a limited maximum we have to replicate up to max-1 times and        if there is a limited maximum we have to replicate up to maxval-1 times
1816        allow for a BRAZERO item before each optional copy, as we also have to        and allow for a BRAZERO item before each optional copy, as we also have
1817        do before the first copy if the minimum is zero. */        to do before the first copy if the minimum is zero. */
1818    
1819        if (min == 0) length++;        if (minval == 0) length++;
1820          else if (min > 1) length += (min - 1) * duplength;          else if (minval > 1) length += (minval - 1) * duplength;
1821        if (max > min) length += (max - min) * (duplength + 1);        if (maxval > minval) length += (maxval - minval) * (duplength + 1);
1822        }        }
   
1823      continue;      continue;
1824    
1825      /* Non-special character. For a run of such characters the length required      /* Non-special character. For a run of such characters the length required
# Line 1863  if (length > 65539) Line 1880  if (length > 65539)
1880    }    }
1881    
1882  /* Compute the size of data block needed and get it, either from malloc or  /* Compute the size of data block needed and get it, either from malloc or
1883  externally provided function. Put in the magic number and the options. */  externally provided function. We specify "code[0]" in the offsetof() expression
1884    rather than just "code", because it has been reported that one broken compiler
1885    fails on "code" because it is also an independent variable. It should make no
1886    difference to the value of the offsetof(). */
1887    
1888  size = length + offsetof(real_pcre, code);  size = length + offsetof(real_pcre, code[0]);
1889  re = (real_pcre *)(pcre_malloc)(size);  re = (real_pcre *)(pcre_malloc)(size);
1890    
1891  if (re == NULL)  if (re == NULL)
# Line 1874  if (re == NULL) Line 1894  if (re == NULL)
1894    return NULL;    return NULL;
1895    }    }
1896    
1897    /* Put in the magic number and the options. */
1898    
1899  re->magic_number = MAGIC_NUMBER;  re->magic_number = MAGIC_NUMBER;
1900  re->options = options;  re->options = options;
1901    
# Line 1924  if ((options & PCRE_ANCHORED) == 0) Line 1946  if ((options & PCRE_ANCHORED) == 0)
1946      re->options |= PCRE_ANCHORED;      re->options |= PCRE_ANCHORED;
1947    else    else
1948      {      {
1949      int c = find_firstchar(re->code);      int ch = find_firstchar(re->code);
1950      if (c >= 0)      if (ch >= 0)
1951        {        {
1952        re->first_char = c;        re->first_char = ch;
1953        re->options |= PCRE_FIRSTSET;        re->options |= PCRE_FIRSTSET;
1954        }        }
1955      else if (is_startline(re->code))      else if (is_startline(re->code))
# Line 2064  while (code < code_end) Line 2086  while (code < code_end)
2086    
2087      case OP_REF:      case OP_REF:
2088      printf("    \\%d", *(++code));      printf("    \\%d", *(++code));
2089      break;      code ++;
2090        goto CLASS_REF_REPEAT;
2091    
2092      case OP_CLASS:      case OP_CLASS:
2093        {        {
# Line 2094  while (code < code_end) Line 2117  while (code < code_end)
2117        printf("]");        printf("]");
2118        code += 32;        code += 32;
2119    
2120          CLASS_REF_REPEAT:
2121    
2122        switch(*code)        switch(*code)
2123          {          {
2124          case OP_CRSTAR:          case OP_CRSTAR:
# Line 2278  for (;;) Line 2303  for (;;)
2303      int number = (*ecode - OP_BRA) << 1;      int number = (*ecode - OP_BRA) << 1;
2304      int save_offset1 = 0, save_offset2 = 0;      int save_offset1 = 0, save_offset2 = 0;
2305    
2306      #ifdef DEBUG      DPRINTF(("start bracket %d\n", number/2));
     printf("start bracket %d\n", number/2);  
     #endif  
2307    
2308      if (number > 0 && number < md->offset_end)      if (number > 0 && number < md->offset_end)
2309        {        {
# Line 2288  for (;;) Line 2311  for (;;)
2311        save_offset2 = md->offset_vector[number+1];        save_offset2 = md->offset_vector[number+1];
2312        md->offset_vector[number] = eptr - md->start_subject;        md->offset_vector[number] = eptr - md->start_subject;
2313    
2314        #ifdef DEBUG        DPRINTF(("saving %d %d\n", save_offset1, save_offset2));
       printf("saving %d %d\n", save_offset1, save_offset2);  
       #endif  
2315        }        }
2316    
2317      /* Recurse for all the alternatives. */      /* Recurse for all the alternatives. */
# Line 2302  for (;;) Line 2323  for (;;)
2323        }        }
2324      while (*ecode == OP_ALT);      while (*ecode == OP_ALT);
2325    
2326      #ifdef DEBUG      DPRINTF(("bracket %d failed\n", number/2));
     printf("bracket %d failed\n", number/2);  
     #endif  
2327    
2328      if (number > 0 && number < md->offset_end)      if (number > 0 && number < md->offset_end)
2329        {        {
# Line 2443  for (;;) Line 2462  for (;;)
2462    
2463        number = (*prev - OP_BRA) << 1;        number = (*prev - OP_BRA) << 1;
2464    
2465        #ifdef DEBUG        DPRINTF(("end bracket %d\n", number/2));
       printf("end bracket %d\n", number/2);  
       #endif  
2466    
2467        if (number > 0)        if (number > 0)
2468          {          {
# Line 2813  for (;;) Line 2830  for (;;)
2830        register int length = ecode[1];        register int length = ecode[1];
2831        ecode += 2;        ecode += 2;
2832    
2833        #ifdef DEBUG  #ifdef DEBUG    /* Sigh. Some compilers never learn. */
2834        if (eptr >= md->end_subject)        if (eptr >= md->end_subject)
2835          printf("matching subject <null> against pattern ");          printf("matching subject <null> against pattern ");
2836        else        else
# Line 2824  for (;;) Line 2841  for (;;)
2841          }          }
2842        pchars(ecode, length, FALSE, md);        pchars(ecode, length, FALSE, md);
2843        printf("\n");        printf("\n");
2844        #endif  #endif
2845    
2846        if (length > md->end_subject - eptr) return FALSE;        if (length > md->end_subject - eptr) return FALSE;
2847        if (md->caseless)        if (md->caseless)
# Line 2881  for (;;) Line 2898  for (;;)
2898      maximum. Alternatively, if maximizing, find the maximum number of      maximum. Alternatively, if maximizing, find the maximum number of
2899      characters and work backwards. */      characters and work backwards. */
2900    
2901      #ifdef DEBUG      DPRINTF(("matching %c{%d,%d} against subject %.*s\n", c, min, max,
2902      printf("matching %c{%d,%d} against subject %.*s\n", c, min, max,        max, eptr));
       max, eptr);  
     #endif  
2903    
2904      if (md->caseless)      if (md->caseless)
2905        {        {
# Line 2949  for (;;) Line 2964  for (;;)
2964      /* Match a negated single character */      /* Match a negated single character */
2965    
2966      case OP_NOT:      case OP_NOT:
2967      if (eptr > md->end_subject) return FALSE;      if (eptr >= md->end_subject) return FALSE;
2968      ecode++;      ecode++;
2969      if (md->caseless)      if (md->caseless)
2970        {        {
# Line 3008  for (;;) Line 3023  for (;;)
3023      maximum. Alternatively, if maximizing, find the maximum number of      maximum. Alternatively, if maximizing, find the maximum number of
3024      characters and work backwards. */      characters and work backwards. */
3025    
3026      #ifdef DEBUG      DPRINTF(("negative matching %c{%d,%d} against subject %.*s\n", c, min, max,
3027      printf("negative matching %c{%d,%d} against subject %.*s\n", c, min, max,        max, eptr));
       max, eptr);  
     #endif  
3028    
3029      if (md->caseless)      if (md->caseless)
3030        {        {
# Line 3261  for (;;) Line 3274  for (;;)
3274      /* There's been some horrible disaster. */      /* There's been some horrible disaster. */
3275    
3276      default:      default:
3277      #ifdef DEBUG      DPRINTF(("Unknown opcode %d\n", *ecode));
     printf("Unknown opcode %d\n", *ecode);  
     #endif  
3278      md->errorcode = PCRE_ERROR_UNKNOWN_NODE;      md->errorcode = PCRE_ERROR_UNKNOWN_NODE;
3279      return FALSE;      return FALSE;
3280      }      }
# Line 3279  for (;;) Line 3290  for (;;)
3290    
3291    
3292  /*************************************************  /*************************************************
3293    *         Segregate setjmp()                     *
3294    *************************************************/
3295    
3296    /* The -Wall option of gcc gives warnings for all local variables when setjmp()
3297    is used, even if the coding conforms to the rules of ANSI C. To avoid this, we
3298    hide it in a separate function. This is called only when PCRE_EXTRA is set,
3299    since it's needed only for the extension \X option, and with any luck, a good
3300    compiler will spot the tail recursion and compile it efficiently.
3301    
3302    Arguments:    The block containing the match data
3303    Returns:      The return from setjump()
3304    */
3305    
3306    static int
3307    my_setjmp(match_data *match_block)
3308    {
3309    return setjmp(match_block->fail_env);
3310    }
3311    
3312    
3313    
3314    /*************************************************
3315  *         Execute a Regular Expression           *  *         Execute a Regular Expression           *
3316  *************************************************/  *************************************************/
3317    
# Line 3310  int ocount = offsetcount; Line 3343  int ocount = offsetcount;
3343  int first_char = -1;  int first_char = -1;
3344  match_data match_block;  match_data match_block;
3345  const uschar *start_bits = NULL;  const uschar *start_bits = NULL;
3346  const uschar *start_match = (uschar *)subject;  const uschar *start_match = (const uschar *)subject;
3347  const uschar *end_subject;  const uschar *end_subject;
3348  const real_pcre *re = (const real_pcre *)external_re;  const real_pcre *re = (const real_pcre *)external_re;
3349  const real_pcre_extra *extra = (const real_pcre_extra *)external_extra;  const real_pcre_extra *extra = (const real_pcre_extra *)external_extra;
# Line 3351  if (re->top_backref > 0 && re->top_backr Line 3384  if (re->top_backref > 0 && re->top_backr
3384    ocount = re->top_backref * 2 + 2;    ocount = re->top_backref * 2 + 2;
3385    match_block.offset_vector = (pcre_malloc)(ocount * sizeof(int));    match_block.offset_vector = (pcre_malloc)(ocount * sizeof(int));
3386    if (match_block.offset_vector == NULL) return PCRE_ERROR_NOMEMORY;    if (match_block.offset_vector == NULL) return PCRE_ERROR_NOMEMORY;
3387    #ifdef DEBUG    DPRINTF(("Got memory to hold back references\n"));
   printf("Got memory to hold back references\n");  
   #endif  
3388    }    }
3389  else match_block.offset_vector = offsets;  else match_block.offset_vector = offsets;
3390    
# Line 3406  if (!anchored) Line 3437  if (!anchored)
3437    
3438  do  do
3439    {    {
3440      int rc;
3441    register int *iptr = match_block.offset_vector;    register int *iptr = match_block.offset_vector;
3442    register int *iend = iptr + resetcount;    register int *iend = iptr + resetcount;
3443    
# Line 3447  do Line 3479  do
3479        }        }
3480      }      }
3481    
3482    #ifdef DEBUG  #ifdef DEBUG  /* Sigh. Some compilers never learn. */
3483    printf(">>>> Match against: ");    printf(">>>> Match against: ");
3484    pchars(start_match, end_subject - start_match, TRUE, &match_block);    pchars(start_match, end_subject - start_match, TRUE, &match_block);
3485    printf("\n");    printf("\n");
3486    #endif  #endif
3487    
3488    /* When a match occurs, substrings will be set for all internal extractions;    /* When a match occurs, substrings will be set for all internal extractions;
3489    we just need to set up the whole thing as substring 0 before returning. If    we just need to set up the whole thing as substring 0 before returning. If
# Line 3461  do Line 3493  do
3493    if certain parts of the pattern were not used.    if certain parts of the pattern were not used.
3494    
3495    Before starting the match, we have to set up a longjmp() target to enable    Before starting the match, we have to set up a longjmp() target to enable
3496    the "cut" operation to fail a match completely without backtracking. */    the "cut" operation to fail a match completely without backtracking. This
3497      is done in a separate function to avoid compiler warnings. We need not do
3498    if (setjmp(match_block.fail_env) == 0 &&    it unless PCRE_EXTRA is set, since only in that case is the "cut" operation
3499        match(start_match, re->code, 2, &match_block))    enabled. */
     {  
     int rc;  
3500    
3501      if (ocount != offsetcount)    if (((re->options & PCRE_EXTRA) != 0 && my_setjmp(&match_block) != 0) ||
3502        {        !match(start_match, re->code, 2, &match_block))
3503        if (offsetcount >= 4)      continue;
         {  
         memcpy(offsets + 2, match_block.offset_vector + 2,  
           (offsetcount - 2) * sizeof(int));  
         #ifdef DEBUG  
         printf("Copied offsets; freeing temporary memory\n");  
         #endif  
         }  
       if (match_block.end_offset_top > offsetcount)  
         match_block.offset_overflow = TRUE;  
3504    
3505        #ifdef DEBUG    /* Copy the offset information from temporary store if necessary */
       printf("Freeing temporary memory\n");  
       #endif  
3506    
3507        (pcre_free)(match_block.offset_vector);    if (ocount != offsetcount)
3508        {
3509        if (offsetcount >= 4)
3510          {
3511          memcpy(offsets + 2, match_block.offset_vector + 2,
3512            (offsetcount - 2) * sizeof(int));
3513          DPRINTF(("Copied offsets; freeing temporary memory\n"));
3514        }        }
3515        if (match_block.end_offset_top > offsetcount)
3516          match_block.offset_overflow = TRUE;
3517    
3518      rc = match_block.offset_overflow? 0 : match_block.end_offset_top/2;      DPRINTF(("Freeing temporary memory\n"));
3519        (pcre_free)(match_block.offset_vector);
3520        }
3521    
3522      if (match_block.offset_end < 2) rc = 0; else    rc = match_block.offset_overflow? 0 : match_block.end_offset_top/2;
       {  
       offsets[0] = start_match - match_block.start_subject;  
       offsets[1] = match_block.end_match_ptr - match_block.start_subject;  
       }  
3523    
3524      #ifdef DEBUG    if (match_block.offset_end < 2) rc = 0; else
3525      printf(">>>> returning %d\n", rc);      {
3526      #endif      offsets[0] = start_match - match_block.start_subject;
3527      return rc;      offsets[1] = match_block.end_match_ptr - match_block.start_subject;
3528      }      }
3529    
3530      DPRINTF((">>>> returning %d\n", rc));
3531      return rc;
3532    }    }
3533  while (!anchored &&  while (!anchored &&
3534         match_block.errorcode == PCRE_ERROR_NOMATCH &&         match_block.errorcode == PCRE_ERROR_NOMATCH &&
3535         start_match++ < end_subject);         start_match++ < end_subject);
3536    
3537  #ifdef DEBUG  DPRINTF((">>>> returning %d\n", match_block.errorcode));
 printf(">>>> returning %d\n", match_block.errorcode);  
 #endif  
3538    
3539  return match_block.errorcode;  return match_block.errorcode;
3540  }  }

Legend:
Removed from v.7  
changed lines
  Added in v.9

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12