/[pcre]/code/tags/pcre-1.05/pcre.c
ViewVC logotype

Diff of /code/tags/pcre-1.05/pcre.c

Parent Directory | Revision Log | View Patch

code/trunk/pcre.c revision 7 by nigel, Sat Feb 24 21:38:09 2007 UTC code/tags/pcre-1.05/pcre.c revision 14 by nigel, Sat Feb 24 21:38:23 2007 UTC
# Line 33  restrictions: Line 33  restrictions:
33    
34  /* #define DEBUG */  /* #define DEBUG */
35    
36    /* Use a macro for debugging printing, 'cause that eliminates the use
37    of #ifdef inline, and there are *still* stupid compilers about that don't like
38    indented pre-processor statements. I suppose it's only been 10 years... */
39    
40    #ifdef DEBUG
41    #define DPRINTF(p) printf p
42    #else
43    #define DPRINTF(p) /*nothing*/
44    #endif
45    
46  /* Include the internals header, which itself includes Standard C headers plus  /* Include the internals header, which itself includes Standard C headers plus
47  the external pcre header. */  the external pcre header. */
# Line 45  the external pcre header. */ Line 54  the external pcre header. */
54  static char rep_min[] = { 0, 0, 1, 1, 0, 0 };  static char rep_min[] = { 0, 0, 1, 1, 0, 0 };
55  static char rep_max[] = { 0, 0, 0, 0, 1, 1 };  static char rep_max[] = { 0, 0, 0, 0, 1, 1 };
56    
57  /* Text forms of OP_ values and things, for debugging */  /* Text forms of OP_ values and things, for debugging (not all used) */
58    
59  #ifdef DEBUG  #ifdef DEBUG
60  static const char *OP_names[] = {  static const char *OP_names[] = {
# Line 56  static const char *OP_names[] = { Line 65  static const char *OP_names[] = {
65    "*", "*?", "+", "+?", "?", "??", "{", "{", "{",    "*", "*?", "+", "+?", "?", "??", "{", "{", "{",
66    "*", "*?", "+", "+?", "?", "??", "{", "{", "{",    "*", "*?", "+", "+?", "?", "??", "{", "{", "{",
67    "*", "*?", "+", "+?", "?", "??", "{", "{",    "*", "*?", "+", "+?", "?", "??", "{", "{",
68    "class", "Ref",    "class", "negclass", "Ref",
69    "Alt", "Ket", "KetRmax", "KetRmin", "Assert", "Assert not", "Once",    "Alt", "Ket", "KetRmax", "KetRmin", "Assert", "Assert not", "Once",
70    "Brazero", "Braminzero", "Bra"    "Brazero", "Braminzero", "Bra"
71  };  };
# Line 82  static short int escapes[] = { Line 91  static short int escapes[] = {
91    
92  /* Definition to allow mutual recursion */  /* Definition to allow mutual recursion */
93    
94  static BOOL compile_regex(int, int *, uschar **, const uschar **, const char **);  static BOOL
95      compile_regex(int, int *, uschar **, const uschar **, const char **);
96    
97  /* Structure for passing "static" information around between the functions  /* Structure for passing "static" information around between the functions
98  doing the matching, so that they are thread-safe. */  doing the matching, so that they are thread-safe. */
# Line 187  Arguments: Line 197  Arguments:
197  Returns:     nothing  Returns:     nothing
198  */  */
199    
200  static pchars(uschar *p, int length, BOOL is_subject, match_data *md)  static void
201    pchars(const uschar *p, int length, BOOL is_subject, match_data *md)
202  {  {
203  int c;  int c;
204  if (is_subject && length > md->end_subject - p) length = md->end_subject - p;  if (is_subject && length > md->end_subject - p) length = md->end_subject - p;
# Line 296  do { Line 307  do {
307        /* Check a class or a back reference for a zero minimum */        /* Check a class or a back reference for a zero minimum */
308    
309        case OP_CLASS:        case OP_CLASS:
310          case OP_NEGCLASS:
311        case OP_REF:        case OP_REF:
312        cc += (*cc == OP_REF)? 2 : 33;        cc += (*cc == OP_REF)? 2 : 33;
313    
# Line 660  for (;; ptr++) Line 672  for (;; ptr++)
672    
673      case '[':      case '[':
674      previous = code;      previous = code;
     *code++ = OP_CLASS;  
675    
676      /* If the first character is '^', set the negation flag */      /* If the first character is '^', set the negation flag, and use a
677        different opcode. This only matters if caseless matching is specified at
678        runtime. */
679    
680      if ((c = *(++ptr)) == '^')      if ((c = *(++ptr)) == '^')
681        {        {
682        negate_class = TRUE;        negate_class = TRUE;
683          *code++ = OP_NEGCLASS;
684        c = *(++ptr);        c = *(++ptr);
685        }        }
686      else negate_class = FALSE;      else
687          {
688          negate_class = FALSE;
689          *code++ = OP_CLASS;
690          }
691    
692      /* Keep a count of chars so that we can optimize the case of just a single      /* Keep a count of chars so that we can optimize the case of just a single
693      character. */      character. */
# Line 977  for (;; ptr++) Line 995  for (;; ptr++)
995            if (code == previous) code += 2; else previous[1]++;            if (code == previous) code += 2; else previous[1]++;
996            }            }
997    
998          /* Insert an UPTO if the max is greater than the min. */          /* If the maximum is unlimited, insert an OP_STAR. */
999    
1000          if (repeat_max != repeat_min)          if (repeat_max < 0)
1001              {
1002              *code++ = c;
1003              *code++ = OP_STAR + repeat_type;
1004              }
1005    
1006            /* Else insert an UPTO if the max is greater than the min. */
1007    
1008            else if (repeat_max != repeat_min)
1009            {            {
1010            *code++ = c;            *code++ = c;
1011            repeat_max -= repeat_min;            repeat_max -= repeat_min;
# Line 997  for (;; ptr++) Line 1023  for (;; ptr++)
1023      /* If previous was a character class or a back reference, we put the repeat      /* If previous was a character class or a back reference, we put the repeat
1024      stuff after it. */      stuff after it. */
1025    
1026      else if (*previous == OP_CLASS || *previous == OP_REF)      else if (*previous == OP_CLASS || *previous == OP_NEGCLASS ||
1027                 *previous == OP_REF)
1028        {        {
1029        if (repeat_min == 0 && repeat_max == -1)        if (repeat_min == 0 && repeat_max == -1)
1030          *code++ = OP_CRSTAR + repeat_type;          *code++ = OP_CRSTAR + repeat_type;
# Line 1023  for (;; ptr++) Line 1050  for (;; ptr++)
1050      else if ((int)*previous >= OP_BRA)      else if ((int)*previous >= OP_BRA)
1051        {        {
1052        int i;        int i;
1053        int length = code - previous;        int len = code - previous;
1054    
1055        if (repeat_max == -1 && could_be_empty(previous))        if (repeat_max == -1 && could_be_empty(previous))
1056          {          {
# Line 1040  for (;; ptr++) Line 1067  for (;; ptr++)
1067          {          {
1068          for (i = 1; i < repeat_min; i++)          for (i = 1; i < repeat_min; i++)
1069            {            {
1070            memcpy(code, previous, length);            memcpy(code, previous, len);
1071            code += length;            code += len;
1072            }            }
1073          }          }
1074    
# Line 1053  for (;; ptr++) Line 1080  for (;; ptr++)
1080          {          {
1081          if (repeat_min == 0)          if (repeat_min == 0)
1082            {            {
1083            memmove(previous+1, previous, length);            memmove(previous+1, previous, len);
1084            code++;            code++;
1085            *previous++ = OP_BRAZERO + repeat_type;            *previous++ = OP_BRAZERO + repeat_type;
1086            }            }
1087    
1088          for (i = 1; i < repeat_min; i++)          for (i = 1; i < repeat_min; i++)
1089            {            {
1090            memcpy(code, previous, length);            memcpy(code, previous, len);
1091            code += length;            code += len;
1092            }            }
1093    
1094          for (i = (repeat_min > 0)? repeat_min : 1; i < repeat_max; i++)          for (i = (repeat_min > 0)? repeat_min : 1; i < repeat_max; i++)
1095            {            {
1096            *code++ = OP_BRAZERO + repeat_type;            *code++ = OP_BRAZERO + repeat_type;
1097            memcpy(code, previous, length);            memcpy(code, previous, len);
1098            code += length;            code += len;
1099            }            }
1100          }          }
1101    
# Line 1529  if ((options & ~PUBLIC_OPTIONS) != 0) Line 1556  if ((options & ~PUBLIC_OPTIONS) != 0)
1556    return NULL;    return NULL;
1557    }    }
1558    
1559  #ifdef DEBUG  DPRINTF(("------------------------------------------------------------------\n"));
1560  printf("------------------------------------------------------------------\n");  DPRINTF(("%s\n", pattern));
 printf("%s\n", pattern);  
 #endif  
1561    
1562  /* The first thing to do is to make a pass over the pattern to compute the  /* The first thing to do is to make a pass over the pattern to compute the
1563  amount of store required to hold the compiled code. This does not have to be  amount of store required to hold the compiled code. This does not have to be
# Line 1647  while ((c = *(++ptr)) != 0) Line 1672  while ((c = *(++ptr)) != 0)
1672        {        {
1673        if (*ptr == '\\')        if (*ptr == '\\')
1674          {          {
1675          int c = check_escape(&ptr, errorptr, bracount, options, TRUE);          int ch = check_escape(&ptr, errorptr, bracount, options, TRUE);
1676          if (*errorptr != NULL) goto PCRE_ERROR_RETURN;          if (*errorptr != NULL) goto PCRE_ERROR_RETURN;
1677          if (-c == ESC_b) class_charcount++; else class_charcount = 10;          if (-ch == ESC_b) class_charcount++; else class_charcount = 10;
1678          }          }
1679        else class_charcount++;        else class_charcount++;
1680        ptr++;        ptr++;
# Line 1664  while ((c = *(++ptr)) != 0) Line 1689  while ((c = *(++ptr)) != 0)
1689    
1690        /* A repeat needs either 1 or 5 bytes. */        /* A repeat needs either 1 or 5 bytes. */
1691    
1692        if (ptr[1] == '{' && is_counted_repeat(ptr+2))        if (*ptr != 0 && ptr[1] == '{' && is_counted_repeat(ptr+2))
1693          {          {
1694          ptr = read_repeat_counts(ptr+2, &min, &max, errorptr);          ptr = read_repeat_counts(ptr+2, &min, &max, errorptr);
1695          if (*errorptr != NULL) goto PCRE_ERROR_RETURN;          if (*errorptr != NULL) goto PCRE_ERROR_RETURN;
# Line 1772  while ((c = *(++ptr)) != 0) Line 1797  while ((c = *(++ptr)) != 0)
1797      continue;      continue;
1798    
1799      /* Handle ket. Look for subsequent max/min; for certain sets of values we      /* Handle ket. Look for subsequent max/min; for certain sets of values we
1800      have to replicate this bracket up to that many times. */      have to replicate this bracket up to that many times. If brastackptr is
1801        0 this is an unmatched bracket which will generate an error, but take care
1802        not to try to access brastack[-1]. */
1803    
1804      case ')':      case ')':
1805      length += 3;      length += 3;
1806        {        {
1807        int min = 1;        int minval = 1;
1808        int max = 1;        int maxval = 1;
1809        int duplength = length - brastack[--brastackptr];        int duplength = (brastackptr > 0)? length - brastack[--brastackptr] : 0;
1810    
1811        /* Leave ptr at the final char; for read_repeat_counts this happens        /* Leave ptr at the final char; for read_repeat_counts this happens
1812        automatically; for the others we need an increment. */        automatically; for the others we need an increment. */
1813    
1814        if ((c = ptr[1]) == '{' && is_counted_repeat(ptr+2))        if ((c = ptr[1]) == '{' && is_counted_repeat(ptr+2))
1815          {          {
1816          ptr = read_repeat_counts(ptr+2, &min, &max, errorptr);          ptr = read_repeat_counts(ptr+2, &minval, &maxval, errorptr);
1817          if (*errorptr != NULL) goto PCRE_ERROR_RETURN;          if (*errorptr != NULL) goto PCRE_ERROR_RETURN;
1818          }          }
1819        else if (c == '*') { min = 0; max = -1; ptr++; }        else if (c == '*') { minval = 0; maxval = -1; ptr++; }
1820        else if (c == '+') { max = -1; ptr++; }        else if (c == '+') { maxval = -1; ptr++; }
1821        else if (c == '?') { min = 0; ptr++; }        else if (c == '?') { minval = 0; ptr++; }
1822    
1823        /* If there is a minimum > 1 we have to replicate up to min-1 times; if        /* If there is a minimum > 1 we have to replicate up to minval-1 times;
1824        there is a limited maximum we have to replicate up to max-1 times and        if there is a limited maximum we have to replicate up to maxval-1 times
1825        allow for a BRAZERO item before each optional copy, as we also have to        and allow for a BRAZERO item before each optional copy, as we also have
1826        do before the first copy if the minimum is zero. */        to do before the first copy if the minimum is zero. */
1827    
1828        if (min == 0) length++;        if (minval == 0) length++;
1829          else if (min > 1) length += (min - 1) * duplength;          else if (minval > 1) length += (minval - 1) * duplength;
1830        if (max > min) length += (max - min) * (duplength + 1);        if (maxval > minval) length += (maxval - minval) * (duplength + 1);
1831        }        }
   
1832      continue;      continue;
1833    
1834      /* Non-special character. For a run of such characters the length required      /* Non-special character. For a run of such characters the length required
# Line 1863  if (length > 65539) Line 1889  if (length > 65539)
1889    }    }
1890    
1891  /* Compute the size of data block needed and get it, either from malloc or  /* Compute the size of data block needed and get it, either from malloc or
1892  externally provided function. Put in the magic number and the options. */  externally provided function. We specify "code[0]" in the offsetof() expression
1893    rather than just "code", because it has been reported that one broken compiler
1894    fails on "code" because it is also an independent variable. It should make no
1895    difference to the value of the offsetof(). */
1896    
1897  size = length + offsetof(real_pcre, code);  size = length + offsetof(real_pcre, code[0]);
1898  re = (real_pcre *)(pcre_malloc)(size);  re = (real_pcre *)(pcre_malloc)(size);
1899    
1900  if (re == NULL)  if (re == NULL)
# Line 1874  if (re == NULL) Line 1903  if (re == NULL)
1903    return NULL;    return NULL;
1904    }    }
1905    
1906    /* Put in the magic number and the options. */
1907    
1908  re->magic_number = MAGIC_NUMBER;  re->magic_number = MAGIC_NUMBER;
1909  re->options = options;  re->options = options;
1910    
# Line 1924  if ((options & PCRE_ANCHORED) == 0) Line 1955  if ((options & PCRE_ANCHORED) == 0)
1955      re->options |= PCRE_ANCHORED;      re->options |= PCRE_ANCHORED;
1956    else    else
1957      {      {
1958      int c = find_firstchar(re->code);      int ch = find_firstchar(re->code);
1959      if (c >= 0)      if (ch >= 0)
1960        {        {
1961        re->first_char = c;        re->first_char = ch;
1962        re->options |= PCRE_FIRSTSET;        re->options |= PCRE_FIRSTSET;
1963        }        }
1964      else if (is_startline(re->code))      else if (is_startline(re->code))
# Line 2019  while (code < code_end) Line 2050  while (code < code_end)
2050      case OP_MINUPTO:      case OP_MINUPTO:
2051      if (isprint(c = code[3])) printf("    %c{", c);      if (isprint(c = code[3])) printf("    %c{", c);
2052        else printf("    \\x%02x{", c);        else printf("    \\x%02x{", c);
2053      if (*code != OP_EXACT) printf(",");      if (*code != OP_EXACT) printf("0,");
2054      printf("%d}", (code[1] << 8) + code[2]);      printf("%d}", (code[1] << 8) + code[2]);
2055      if (*code == OP_MINUPTO) printf("?");      if (*code == OP_MINUPTO) printf("?");
2056      code += 3;      code += 3;
# Line 2064  while (code < code_end) Line 2095  while (code < code_end)
2095    
2096      case OP_REF:      case OP_REF:
2097      printf("    \\%d", *(++code));      printf("    \\%d", *(++code));
2098      break;      code ++;
2099        goto CLASS_REF_REPEAT;
2100    
2101      case OP_CLASS:      case OP_CLASS:
2102        case OP_NEGCLASS:
2103        {        {
2104        int i, min, max;        int i, min, max;
2105    
2106        code++;        if (*code++ == OP_CLASS) printf("    [");
2107        printf("    [");          else printf("   ^[");
2108    
2109        for (i = 0; i < 256; i++)        for (i = 0; i < 256; i++)
2110          {          {
# Line 2094  while (code < code_end) Line 2127  while (code < code_end)
2127        printf("]");        printf("]");
2128        code += 32;        code += 32;
2129    
2130          CLASS_REF_REPEAT:
2131    
2132        switch(*code)        switch(*code)
2133          {          {
2134          case OP_CRSTAR:          case OP_CRSTAR:
# Line 2278  for (;;) Line 2313  for (;;)
2313      int number = (*ecode - OP_BRA) << 1;      int number = (*ecode - OP_BRA) << 1;
2314      int save_offset1 = 0, save_offset2 = 0;      int save_offset1 = 0, save_offset2 = 0;
2315    
2316      #ifdef DEBUG      DPRINTF(("start bracket %d\n", number/2));
     printf("start bracket %d\n", number/2);  
     #endif  
2317    
2318      if (number > 0 && number < md->offset_end)      if (number > 0 && number < md->offset_end)
2319        {        {
# Line 2288  for (;;) Line 2321  for (;;)
2321        save_offset2 = md->offset_vector[number+1];        save_offset2 = md->offset_vector[number+1];
2322        md->offset_vector[number] = eptr - md->start_subject;        md->offset_vector[number] = eptr - md->start_subject;
2323    
2324        #ifdef DEBUG        DPRINTF(("saving %d %d\n", save_offset1, save_offset2));
       printf("saving %d %d\n", save_offset1, save_offset2);  
       #endif  
2325        }        }
2326    
2327      /* Recurse for all the alternatives. */      /* Recurse for all the alternatives. */
# Line 2302  for (;;) Line 2333  for (;;)
2333        }        }
2334      while (*ecode == OP_ALT);      while (*ecode == OP_ALT);
2335    
2336      #ifdef DEBUG      DPRINTF(("bracket %d failed\n", number/2));
     printf("bracket %d failed\n", number/2);  
     #endif  
2337    
2338      if (number > 0 && number < md->offset_end)      if (number > 0 && number < md->offset_end)
2339        {        {
# Line 2443  for (;;) Line 2472  for (;;)
2472    
2473        number = (*prev - OP_BRA) << 1;        number = (*prev - OP_BRA) << 1;
2474    
2475        #ifdef DEBUG        DPRINTF(("end bracket %d\n", number/2));
       printf("end bracket %d\n", number/2);  
       #endif  
2476    
2477        if (number > 0)        if (number > 0)
2478          {          {
# Line 2697  for (;;) Line 2724  for (;;)
2724      item to see if there is repeat information following. Then obey similar      item to see if there is repeat information following. Then obey similar
2725      code to character type repeats - written out again for speed. If caseless      code to character type repeats - written out again for speed. If caseless
2726      matching was set at runtime but not at compile time, we have to check both      matching was set at runtime but not at compile time, we have to check both
2727      versions of a character. */      versions of a character, and we have to behave differently for positive and
2728        negative classes. This is the only time where OP_CLASS and OP_NEGCLASS are
2729        treated differently. */
2730    
2731      case OP_CLASS:      case OP_CLASS:
2732        case OP_NEGCLASS:
2733        {        {
2734          BOOL nasty_case = *ecode == OP_NEGCLASS && md->runtime_caseless;
2735        const uschar *data = ecode + 1;  /* Save for matching */        const uschar *data = ecode + 1;  /* Save for matching */
2736        ecode += 33;                     /* Advance past the item */        ecode += 33;                     /* Advance past the item */
2737    
# Line 2729  for (;;) Line 2760  for (;;)
2760          break;          break;
2761    
2762          default:               /* No repeat follows */          default:               /* No repeat follows */
2763          if (eptr >= md->end_subject) return FALSE;          min = max = 1;
2764          c = *eptr++;          break;
         if ((data[c/8] & (1 << (c&7))) != 0) continue;    /* With main loop */  
         if (md->runtime_caseless)  
           {  
           c = pcre_fcc[c];  
           if ((data[c/8] & (1 << (c&7))) != 0) continue;  /* With main loop */  
           }  
         return FALSE;  
2765          }          }
2766    
2767        /* First, ensure the minimum number of matches are present. */        /* First, ensure the minimum number of matches are present. */
# Line 2746  for (;;) Line 2770  for (;;)
2770          {          {
2771          if (eptr >= md->end_subject) return FALSE;          if (eptr >= md->end_subject) return FALSE;
2772          c = *eptr++;          c = *eptr++;
2773          if ((data[c/8] & (1 << (c&7))) != 0) continue;  
2774          if (md->runtime_caseless)          /* Either not runtime caseless, or it was a positive class. For
2775            runtime caseless, continue if either case is in the map. */
2776    
2777            if (!nasty_case)
2778              {
2779              if ((data[c/8] & (1 << (c&7))) != 0) continue;
2780              if (md->runtime_caseless)
2781                {
2782                c = pcre_fcc[c];
2783                if ((data[c/8] & (1 << (c&7))) != 0) continue;
2784                }
2785              }
2786    
2787            /* Runtime caseless and it was a negative class. Continue only if
2788            both cases are in the map. */
2789    
2790            else
2791            {            {
2792              if ((data[c/8] & (1 << (c&7))) == 0) return FALSE;
2793            c = pcre_fcc[c];            c = pcre_fcc[c];
2794            if ((data[c/8] & (1 << (c&7))) != 0) continue;            if ((data[c/8] & (1 << (c&7))) != 0) continue;
2795            }            }
2796    
2797          return FALSE;          return FALSE;
2798          }          }
2799    
# Line 2770  for (;;) Line 2812  for (;;)
2812            if (match(eptr, ecode, offset_top, md)) return TRUE;            if (match(eptr, ecode, offset_top, md)) return TRUE;
2813            if (i >= max || eptr >= md->end_subject) return FALSE;            if (i >= max || eptr >= md->end_subject) return FALSE;
2814            c = *eptr++;            c = *eptr++;
2815            if ((data[c/8] & (1 << (c&7))) != 0) continue;  
2816            if (md->runtime_caseless)            /* Either not runtime caseless, or it was a positive class. For
2817              runtime caseless, continue if either case is in the map. */
2818    
2819              if (!nasty_case)
2820                {
2821                if ((data[c/8] & (1 << (c&7))) != 0) continue;
2822                if (md->runtime_caseless)
2823                  {
2824                  c = pcre_fcc[c];
2825                  if ((data[c/8] & (1 << (c&7))) != 0) continue;
2826                  }
2827                }
2828    
2829              /* Runtime caseless and it was a negative class. Continue only if
2830              both cases are in the map. */
2831    
2832              else
2833              {              {
2834                if ((data[c/8] & (1 << (c&7))) == 0) return FALSE;
2835              c = pcre_fcc[c];              c = pcre_fcc[c];
2836              if ((data[c/8] & (1 << (c&7))) != 0) continue;              if ((data[c/8] & (1 << (c&7))) != 0) continue;
2837              }              }
2838    
2839            return FALSE;            return FALSE;
2840            }            }
2841          /* Control never gets here */          /* Control never gets here */
# Line 2790  for (;;) Line 2850  for (;;)
2850            {            {
2851            if (eptr >= md->end_subject) break;            if (eptr >= md->end_subject) break;
2852            c = *eptr;            c = *eptr;
2853            if ((data[c/8] & (1 << (c&7))) != 0) continue;  
2854            if (md->runtime_caseless)            /* Either not runtime caseless, or it was a positive class. For
2855              runtime caseless, continue if either case is in the map. */
2856    
2857              if (!nasty_case)
2858              {              {
2859                if ((data[c/8] & (1 << (c&7))) != 0) continue;
2860                if (md->runtime_caseless)
2861                  {
2862                  c = pcre_fcc[c];
2863                  if ((data[c/8] & (1 << (c&7))) != 0) continue;
2864                  }
2865                }
2866    
2867              /* Runtime caseless and it was a negative class. Continue only if
2868              both cases are in the map. */
2869    
2870              else
2871                {
2872                if ((data[c/8] & (1 << (c&7))) == 0) break;
2873              c = pcre_fcc[c];              c = pcre_fcc[c];
2874              if ((data[c/8] & (1 << (c&7))) != 0) continue;              if ((data[c/8] & (1 << (c&7))) != 0) continue;
2875              }              }
2876    
2877            break;            break;
2878            }            }
2879    
# Line 2813  for (;;) Line 2891  for (;;)
2891        register int length = ecode[1];        register int length = ecode[1];
2892        ecode += 2;        ecode += 2;
2893    
2894        #ifdef DEBUG  #ifdef DEBUG    /* Sigh. Some compilers never learn. */
2895        if (eptr >= md->end_subject)        if (eptr >= md->end_subject)
2896          printf("matching subject <null> against pattern ");          printf("matching subject <null> against pattern ");
2897        else        else
# Line 2824  for (;;) Line 2902  for (;;)
2902          }          }
2903        pchars(ecode, length, FALSE, md);        pchars(ecode, length, FALSE, md);
2904        printf("\n");        printf("\n");
2905        #endif  #endif
2906    
2907        if (length > md->end_subject - eptr) return FALSE;        if (length > md->end_subject - eptr) return FALSE;
2908        if (md->caseless)        if (md->caseless)
# Line 2881  for (;;) Line 2959  for (;;)
2959      maximum. Alternatively, if maximizing, find the maximum number of      maximum. Alternatively, if maximizing, find the maximum number of
2960      characters and work backwards. */      characters and work backwards. */
2961    
2962      #ifdef DEBUG      DPRINTF(("matching %c{%d,%d} against subject %.*s\n", c, min, max,
2963      printf("matching %c{%d,%d} against subject %.*s\n", c, min, max,        max, eptr));
       max, eptr);  
     #endif  
2964    
2965      if (md->caseless)      if (md->caseless)
2966        {        {
# Line 2949  for (;;) Line 3025  for (;;)
3025      /* Match a negated single character */      /* Match a negated single character */
3026    
3027      case OP_NOT:      case OP_NOT:
3028      if (eptr > md->end_subject) return FALSE;      if (eptr >= md->end_subject) return FALSE;
3029      ecode++;      ecode++;
3030      if (md->caseless)      if (md->caseless)
3031        {        {
# Line 3008  for (;;) Line 3084  for (;;)
3084      maximum. Alternatively, if maximizing, find the maximum number of      maximum. Alternatively, if maximizing, find the maximum number of
3085      characters and work backwards. */      characters and work backwards. */
3086    
3087      #ifdef DEBUG      DPRINTF(("negative matching %c{%d,%d} against subject %.*s\n", c, min, max,
3088      printf("negative matching %c{%d,%d} against subject %.*s\n", c, min, max,        max, eptr));
       max, eptr);  
     #endif  
3089    
3090      if (md->caseless)      if (md->caseless)
3091        {        {
# Line 3261  for (;;) Line 3335  for (;;)
3335      /* There's been some horrible disaster. */      /* There's been some horrible disaster. */
3336    
3337      default:      default:
3338      #ifdef DEBUG      DPRINTF(("Unknown opcode %d\n", *ecode));
     printf("Unknown opcode %d\n", *ecode);  
     #endif  
3339      md->errorcode = PCRE_ERROR_UNKNOWN_NODE;      md->errorcode = PCRE_ERROR_UNKNOWN_NODE;
3340      return FALSE;      return FALSE;
3341      }      }
# Line 3279  for (;;) Line 3351  for (;;)
3351    
3352    
3353  /*************************************************  /*************************************************
3354    *         Segregate setjmp()                     *
3355    *************************************************/
3356    
3357    /* The -Wall option of gcc gives warnings for all local variables when setjmp()
3358    is used, even if the coding conforms to the rules of ANSI C. To avoid this, we
3359    hide it in a separate function. This is called only when PCRE_EXTRA is set,
3360    since it's needed only for the extension \X option, and with any luck, a good
3361    compiler will spot the tail recursion and compile it efficiently.
3362    
3363    Arguments:
3364       eptr        pointer in subject
3365       ecode       position in code
3366       offset_top  current top pointer
3367       match_block pointer to "static" info for the match
3368    
3369    Returns:       TRUE if matched
3370    */
3371    
3372    static BOOL
3373    match_with_setjmp(const uschar *eptr, const uschar *ecode, int offset_top,
3374      match_data *match_block)
3375    {
3376    return setjmp(match_block->fail_env) == 0 &&
3377          match(eptr, ecode, offset_top, match_block);
3378    }
3379    
3380    
3381    
3382    /*************************************************
3383  *         Execute a Regular Expression           *  *         Execute a Regular Expression           *
3384  *************************************************/  *************************************************/
3385    
# Line 3305  int Line 3406  int
3406  pcre_exec(const pcre *external_re, const pcre_extra *external_extra,  pcre_exec(const pcre *external_re, const pcre_extra *external_extra,
3407    const char *subject, int length, int options, int *offsets, int offsetcount)    const char *subject, int length, int options, int *offsets, int offsetcount)
3408  {  {
3409  int resetcount;  int resetcount, ocount;
 int ocount = offsetcount;  
3410  int first_char = -1;  int first_char = -1;
3411  match_data match_block;  match_data match_block;
3412  const uschar *start_bits = NULL;  const uschar *start_bits = NULL;
3413  const uschar *start_match = (uschar *)subject;  const uschar *start_match = (const uschar *)subject;
3414  const uschar *end_subject;  const uschar *end_subject;
3415  const real_pcre *re = (const real_pcre *)external_re;  const real_pcre *re = (const real_pcre *)external_re;
3416  const real_pcre_extra *extra = (const real_pcre_extra *)external_extra;  const real_pcre_extra *extra = (const real_pcre_extra *)external_extra;
3417    BOOL using_temporary_offsets = FALSE;
3418  BOOL anchored = ((re->options | options) & PCRE_ANCHORED) != 0;  BOOL anchored = ((re->options | options) & PCRE_ANCHORED) != 0;
3419  BOOL startline = (re->options & PCRE_STARTLINE) != 0;  BOOL startline = (re->options & PCRE_STARTLINE) != 0;
3420    
# Line 3342  match_block.errorcode = PCRE_ERROR_NOMAT Line 3443  match_block.errorcode = PCRE_ERROR_NOMAT
3443    
3444  /* If the expression has got more back references than the offsets supplied can  /* If the expression has got more back references than the offsets supplied can
3445  hold, we get a temporary bit of working store to use during the matching.  hold, we get a temporary bit of working store to use during the matching.
3446  Otherwise, we can use the vector supplied, rounding down the size of it to a  Otherwise, we can use the vector supplied, rounding down its size to a multiple
3447  multiple of 2. */  of 2. */
3448    
3449  ocount &= (-2);  ocount = offsetcount & (-2);
3450  if (re->top_backref > 0 && re->top_backref + 1 >= ocount/2)  if (re->top_backref > 0 && re->top_backref >= ocount/2)
3451    {    {
3452    ocount = re->top_backref * 2 + 2;    ocount = re->top_backref * 2 + 2;
3453    match_block.offset_vector = (pcre_malloc)(ocount * sizeof(int));    match_block.offset_vector = (pcre_malloc)(ocount * sizeof(int));
3454    if (match_block.offset_vector == NULL) return PCRE_ERROR_NOMEMORY;    if (match_block.offset_vector == NULL) return PCRE_ERROR_NOMEMORY;
3455    #ifdef DEBUG    using_temporary_offsets = TRUE;
3456    printf("Got memory to hold back references\n");    DPRINTF(("Got memory to hold back references\n"));
   #endif  
3457    }    }
3458  else match_block.offset_vector = offsets;  else match_block.offset_vector = offsets;
3459    
# Line 3406  if (!anchored) Line 3506  if (!anchored)
3506    
3507  do  do
3508    {    {
3509      int rc;
3510    register int *iptr = match_block.offset_vector;    register int *iptr = match_block.offset_vector;
3511    register int *iend = iptr + resetcount;    register int *iend = iptr + resetcount;
3512    
# Line 3447  do Line 3548  do
3548        }        }
3549      }      }
3550    
3551    #ifdef DEBUG  #ifdef DEBUG  /* Sigh. Some compilers never learn. */
3552    printf(">>>> Match against: ");    printf(">>>> Match against: ");
3553    pchars(start_match, end_subject - start_match, TRUE, &match_block);    pchars(start_match, end_subject - start_match, TRUE, &match_block);
3554    printf("\n");    printf("\n");
3555    #endif  #endif
3556    
3557    /* When a match occurs, substrings will be set for all internal extractions;    /* When a match occurs, substrings will be set for all internal extractions;
3558    we just need to set up the whole thing as substring 0 before returning. If    we just need to set up the whole thing as substring 0 before returning. If
# Line 3461  do Line 3562  do
3562    if certain parts of the pattern were not used.    if certain parts of the pattern were not used.
3563    
3564    Before starting the match, we have to set up a longjmp() target to enable    Before starting the match, we have to set up a longjmp() target to enable
3565    the "cut" operation to fail a match completely without backtracking. */    the "cut" operation to fail a match completely without backtracking. This
3566      is done in a separate function to avoid compiler warnings. We need not do
3567      it unless PCRE_EXTRA is set, since only in that case is the "cut" operation
3568      enabled. */
3569    
3570    if (setjmp(match_block.fail_env) == 0 &&    if ((re->options & PCRE_EXTRA) != 0)
       match(start_match, re->code, 2, &match_block))  
3571      {      {
3572      int rc;      if (!match_with_setjmp(start_match, re->code, 2, &match_block))
3573          continue;
3574      if (ocount != offsetcount)      }
3575        {    else if (!match(start_match, re->code, 2, &match_block)) continue;
       if (offsetcount >= 4)  
         {  
         memcpy(offsets + 2, match_block.offset_vector + 2,  
           (offsetcount - 2) * sizeof(int));  
         #ifdef DEBUG  
         printf("Copied offsets; freeing temporary memory\n");  
         #endif  
         }  
       if (match_block.end_offset_top > offsetcount)  
         match_block.offset_overflow = TRUE;  
3576    
3577        #ifdef DEBUG    /* Copy the offset information from temporary store if necessary */
       printf("Freeing temporary memory\n");  
       #endif  
3578    
3579        (pcre_free)(match_block.offset_vector);    if (using_temporary_offsets)
3580        {
3581        if (offsetcount >= 4)
3582          {
3583          memcpy(offsets + 2, match_block.offset_vector + 2,
3584            (offsetcount - 2) * sizeof(int));
3585          DPRINTF(("Copied offsets from temporary memory\n"));
3586        }        }
3587        if (match_block.end_offset_top > offsetcount)
3588          match_block.offset_overflow = TRUE;
3589    
3590      rc = match_block.offset_overflow? 0 : match_block.end_offset_top/2;      DPRINTF(("Freeing temporary memory\n"));
3591        (pcre_free)(match_block.offset_vector);
3592        }
3593    
3594      if (match_block.offset_end < 2) rc = 0; else    rc = match_block.offset_overflow? 0 : match_block.end_offset_top/2;
       {  
       offsets[0] = start_match - match_block.start_subject;  
       offsets[1] = match_block.end_match_ptr - match_block.start_subject;  
       }  
3595    
3596      #ifdef DEBUG    if (match_block.offset_end < 2) rc = 0; else
3597      printf(">>>> returning %d\n", rc);      {
3598      #endif      offsets[0] = start_match - match_block.start_subject;
3599      return rc;      offsets[1] = match_block.end_match_ptr - match_block.start_subject;
3600      }      }
3601    
3602      DPRINTF((">>>> returning %d\n", rc));
3603      return rc;
3604    }    }
3605  while (!anchored &&  while (!anchored &&
3606         match_block.errorcode == PCRE_ERROR_NOMATCH &&         match_block.errorcode == PCRE_ERROR_NOMATCH &&
3607         start_match++ < end_subject);         start_match++ < end_subject);
3608    
3609  #ifdef DEBUG  if (using_temporary_offsets)
3610  printf(">>>> returning %d\n", match_block.errorcode);    {
3611  #endif    DPRINTF(("Freeing temporary memory\n"));
3612      (pcre_free)(match_block.offset_vector);
3613      }
3614    
3615    DPRINTF((">>>> returning %d\n", match_block.errorcode));
3616    
3617  return match_block.errorcode;  return match_block.errorcode;
3618  }  }

Legend:
Removed from v.7  
changed lines
  Added in v.14

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12