/[pcre]/code/trunk/pcre.c
ViewVC logotype

Diff of /code/trunk/pcre.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 11 by nigel, Sat Feb 24 21:38:17 2007 UTC revision 21 by nigel, Sat Feb 24 21:38:37 2007 UTC
# Line 9  the file Tech.Notes for some information Line 9  the file Tech.Notes for some information
9    
10  Written by: Philip Hazel <ph10@cam.ac.uk>  Written by: Philip Hazel <ph10@cam.ac.uk>
11    
12             Copyright (c) 1997 University of Cambridge             Copyright (c) 1998 University of Cambridge
13    
14  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
15  Permission is granted to anyone to use this software for any purpose on any  Permission is granted to anyone to use this software for any purpose on any
# Line 49  the external pcre header. */ Line 49  the external pcre header. */
49  #include "internal.h"  #include "internal.h"
50    
51    
52    /* Allow compilation as C++ source code, should anybody want to do that. */
53    
54    #ifdef __cplusplus
55    #define class pcre_class
56    #endif
57    
58    
59  /* Min and max values for the common repeats; for the maxima, 0 => infinity */  /* Min and max values for the common repeats; for the maxima, 0 => infinity */
60    
61  static char rep_min[] = { 0, 0, 1, 1, 0, 0 };  static const char rep_min[] = { 0, 0, 1, 1, 0, 0 };
62  static char rep_max[] = { 0, 0, 0, 0, 1, 1 };  static const char rep_max[] = { 0, 0, 0, 0, 1, 1 };
63    
64  /* Text forms of OP_ values and things, for debugging */  /* Text forms of OP_ values and things, for debugging (not all used) */
65    
66  #ifdef DEBUG  #ifdef DEBUG
67  static const char *OP_names[] = {  static const char *OP_names[] = {
# Line 65  static const char *OP_names[] = { Line 72  static const char *OP_names[] = {
72    "*", "*?", "+", "+?", "?", "??", "{", "{", "{",    "*", "*?", "+", "+?", "?", "??", "{", "{", "{",
73    "*", "*?", "+", "+?", "?", "??", "{", "{", "{",    "*", "*?", "+", "+?", "?", "??", "{", "{", "{",
74    "*", "*?", "+", "+?", "?", "??", "{", "{",    "*", "*?", "+", "+?", "?", "??", "{", "{",
75    "class", "Ref",    "class", "negclass", "Ref",
76    "Alt", "Ket", "KetRmax", "KetRmin", "Assert", "Assert not", "Once",    "Alt", "Ket", "KetRmax", "KetRmin", "Assert", "Assert not", "Once",
77    "Brazero", "Braminzero", "Bra"    "Brazero", "Braminzero", "Bra"
78  };  };
# Line 76  are simple data values; negative values Line 83  are simple data values; negative values
83  on. Zero means further processing is needed (for things like \x), or the escape  on. Zero means further processing is needed (for things like \x), or the escape
84  is invalid. */  is invalid. */
85    
86  static short int escapes[] = {  static const short int escapes[] = {
87      0,      0,      0,      0,      0,      0,      0,      0,   /* 0 - 7 */      0,      0,      0,      0,      0,      0,      0,      0,   /* 0 - 7 */
88      0,      0,    ':',    ';',    '<',    '=',    '>',    '?',   /* 8 - ? */      0,      0,    ':',    ';',    '<',    '=',    '>',    '?',   /* 8 - ? */
89    '@', -ESC_A, -ESC_B,      0, -ESC_D,      0,      0,      0,   /* @ - G */    '@', -ESC_A, -ESC_B,      0, -ESC_D,      0,      0,      0,   /* @ - G */
# Line 91  static short int escapes[] = { Line 98  static short int escapes[] = {
98    
99  /* Definition to allow mutual recursion */  /* Definition to allow mutual recursion */
100    
101  static BOOL compile_regex(int, int *, uschar **, const uschar **, const char **);  static BOOL
102      compile_regex(int, int *, uschar **, const uschar **, const char **);
103    
104  /* Structure for passing "static" information around between the functions  /* Structure for passing "static" information around between the functions
105  doing the matching, so that they are thread-safe. */  doing the matching, so that they are thread-safe. */
# Line 256  do { Line 264  do {
264        case OP_KETRMIN:        case OP_KETRMIN:
265        return TRUE;        return TRUE;
266    
267          /* Skip over entire bracket groups with zero lower bound */
268    
269          case OP_BRAZERO:
270          case OP_BRAMINZERO:
271          cc++;
272          /* Fall through */
273    
274        /* Skip over assertive subpatterns */        /* Skip over assertive subpatterns */
275    
276        case OP_ASSERT:        case OP_ASSERT:
# Line 270  do { Line 285  do {
285        case OP_EOD:        case OP_EOD:
286        case OP_CIRC:        case OP_CIRC:
287        case OP_DOLL:        case OP_DOLL:
       case OP_BRAZERO:  
       case OP_BRAMINZERO:  
288        case OP_NOT_WORD_BOUNDARY:        case OP_NOT_WORD_BOUNDARY:
289        case OP_WORD_BOUNDARY:        case OP_WORD_BOUNDARY:
290        cc++;        cc++;
# Line 306  do { Line 319  do {
319        /* Check a class or a back reference for a zero minimum */        /* Check a class or a back reference for a zero minimum */
320    
321        case OP_CLASS:        case OP_CLASS:
322          case OP_NEGCLASS:
323        case OP_REF:        case OP_REF:
324        cc += (*cc == OP_REF)? 2 : 33;        cc += (*cc == OP_REF)? 2 : 33;
325    
# Line 609  compile_branch(int options, int *bracket Line 623  compile_branch(int options, int *bracket
623  int repeat_type, op_type;  int repeat_type, op_type;
624  int repeat_min, repeat_max;  int repeat_min, repeat_max;
625  int bravalue, length;  int bravalue, length;
626    int greedy_default, greedy_non_default;
627  register int c;  register int c;
628  register uschar *code = *codeptr;  register uschar *code = *codeptr;
629  const uschar *ptr = *ptrptr;  const uschar *ptr = *ptrptr;
# Line 616  const uschar *oldptr; Line 631  const uschar *oldptr;
631  uschar *previous = NULL;  uschar *previous = NULL;
632  uschar class[32];  uschar class[32];
633    
634    /* Set up the default and non-default settings for greediness */
635    
636    greedy_default = ((options & PCRE_UNGREEDY) != 0);
637    greedy_non_default = greedy_default ^ 1;
638    
639  /* Switch on next character until the end of the branch */  /* Switch on next character until the end of the branch */
640    
641  for (;; ptr++)  for (;; ptr++)
# Line 670  for (;; ptr++) Line 690  for (;; ptr++)
690    
691      case '[':      case '[':
692      previous = code;      previous = code;
     *code++ = OP_CLASS;  
693    
694      /* If the first character is '^', set the negation flag */      /* If the first character is '^', set the negation flag, and use a
695        different opcode. This only matters if caseless matching is specified at
696        runtime. */
697    
698      if ((c = *(++ptr)) == '^')      if ((c = *(++ptr)) == '^')
699        {        {
700        negate_class = TRUE;        negate_class = TRUE;
701          *code++ = OP_NEGCLASS;
702        c = *(++ptr);        c = *(++ptr);
703        }        }
704      else negate_class = FALSE;      else
705          {
706          negate_class = FALSE;
707          *code++ = OP_CLASS;
708          }
709    
710      /* Keep a count of chars so that we can optimize the case of just a single      /* Keep a count of chars so that we can optimize the case of just a single
711      character. */      character. */
# Line 887  for (;; ptr++) Line 913  for (;; ptr++)
913        goto FAILED;        goto FAILED;
914        }        }
915    
916      /* If the next character is '?' this is a minimizing repeat. Advance to the      /* If the next character is '?' this is a minimizing repeat, by default,
917        but if PCRE_UNGREEDY is set, it works the other way round. Advance to the
918      next character. */      next character. */
919    
920      if (ptr[1] == '?') { repeat_type = 1; ptr++; } else repeat_type = 0;      if (ptr[1] == '?')
921          { repeat_type = greedy_non_default; ptr++; }
922        else repeat_type = greedy_default;
923    
924      /* If the maximum is zero then the minimum must also be zero; Perl allows      /* If the maximum is zero then the minimum must also be zero; Perl allows
925      this case, so we do too - by simply omitting the item altogether. */      this case, so we do too - by simply omitting the item altogether. */
# Line 979  for (;; ptr++) Line 1008  for (;; ptr++)
1008          /* If the mininum is 1 and the previous item was a character string,          /* If the mininum is 1 and the previous item was a character string,
1009          we either have to put back the item that got cancelled if the string          we either have to put back the item that got cancelled if the string
1010          length was 1, or add the character back onto the end of a longer          length was 1, or add the character back onto the end of a longer
1011          string. For a character type nothing need be done; it will just get put          string. For a character type nothing need be done; it will just get
1012          back naturally. */          put back naturally. Note that the final character is always going to
1013            get added below. */
1014    
1015          else if (*previous == OP_CHARS)          else if (*previous == OP_CHARS)
1016            {            {
1017            if (code == previous) code += 2; else previous[1]++;            if (code == previous) code += 2; else previous[1]++;
1018            }            }
1019    
1020            /*  For a single negated character we also have to put back the
1021            item that got cancelled. */
1022    
1023            else if (*previous == OP_NOT) code++;
1024    
1025          /* If the maximum is unlimited, insert an OP_STAR. */          /* If the maximum is unlimited, insert an OP_STAR. */
1026    
1027          if (repeat_max < 0)          if (repeat_max < 0)
# Line 1015  for (;; ptr++) Line 1050  for (;; ptr++)
1050      /* If previous was a character class or a back reference, we put the repeat      /* If previous was a character class or a back reference, we put the repeat
1051      stuff after it. */      stuff after it. */
1052    
1053      else if (*previous == OP_CLASS || *previous == OP_REF)      else if (*previous == OP_CLASS || *previous == OP_NEGCLASS ||
1054                 *previous == OP_REF)
1055        {        {
1056        if (repeat_min == 0 && repeat_max == -1)        if (repeat_min == 0 && repeat_max == -1)
1057          *code++ = OP_CRSTAR + repeat_type;          *code++ = OP_CRSTAR + repeat_type;
# Line 1128  for (;; ptr++) Line 1164  for (;; ptr++)
1164          case 'm':          case 'm':
1165          case 's':          case 's':
1166          case 'x':          case 'x':
1167            case 'U':
1168            case 'X':
1169          ptr++;          ptr++;
1170          while (*ptr != ')') ptr++;          while (*ptr != ')') ptr++;
1171          previous = NULL;          previous = NULL;
# Line 1287  for (;; ptr++) Line 1325  for (;; ptr++)
1325      the next state. */      the next state. */
1326    
1327      previous[1] = length;      previous[1] = length;
1328      ptr--;      if (length < 255) ptr--;
1329      break;      break;
1330      }      }
1331    }                   /* end of big loop */    }                   /* end of big loop */
# Line 1731  while ((c = *(++ptr)) != 0) Line 1769  while ((c = *(++ptr)) != 0)
1769          ptr += 2;          ptr += 2;
1770          break;          break;
1771          }          }
1772        /* Else fall thourh */        /* Else fall through */
1773    
1774        /* Else loop setting valid options until ) is met. Anything else is an        /* Else loop setting valid options until ) is met. Anything else is an
1775        error. */        error. */
# Line 1761  while ((c = *(++ptr)) != 0) Line 1799  while ((c = *(++ptr)) != 0)
1799            length -= spaces;          /* Already counted spaces */            length -= spaces;          /* Already counted spaces */
1800            continue;            continue;
1801            }            }
1802            else if (c == 'X')
1803              {
1804              options |= PCRE_EXTRA;
1805              continue;
1806              }
1807            else if (c == 'U')
1808              {
1809              options |= PCRE_UNGREEDY;
1810              continue;
1811              }
1812          else if (c == ')') break;          else if (c == ')') break;
1813    
1814          *errorptr = ERR12;          *errorptr = ERR12;
# Line 1966  printf("Length = %d top_bracket = %d top Line 2014  printf("Length = %d top_bracket = %d top
2014    
2015  if (re->options != 0)  if (re->options != 0)
2016    {    {
2017    printf("%s%s%s%s%s%s%s\n",    printf("%s%s%s%s%s%s%s%s\n",
2018      ((re->options & PCRE_ANCHORED) != 0)? "anchored " : "",      ((re->options & PCRE_ANCHORED) != 0)? "anchored " : "",
2019      ((re->options & PCRE_CASELESS) != 0)? "caseless " : "",      ((re->options & PCRE_CASELESS) != 0)? "caseless " : "",
2020      ((re->options & PCRE_EXTENDED) != 0)? "extended " : "",      ((re->options & PCRE_EXTENDED) != 0)? "extended " : "",
2021      ((re->options & PCRE_MULTILINE) != 0)? "multiline " : "",      ((re->options & PCRE_MULTILINE) != 0)? "multiline " : "",
2022      ((re->options & PCRE_DOTALL) != 0)? "dotall " : "",      ((re->options & PCRE_DOTALL) != 0)? "dotall " : "",
2023      ((re->options & PCRE_DOLLAR_ENDONLY) != 0)? "endonly " : "",      ((re->options & PCRE_DOLLAR_ENDONLY) != 0)? "endonly " : "",
2024      ((re->options & PCRE_EXTRA) != 0)? "extra " : "");      ((re->options & PCRE_EXTRA) != 0)? "extra " : "",
2025        ((re->options & PCRE_UNGREEDY) != 0)? "ungreedy " : "");
2026    }    }
2027    
2028  if ((re->options & PCRE_FIRSTSET) != 0)  if ((re->options & PCRE_FIRSTSET) != 0)
# Line 2090  while (code < code_end) Line 2139  while (code < code_end)
2139      goto CLASS_REF_REPEAT;      goto CLASS_REF_REPEAT;
2140    
2141      case OP_CLASS:      case OP_CLASS:
2142        case OP_NEGCLASS:
2143        {        {
2144        int i, min, max;        int i, min, max;
2145    
2146        code++;        if (*code++ == OP_CLASS) printf("    [");
2147        printf("    [");          else printf("   ^[");
2148    
2149        for (i = 0; i < 256; i++)        for (i = 0; i < 256; i++)
2150          {          {
# Line 2714  for (;;) Line 2764  for (;;)
2764      item to see if there is repeat information following. Then obey similar      item to see if there is repeat information following. Then obey similar
2765      code to character type repeats - written out again for speed. If caseless      code to character type repeats - written out again for speed. If caseless
2766      matching was set at runtime but not at compile time, we have to check both      matching was set at runtime but not at compile time, we have to check both
2767      versions of a character. */      versions of a character, and we have to behave differently for positive and
2768        negative classes. This is the only time where OP_CLASS and OP_NEGCLASS are
2769        treated differently. */
2770    
2771      case OP_CLASS:      case OP_CLASS:
2772        case OP_NEGCLASS:
2773        {        {
2774          BOOL nasty_case = *ecode == OP_NEGCLASS && md->runtime_caseless;
2775        const uschar *data = ecode + 1;  /* Save for matching */        const uschar *data = ecode + 1;  /* Save for matching */
2776        ecode += 33;                     /* Advance past the item */        ecode += 33;                     /* Advance past the item */
2777    
# Line 2746  for (;;) Line 2800  for (;;)
2800          break;          break;
2801    
2802          default:               /* No repeat follows */          default:               /* No repeat follows */
2803          if (eptr >= md->end_subject) return FALSE;          min = max = 1;
2804          c = *eptr++;          break;
         if ((data[c/8] & (1 << (c&7))) != 0) continue;    /* With main loop */  
         if (md->runtime_caseless)  
           {  
           c = pcre_fcc[c];  
           if ((data[c/8] & (1 << (c&7))) != 0) continue;  /* With main loop */  
           }  
         return FALSE;  
2805          }          }
2806    
2807        /* First, ensure the minimum number of matches are present. */        /* First, ensure the minimum number of matches are present. */
# Line 2763  for (;;) Line 2810  for (;;)
2810          {          {
2811          if (eptr >= md->end_subject) return FALSE;          if (eptr >= md->end_subject) return FALSE;
2812          c = *eptr++;          c = *eptr++;
2813          if ((data[c/8] & (1 << (c&7))) != 0) continue;  
2814          if (md->runtime_caseless)          /* Either not runtime caseless, or it was a positive class. For
2815            runtime caseless, continue if either case is in the map. */
2816    
2817            if (!nasty_case)
2818            {            {
2819              if ((data[c/8] & (1 << (c&7))) != 0) continue;
2820              if (md->runtime_caseless)
2821                {
2822                c = pcre_fcc[c];
2823                if ((data[c/8] & (1 << (c&7))) != 0) continue;
2824                }
2825              }
2826    
2827            /* Runtime caseless and it was a negative class. Continue only if
2828            both cases are in the map. */
2829    
2830            else
2831              {
2832              if ((data[c/8] & (1 << (c&7))) == 0) return FALSE;
2833            c = pcre_fcc[c];            c = pcre_fcc[c];
2834            if ((data[c/8] & (1 << (c&7))) != 0) continue;            if ((data[c/8] & (1 << (c&7))) != 0) continue;
2835            }            }
2836    
2837          return FALSE;          return FALSE;
2838          }          }
2839    
# Line 2787  for (;;) Line 2852  for (;;)
2852            if (match(eptr, ecode, offset_top, md)) return TRUE;            if (match(eptr, ecode, offset_top, md)) return TRUE;
2853            if (i >= max || eptr >= md->end_subject) return FALSE;            if (i >= max || eptr >= md->end_subject) return FALSE;
2854            c = *eptr++;            c = *eptr++;
2855            if ((data[c/8] & (1 << (c&7))) != 0) continue;  
2856            if (md->runtime_caseless)            /* Either not runtime caseless, or it was a positive class. For
2857              runtime caseless, continue if either case is in the map. */
2858    
2859              if (!nasty_case)
2860              {              {
2861                if ((data[c/8] & (1 << (c&7))) != 0) continue;
2862                if (md->runtime_caseless)
2863                  {
2864                  c = pcre_fcc[c];
2865                  if ((data[c/8] & (1 << (c&7))) != 0) continue;
2866                  }
2867                }
2868    
2869              /* Runtime caseless and it was a negative class. Continue only if
2870              both cases are in the map. */
2871    
2872              else
2873                {
2874                if ((data[c/8] & (1 << (c&7))) == 0) return FALSE;
2875              c = pcre_fcc[c];              c = pcre_fcc[c];
2876              if ((data[c/8] & (1 << (c&7))) != 0) continue;              if ((data[c/8] & (1 << (c&7))) != 0) continue;
2877              }              }
2878    
2879            return FALSE;            return FALSE;
2880            }            }
2881          /* Control never gets here */          /* Control never gets here */
# Line 2807  for (;;) Line 2890  for (;;)
2890            {            {
2891            if (eptr >= md->end_subject) break;            if (eptr >= md->end_subject) break;
2892            c = *eptr;            c = *eptr;
2893            if ((data[c/8] & (1 << (c&7))) != 0) continue;  
2894            if (md->runtime_caseless)            /* Either not runtime caseless, or it was a positive class. For
2895              runtime caseless, continue if either case is in the map. */
2896    
2897              if (!nasty_case)
2898              {              {
2899                if ((data[c/8] & (1 << (c&7))) != 0) continue;
2900                if (md->runtime_caseless)
2901                  {
2902                  c = pcre_fcc[c];
2903                  if ((data[c/8] & (1 << (c&7))) != 0) continue;
2904                  }
2905                }
2906    
2907              /* Runtime caseless and it was a negative class. Continue only if
2908              both cases are in the map. */
2909    
2910              else
2911                {
2912                if ((data[c/8] & (1 << (c&7))) == 0) break;
2913              c = pcre_fcc[c];              c = pcre_fcc[c];
2914              if ((data[c/8] & (1 << (c&7))) != 0) continue;              if ((data[c/8] & (1 << (c&7))) != 0) continue;
2915              }              }
2916    
2917            break;            break;
2918            }            }
2919    
# Line 3389  ocount = offsetcount & (-2); Line 3490  ocount = offsetcount & (-2);
3490  if (re->top_backref > 0 && re->top_backref >= ocount/2)  if (re->top_backref > 0 && re->top_backref >= ocount/2)
3491    {    {
3492    ocount = re->top_backref * 2 + 2;    ocount = re->top_backref * 2 + 2;
3493    match_block.offset_vector = (pcre_malloc)(ocount * sizeof(int));    match_block.offset_vector = (int *)(pcre_malloc)(ocount * sizeof(int));
3494    if (match_block.offset_vector == NULL) return PCRE_ERROR_NOMEMORY;    if (match_block.offset_vector == NULL) return PCRE_ERROR_NOMEMORY;
3495    using_temporary_offsets = TRUE;    using_temporary_offsets = TRUE;
3496    DPRINTF(("Got memory to hold back references\n"));    DPRINTF(("Got memory to hold back references\n"));

Legend:
Removed from v.11  
changed lines
  Added in v.21

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12