/[pcre]/code/trunk/pcre.c
ViewVC logotype

Diff of /code/trunk/pcre.c

Parent Directory | Revision Log | View Patch

revision 11 by nigel, Sat Feb 24 21:38:17 2007 UTC | revision 15 by nigel, Sat Feb 24 21:38:25 2007 UTC
# Line 9  the file Tech.Notes for some information Line 9  the file Tech.Notes for some information
9    
10  Written by: Philip Hazel <ph10@cam.ac.uk>  Written by: Philip Hazel <ph10@cam.ac.uk>
11    
12             Copyright (c) 1997 University of Cambridge             Copyright (c) 1998 University of Cambridge
13    
14  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
15  Permission is granted to anyone to use this software for any purpose on any  Permission is granted to anyone to use this software for any purpose on any
# Line 49  the external pcre header. */ Line 49  the external pcre header. */
49  #include "internal.h"  #include "internal.h"
50    
51    
52    /* Allow compilation as C++ source code, should anybody want to do that. */
53    
54    #ifdef __cplusplus
55    #define class pcre_class
56    #endif
57    
58    
59  /* Min and max values for the common repeats; for the maxima, 0 => infinity */  /* Min and max values for the common repeats; for the maxima, 0 => infinity */
60    
61  static char rep_min[] = { 0, 0, 1, 1, 0, 0 };  static const char rep_min[] = { 0, 0, 1, 1, 0, 0 };
62  static char rep_max[] = { 0, 0, 0, 0, 1, 1 };  static const char rep_max[] = { 0, 0, 0, 0, 1, 1 };
63    
64  /* Text forms of OP_ values and things, for debugging */  /* Text forms of OP_ values and things, for debugging (not all used) */
65    
66  #ifdef DEBUG  #ifdef DEBUG
67  static const char *OP_names[] = {  static const char *OP_names[] = {
# Line 65  static const char *OP_names[] = { Line 72  static const char *OP_names[] = {
72    "*", "*?", "+", "+?", "?", "??", "{", "{", "{",    "*", "*?", "+", "+?", "?", "??", "{", "{", "{",
73    "*", "*?", "+", "+?", "?", "??", "{", "{", "{",    "*", "*?", "+", "+?", "?", "??", "{", "{", "{",
74    "*", "*?", "+", "+?", "?", "??", "{", "{",    "*", "*?", "+", "+?", "?", "??", "{", "{",
75    "class", "Ref",    "class", "negclass", "Ref",
76    "Alt", "Ket", "KetRmax", "KetRmin", "Assert", "Assert not", "Once",    "Alt", "Ket", "KetRmax", "KetRmin", "Assert", "Assert not", "Once",
77    "Brazero", "Braminzero", "Bra"    "Brazero", "Braminzero", "Bra"
78  };  };
# Line 76  are simple data values; negative values Line 83  are simple data values; negative values
83  on. Zero means further processing is needed (for things like \x), or the escape  on. Zero means further processing is needed (for things like \x), or the escape
84  is invalid. */  is invalid. */
85    
86  static short int escapes[] = {  static const short int escapes[] = {
87      0,      0,      0,      0,      0,      0,      0,      0,   /* 0 - 7 */      0,      0,      0,      0,      0,      0,      0,      0,   /* 0 - 7 */
88      0,      0,    ':',    ';',    '<',    '=',    '>',    '?',   /* 8 - ? */      0,      0,    ':',    ';',    '<',    '=',    '>',    '?',   /* 8 - ? */
89    '@', -ESC_A, -ESC_B,      0, -ESC_D,      0,      0,      0,   /* @ - G */    '@', -ESC_A, -ESC_B,      0, -ESC_D,      0,      0,      0,   /* @ - G */
# Line 91  static short int escapes[] = { Line 98  static short int escapes[] = {
98    
99  /* Definition to allow mutual recursion */  /* Definition to allow mutual recursion */
100    
101  static BOOL compile_regex(int, int *, uschar **, const uschar **, const char **);  static BOOL
102      compile_regex(int, int *, uschar **, const uschar **, const char **);
103    
104  /* Structure for passing "static" information around between the functions  /* Structure for passing "static" information around between the functions
105  doing the matching, so that they are thread-safe. */  doing the matching, so that they are thread-safe. */
# Line 306  do { Line 314  do {
314        /* Check a class or a back reference for a zero minimum */        /* Check a class or a back reference for a zero minimum */
315    
316        case OP_CLASS:        case OP_CLASS:
317          case OP_NEGCLASS:
318        case OP_REF:        case OP_REF:
319        cc += (*cc == OP_REF)? 2 : 33;        cc += (*cc == OP_REF)? 2 : 33;
320    
# Line 670  for (;; ptr++) Line 679  for (;; ptr++)
679    
680      case '[':      case '[':
681      previous = code;      previous = code;
     *code++ = OP_CLASS;  
682    
683      /* If the first character is '^', set the negation flag */      /* If the first character is '^', set the negation flag, and use a
684        different opcode. This only matters if caseless matching is specified at
685        runtime. */
686    
687      if ((c = *(++ptr)) == '^')      if ((c = *(++ptr)) == '^')
688        {        {
689        negate_class = TRUE;        negate_class = TRUE;
690          *code++ = OP_NEGCLASS;
691        c = *(++ptr);        c = *(++ptr);
692        }        }
693      else negate_class = FALSE;      else
694          {
695          negate_class = FALSE;
696          *code++ = OP_CLASS;
697          }
698    
699      /* Keep a count of chars so that we can optimize the case of just a single      /* Keep a count of chars so that we can optimize the case of just a single
700      character. */      character. */
# Line 1015  for (;; ptr++) Line 1030  for (;; ptr++)
1030      /* If previous was a character class or a back reference, we put the repeat      /* If previous was a character class or a back reference, we put the repeat
1031      stuff after it. */      stuff after it. */
1032    
1033      else if (*previous == OP_CLASS || *previous == OP_REF)      else if (*previous == OP_CLASS || *previous == OP_NEGCLASS ||
1034                 *previous == OP_REF)
1035        {        {
1036        if (repeat_min == 0 && repeat_max == -1)        if (repeat_min == 0 && repeat_max == -1)
1037          *code++ = OP_CRSTAR + repeat_type;          *code++ = OP_CRSTAR + repeat_type;
# Line 1287  for (;; ptr++) Line 1303  for (;; ptr++)
1303      the next state. */      the next state. */
1304    
1305      previous[1] = length;      previous[1] = length;
1306      ptr--;      if (length < 255) ptr--;
1307      break;      break;
1308      }      }
1309    }                   /* end of big loop */    }                   /* end of big loop */
# Line 2090  while (code < code_end) Line 2106  while (code < code_end)
2106      goto CLASS_REF_REPEAT;      goto CLASS_REF_REPEAT;
2107    
2108      case OP_CLASS:      case OP_CLASS:
2109        case OP_NEGCLASS:
2110        {        {
2111        int i, min, max;        int i, min, max;
2112    
2113        code++;        if (*code++ == OP_CLASS) printf("    [");
2114        printf("    [");          else printf("   ^[");
2115    
2116        for (i = 0; i < 256; i++)        for (i = 0; i < 256; i++)
2117          {          {
# Line 2714  for (;;) Line 2731  for (;;)
2731      item to see if there is repeat information following. Then obey similar      item to see if there is repeat information following. Then obey similar
2732      code to character type repeats - written out again for speed. If caseless      code to character type repeats - written out again for speed. If caseless
2733      matching was set at runtime but not at compile time, we have to check both      matching was set at runtime but not at compile time, we have to check both
2734      versions of a character. */      versions of a character, and we have to behave differently for positive and
2735        negative classes. This is the only time where OP_CLASS and OP_NEGCLASS are
2736        treated differently. */
2737    
2738      case OP_CLASS:      case OP_CLASS:
2739        case OP_NEGCLASS:
2740        {        {
2741          BOOL nasty_case = *ecode == OP_NEGCLASS && md->runtime_caseless;
2742        const uschar *data = ecode + 1;  /* Save for matching */        const uschar *data = ecode + 1;  /* Save for matching */
2743        ecode += 33;                     /* Advance past the item */        ecode += 33;                     /* Advance past the item */
2744    
# Line 2746  for (;;) Line 2767  for (;;)
2767          break;          break;
2768    
2769          default:               /* No repeat follows */          default:               /* No repeat follows */
2770          if (eptr >= md->end_subject) return FALSE;          min = max = 1;
2771          c = *eptr++;          break;
         if ((data[c/8] & (1 << (c&7))) != 0) continue;    /* With main loop */  
         if (md->runtime_caseless)  
           {  
           c = pcre_fcc[c];  
           if ((data[c/8] & (1 << (c&7))) != 0) continue;  /* With main loop */  
           }  
         return FALSE;  
2772          }          }
2773    
2774        /* First, ensure the minimum number of matches are present. */        /* First, ensure the minimum number of matches are present. */
# Line 2763  for (;;) Line 2777  for (;;)
2777          {          {
2778          if (eptr >= md->end_subject) return FALSE;          if (eptr >= md->end_subject) return FALSE;
2779          c = *eptr++;          c = *eptr++;
2780          if ((data[c/8] & (1 << (c&7))) != 0) continue;  
2781          if (md->runtime_caseless)          /* Either not runtime caseless, or it was a positive class. For
2782            runtime caseless, continue if either case is in the map. */
2783    
2784            if (!nasty_case)
2785              {
2786              if ((data[c/8] & (1 << (c&7))) != 0) continue;
2787              if (md->runtime_caseless)
2788                {
2789                c = pcre_fcc[c];
2790                if ((data[c/8] & (1 << (c&7))) != 0) continue;
2791                }
2792              }
2793    
2794            /* Runtime caseless and it was a negative class. Continue only if
2795            both cases are in the map. */
2796    
2797            else
2798            {            {
2799              if ((data[c/8] & (1 << (c&7))) == 0) return FALSE;
2800            c = pcre_fcc[c];            c = pcre_fcc[c];
2801            if ((data[c/8] & (1 << (c&7))) != 0) continue;            if ((data[c/8] & (1 << (c&7))) != 0) continue;
2802            }            }
2803    
2804          return FALSE;          return FALSE;
2805          }          }
2806    
# Line 2787  for (;;) Line 2819  for (;;)
2819            if (match(eptr, ecode, offset_top, md)) return TRUE;            if (match(eptr, ecode, offset_top, md)) return TRUE;
2820            if (i >= max || eptr >= md->end_subject) return FALSE;            if (i >= max || eptr >= md->end_subject) return FALSE;
2821            c = *eptr++;            c = *eptr++;
2822            if ((data[c/8] & (1 << (c&7))) != 0) continue;  
2823            if (md->runtime_caseless)            /* Either not runtime caseless, or it was a positive class. For
2824              runtime caseless, continue if either case is in the map. */
2825    
2826              if (!nasty_case)
2827                {
2828                if ((data[c/8] & (1 << (c&7))) != 0) continue;
2829                if (md->runtime_caseless)
2830                  {
2831                  c = pcre_fcc[c];
2832                  if ((data[c/8] & (1 << (c&7))) != 0) continue;
2833                  }
2834                }
2835    
2836              /* Runtime caseless and it was a negative class. Continue only if
2837              both cases are in the map. */
2838    
2839              else
2840              {              {
2841                if ((data[c/8] & (1 << (c&7))) == 0) return FALSE;
2842              c = pcre_fcc[c];              c = pcre_fcc[c];
2843              if ((data[c/8] & (1 << (c&7))) != 0) continue;              if ((data[c/8] & (1 << (c&7))) != 0) continue;
2844              }              }
2845    
2846            return FALSE;            return FALSE;
2847            }            }
2848          /* Control never gets here */          /* Control never gets here */
# Line 2807  for (;;) Line 2857  for (;;)
2857            {            {
2858            if (eptr >= md->end_subject) break;            if (eptr >= md->end_subject) break;
2859            c = *eptr;            c = *eptr;
2860            if ((data[c/8] & (1 << (c&7))) != 0) continue;  
2861            if (md->runtime_caseless)            /* Either not runtime caseless, or it was a positive class. For
2862              runtime caseless, continue if either case is in the map. */
2863    
2864              if (!nasty_case)
2865              {              {
2866                if ((data[c/8] & (1 << (c&7))) != 0) continue;
2867                if (md->runtime_caseless)
2868                  {
2869                  c = pcre_fcc[c];
2870                  if ((data[c/8] & (1 << (c&7))) != 0) continue;
2871                  }
2872                }
2873    
2874              /* Runtime caseless and it was a negative class. Continue only if
2875              both cases are in the map. */
2876    
2877              else
2878                {
2879                if ((data[c/8] & (1 << (c&7))) == 0) break;
2880              c = pcre_fcc[c];              c = pcre_fcc[c];
2881              if ((data[c/8] & (1 << (c&7))) != 0) continue;              if ((data[c/8] & (1 << (c&7))) != 0) continue;
2882              }              }
2883    
2884            break;            break;
2885            }            }
2886    
# Line 3389  ocount = offsetcount & (-2); Line 3457  ocount = offsetcount & (-2);
3457  if (re->top_backref > 0 && re->top_backref >= ocount/2)  if (re->top_backref > 0 && re->top_backref >= ocount/2)
3458    {    {
3459    ocount = re->top_backref * 2 + 2;    ocount = re->top_backref * 2 + 2;
3460    match_block.offset_vector = (pcre_malloc)(ocount * sizeof(int));    match_block.offset_vector = (int *)(pcre_malloc)(ocount * sizeof(int));
3461    if (match_block.offset_vector == NULL) return PCRE_ERROR_NOMEMORY;    if (match_block.offset_vector == NULL) return PCRE_ERROR_NOMEMORY;
3462    using_temporary_offsets = TRUE;    using_temporary_offsets = TRUE;
3463    DPRINTF(("Got memory to hold back references\n"));    DPRINTF(("Got memory to hold back references\n"));

Legend:
Removed from v.11  
changed lines
  Added in v.15

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12