/[pcre]/code/trunk/study.c
ViewVC logotype

Diff of /code/trunk/study.c

Parent Directory | Revision Log | View Patch

revision 15 by nigel, Sat Feb 24 21:38:25 2007 UTC | revision 23 by nigel, Sat Feb 24 21:38:41 2007 UTC
# Line 37  the external pcre header. */ Line 37  the external pcre header. */
37    
38    
39  /*************************************************  /*************************************************
40    *      Set a bit and maybe its alternate case    *
41    *************************************************/
42    
43    /* Given a character, set its bit in the table, and also the bit for the other
44    version of a letter if we are caseless.
45    
46    Arguments:
47      start_bits    points to the bit map
48      c             is the character
49      caseless      the caseless flag
50    
51    Returns:        nothing
52    */
53    
54    static void
55    set_bit(uschar *start_bits, int c, BOOL caseless)
56    {
57    start_bits[c/8] |= (1 << (c&7));
58    if (caseless && (pcre_ctypes[c] & ctype_letter) != 0)
59      start_bits[pcre_fcc[c]/8] |= (1 << (pcre_fcc[c]&7));
60    }
61    
62    
63    
64    /*************************************************
65  *          Create bitmap of starting chars       *  *          Create bitmap of starting chars       *
66  *************************************************/  *************************************************/
67    
# Line 47  goes by, we may be able to get more clev Line 72  goes by, we may be able to get more clev
72  Arguments:  Arguments:
73    code         points to an expression    code         points to an expression
74    start_bits   points to a 32-byte table, initialized to 0    start_bits   points to a 32-byte table, initialized to 0
75      caseless     the current state of the caseless flag
76    
77  Returns:       TRUE if table built, FALSE otherwise  Returns:       TRUE if table built, FALSE otherwise
78  */  */
79    
80  static BOOL  static BOOL
81  set_start_bits(const uschar *code, uschar *start_bits)  set_start_bits(const uschar *code, uschar *start_bits, BOOL caseless)
82  {  {
83  register int c;  register int c;
84    
# Line 65  do Line 91  do
91      {      {
92      try_next = FALSE;      try_next = FALSE;
93    
94        /* If a branch starts with a bracket or a positive lookahead assertion,
95        recurse to set bits from within them. That's all for this branch. */
96    
97      if ((int)*tcode >= OP_BRA || *tcode == OP_ASSERT)      if ((int)*tcode >= OP_BRA || *tcode == OP_ASSERT)
98        {        {
99        if (!set_start_bits(tcode, start_bits)) return FALSE;        if (!set_start_bits(tcode, start_bits, caseless)) return FALSE;
100        }        }
101    
102      else switch(*tcode)      else switch(*tcode)
# Line 75  do Line 104  do
104        default:        default:
105        return FALSE;        return FALSE;
106    
107          /* Skip over lookbehind and negative lookahead assertions */
108    
109          case OP_ASSERT_NOT:
110          case OP_ASSERTBACK:
111          case OP_ASSERTBACK_NOT:
112          try_next = TRUE;
113          do tcode += (tcode[1] << 8) + tcode[2]; while (*tcode == OP_ALT);
114          tcode += 3;
115          break;
116    
117          /* Skip over an option setting, changing the caseless flag */
118    
119          case OP_OPT:
120          caseless = (tcode[1] & PCRE_CASELESS) != 0;
121          tcode += 2;
122          try_next = TRUE;
123          break;
124    
125        /* BRAZERO does the bracket, but carries on. */        /* BRAZERO does the bracket, but carries on. */
126    
127        case OP_BRAZERO:        case OP_BRAZERO:
128        case OP_BRAMINZERO:        case OP_BRAMINZERO:
129        if (!set_start_bits(++tcode, start_bits)) return FALSE;        if (!set_start_bits(++tcode, start_bits, caseless)) return FALSE;
130        do tcode += (tcode[1] << 8) + tcode[2]; while (*tcode == OP_ALT);        do tcode += (tcode[1] << 8) + tcode[2]; while (*tcode == OP_ALT);
131        tcode += 3;        tcode += 3;
132        try_next = TRUE;        try_next = TRUE;
# Line 91  do Line 138  do
138        case OP_MINSTAR:        case OP_MINSTAR:
139        case OP_QUERY:        case OP_QUERY:
140        case OP_MINQUERY:        case OP_MINQUERY:
141        start_bits[tcode[1]/8] |= (1 << (tcode[1]&7));        set_bit(start_bits, tcode[1], caseless);
142        tcode += 2;        tcode += 2;
143        try_next = TRUE;        try_next = TRUE;
144        break;        break;
# Line 100  do Line 147  do
147    
148        case OP_UPTO:        case OP_UPTO:
149        case OP_MINUPTO:        case OP_MINUPTO:
150        start_bits[tcode[3]/8] |= (1 << (tcode[3]&7));        set_bit(start_bits, tcode[3], caseless);
151        tcode += 4;        tcode += 4;
152        try_next = TRUE;        try_next = TRUE;
153        break;        break;
# Line 115  do Line 162  do
162    
163        case OP_PLUS:        case OP_PLUS:
164        case OP_MINPLUS:        case OP_MINPLUS:
165        start_bits[tcode[1]/8] |= (1 << (tcode[1]&7));        set_bit(start_bits, tcode[1], caseless);
166        break;        break;
167    
168        /* Single character type sets the bits and stops */        /* Single character type sets the bits and stops */
# Line 208  do Line 255  do
255        according to the repeat count. */        according to the repeat count. */
256    
257        case OP_CLASS:        case OP_CLASS:
       case OP_NEGCLASS:  
258          {          {
259          tcode++;          tcode++;
260          for (c = 0; c < 32; c++) start_bits[c] |= tcode[c];          for (c = 0; c < 32; c++) start_bits[c] |= tcode[c];
# Line 267  Returns: pointer to a pcre_extra bloc Line 313  Returns: pointer to a pcre_extra bloc
313  pcre_extra *  pcre_extra *
314  pcre_study(const pcre *external_re, int options, const char **errorptr)  pcre_study(const pcre *external_re, int options, const char **errorptr)
315  {  {
 BOOL caseless;  
316  uschar start_bits[32];  uschar start_bits[32];
317  real_pcre_extra *extra;  real_pcre_extra *extra;
318  const real_pcre *re = (const real_pcre *)external_re;  const real_pcre *re = (const real_pcre *)external_re;
# Line 286  if ((options & ~PUBLIC_STUDY_OPTIONS) != Line 331  if ((options & ~PUBLIC_STUDY_OPTIONS) !=
331    return NULL;    return NULL;
332    }    }
333    
 /* Caseless can either be from the compiled regex or from options. */  
   
 caseless = ((re->options | options) & PCRE_CASELESS) != 0;  
   
334  /* For an anchored pattern, or an unanchored pattern that has a first char, or a  /* For an anchored pattern, or an unanchored pattern that has a first char, or a
335  multiline pattern that matches only at "line starts", no further processing at  multiline pattern that matches only at "line starts", no further processing at
336  present. */  present. */
# Line 300  if ((re->options & (PCRE_ANCHORED|PCRE_F Line 341  if ((re->options & (PCRE_ANCHORED|PCRE_F
341  /* See if we can find a fixed set of initial characters for the pattern. */  /* See if we can find a fixed set of initial characters for the pattern. */
342    
343  memset(start_bits, 0, 32 * sizeof(uschar));  memset(start_bits, 0, 32 * sizeof(uschar));
344  if (!set_start_bits(re->code, start_bits)) return NULL;  if (!set_start_bits(re->code, start_bits, (re->options & PCRE_CASELESS) != 0))
345      return NULL;
 /* If this studying is caseless, scan the created bit map and duplicate the  
 bits for any letters. */  
   
 if (caseless)  
   {  
   register int c;  
   for (c = 0; c < 256; c++)  
     {  
     if ((start_bits[c/8] & (1 << (c&7))) != 0 &&  
         (pcre_ctypes[c] & ctype_letter) != 0)  
       {  
       int d = pcre_fcc[c];  
       start_bits[d/8] |= (1 << (d&7));  
       }  
     }  
   }  
346    
347  /* Get an "extra" block and put the information therein. */  /* Get an "extra" block and put the information therein. */
348    
# Line 329  if (extra == NULL) Line 354  if (extra == NULL)
354    return NULL;    return NULL;
355    }    }
356    
357  extra->options = PCRE_STUDY_MAPPED | (caseless? PCRE_STUDY_CASELESS : 0);  extra->options = PCRE_STUDY_MAPPED;
358  memcpy(extra->start_bits, start_bits, sizeof(start_bits));  memcpy(extra->start_bits, start_bits, sizeof(start_bits));
359    
360  return (pcre_extra *)extra;  return (pcre_extra *)extra;

Legend:
Removed from v.15  
changed lines
  Added in v.23

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12