/[pcre]/code/tags/pcre-3.3/study.c
ViewVC logotype

Contents of /code/tags/pcre-3.3/study.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 13 - (hide annotations) (download)
Sat Feb 24 21:38:21 2007 UTC (7 years, 5 months ago) by nigel
Original Path: code/trunk/study.c
File MIME type: text/plain
File size: 9003 byte(s)
Load pcre-1.05 into code/trunk.

1 nigel 3 /*************************************************
2     * Perl-Compatible Regular Expressions *
3     *************************************************/
4    
5     /*
6     This is a library of functions to support regular expressions whose syntax
7     and semantics are as close as possible to those of the Perl 5 language. See
8     the file Tech.Notes for some information on the internals.
9    
10     Written by: Philip Hazel <ph10@cam.ac.uk>
11    
12     Copyright (c) 1997 University of Cambridge
13    
14     -----------------------------------------------------------------------------
15     Permission is granted to anyone to use this software for any purpose on any
16     computer system, and to redistribute it freely, subject to the following
17     restrictions:
18    
19     1. This software is distributed in the hope that it will be useful,
20     but WITHOUT ANY WARRANTY; without even the implied warranty of
21     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
22    
23     2. The origin of this software must not be misrepresented, either by
24     explicit claim or by omission.
25    
26     3. Altered versions must be plainly marked as such, and must not be
27     misrepresented as being the original software.
28     -----------------------------------------------------------------------------
29     */
30    
31    
32     /* Include the internals header, which itself includes Standard C headers plus
33     the external pcre header. */
34    
35     #include "internal.h"
36    
37    
38    
39     /*************************************************
40     * Create bitmap of starting chars *
41     *************************************************/
42    
43     /* This function scans a compiled unanchored expression and attempts to build a
44     bitmap of the set of initial characters. If it can't, it returns FALSE. As time
45     goes by, we may be able to get more clever at doing this.
46    
47     Arguments:
48     code points to an expression
49     start_bits points to a 32-byte table, initialized to 0
50    
51     Returns: TRUE if table built, FALSE otherwise
52     */
53    
54     static BOOL
55 nigel 7 set_start_bits(const uschar *code, uschar *start_bits)
56 nigel 3 {
57     register int c;
58    
59     do
60     {
61 nigel 7 const uschar *tcode = code + 3;
62 nigel 3 BOOL try_next = TRUE;
63    
64     while (try_next)
65     {
66     try_next = FALSE;
67    
68     if ((int)*tcode >= OP_BRA || *tcode == OP_ASSERT)
69     {
70     if (!set_start_bits(tcode, start_bits)) return FALSE;
71     }
72    
73     else switch(*tcode)
74     {
75     default:
76     return FALSE;
77    
78     /* BRAZERO does the bracket, but carries on. */
79    
80     case OP_BRAZERO:
81     case OP_BRAMINZERO:
82     if (!set_start_bits(++tcode, start_bits)) return FALSE;
83     do tcode += (tcode[1] << 8) + tcode[2]; while (*tcode == OP_ALT);
84     tcode += 3;
85     try_next = TRUE;
86     break;
87    
88     /* Single-char * or ? sets the bit and tries the next item */
89    
90     case OP_STAR:
91     case OP_MINSTAR:
92     case OP_QUERY:
93     case OP_MINQUERY:
94     start_bits[tcode[1]/8] |= (1 << (tcode[1]&7));
95     tcode += 2;
96     try_next = TRUE;
97     break;
98    
99     /* Single-char upto sets the bit and tries the next */
100    
101     case OP_UPTO:
102     case OP_MINUPTO:
103     start_bits[tcode[3]/8] |= (1 << (tcode[3]&7));
104     tcode += 4;
105     try_next = TRUE;
106     break;
107    
108     /* At least one single char sets the bit and stops */
109    
110     case OP_EXACT: /* Fall through */
111     tcode++;
112    
113     case OP_CHARS: /* Fall through */
114     tcode++;
115    
116     case OP_PLUS:
117     case OP_MINPLUS:
118     start_bits[tcode[1]/8] |= (1 << (tcode[1]&7));
119     break;
120    
121     /* Single character type sets the bits and stops */
122    
123     case OP_NOT_DIGIT:
124     for (c = 0; c < 32; c++) start_bits[c] |= ~pcre_cbits[c+cbit_digit];
125     break;
126    
127     case OP_DIGIT:
128     for (c = 0; c < 32; c++) start_bits[c] |= pcre_cbits[c+cbit_digit];
129     break;
130    
131     case OP_NOT_WHITESPACE:
132     for (c = 0; c < 32; c++) start_bits[c] |= ~pcre_cbits[c+cbit_space];
133     break;
134    
135     case OP_WHITESPACE:
136     for (c = 0; c < 32; c++) start_bits[c] |= pcre_cbits[c+cbit_space];
137     break;
138    
139     case OP_NOT_WORDCHAR:
140     for (c = 0; c < 32; c++)
141     start_bits[c] |= ~(pcre_cbits[c] | pcre_cbits[c+cbit_word]);
142     break;
143    
144     case OP_WORDCHAR:
145     for (c = 0; c < 32; c++)
146     start_bits[c] |= (pcre_cbits[c] | pcre_cbits[c+cbit_word]);
147     break;
148    
149     /* One or more character type fudges the pointer and restarts, knowing
150     it will hit a single character type and stop there. */
151    
152     case OP_TYPEPLUS:
153     case OP_TYPEMINPLUS:
154     tcode++;
155     try_next = TRUE;
156     break;
157    
158     case OP_TYPEEXACT:
159     tcode += 3;
160     try_next = TRUE;
161     break;
162    
163     /* Zero or more repeats of character types set the bits and then
164     try again. */
165    
166     case OP_TYPEUPTO:
167     case OP_TYPEMINUPTO:
168     tcode += 2; /* Fall through */
169    
170     case OP_TYPESTAR:
171     case OP_TYPEMINSTAR:
172     case OP_TYPEQUERY:
173     case OP_TYPEMINQUERY:
174     switch(tcode[1])
175     {
176     case OP_NOT_DIGIT:
177     for (c = 0; c < 32; c++) start_bits[c] |= ~pcre_cbits[c+cbit_digit];
178     break;
179    
180     case OP_DIGIT:
181     for (c = 0; c < 32; c++) start_bits[c] |= pcre_cbits[c+cbit_digit];
182     break;
183    
184     case OP_NOT_WHITESPACE:
185     for (c = 0; c < 32; c++) start_bits[c] |= ~pcre_cbits[c+cbit_space];
186     break;
187    
188     case OP_WHITESPACE:
189     for (c = 0; c < 32; c++) start_bits[c] |= pcre_cbits[c+cbit_space];
190     break;
191    
192     case OP_NOT_WORDCHAR:
193     for (c = 0; c < 32; c++)
194     start_bits[c] |= ~(pcre_cbits[c] | pcre_cbits[c+cbit_word]);
195     break;
196    
197     case OP_WORDCHAR:
198     for (c = 0; c < 32; c++)
199     start_bits[c] |= (pcre_cbits[c] | pcre_cbits[c+cbit_word]);
200     break;
201     }
202    
203     tcode += 2;
204     try_next = TRUE;
205     break;
206    
207     /* Character class: set the bits and either carry on or not,
208     according to the repeat count. */
209    
210     case OP_CLASS:
211 nigel 13 case OP_NEGCLASS:
212 nigel 3 {
213     tcode++;
214     for (c = 0; c < 32; c++) start_bits[c] |= tcode[c];
215     tcode += 32;
216     switch (*tcode)
217     {
218     case OP_CRSTAR:
219     case OP_CRMINSTAR:
220     case OP_CRQUERY:
221     case OP_CRMINQUERY:
222     tcode++;
223     try_next = TRUE;
224     break;
225    
226     case OP_CRRANGE:
227     case OP_CRMINRANGE:
228     if (((tcode[1] << 8) + tcode[2]) == 0)
229     {
230     tcode += 5;
231     try_next = TRUE;
232     }
233     break;
234     }
235     }
236     break; /* End of class handling */
237    
238     } /* End of switch */
239     } /* End of try_next loop */
240    
241     code += (code[1] << 8) + code[2]; /* Advance to next branch */
242     }
243     while (*code == OP_ALT);
244     return TRUE;
245     }
246    
247    
248    
249     /*************************************************
250     * Study a compiled expression *
251     *************************************************/
252    
253     /* This function is handed a compiled expression that it must study to produce
254     information that will speed up the matching. It returns a pcre_extra block
255     which then gets handed back to pcre_exec().
256    
257     Arguments:
258     re points to the compiled expression
259     options contains option bits
260     errorptr points to where to place error messages;
261     set NULL unless error
262    
263     Returns: pointer to a pcre_extra block,
264     NULL on error or if no optimization possible
265     */
266    
267     pcre_extra *
268 nigel 7 pcre_study(const pcre *external_re, int options, const char **errorptr)
269 nigel 3 {
270     BOOL caseless;
271     uschar start_bits[32];
272     real_pcre_extra *extra;
273 nigel 7 const real_pcre *re = (const real_pcre *)external_re;
274 nigel 3
275     *errorptr = NULL;
276    
277     if (re == NULL || re->magic_number != MAGIC_NUMBER)
278     {
279     *errorptr = "argument is not a compiled regular expression";
280     return NULL;
281     }
282    
283     if ((options & ~PUBLIC_STUDY_OPTIONS) != 0)
284     {
285     *errorptr = "unknown or incorrect option bit(s) set";
286     return NULL;
287     }
288    
289     /* Caseless can either be from the compiled regex or from options. */
290    
291     caseless = ((re->options | options) & PCRE_CASELESS) != 0;
292    
293     /* For an anchored pattern, or an unchored pattern that has a first char, or a
294     multiline pattern that matches only at "line starts", no further processing at
295     present. */
296    
297     if ((re->options & (PCRE_ANCHORED|PCRE_FIRSTSET|PCRE_STARTLINE)) != 0)
298     return NULL;
299    
300     /* See if we can find a fixed set of initial characters for the pattern. */
301    
302     memset(start_bits, 0, 32 * sizeof(uschar));
303     if (!set_start_bits(re->code, start_bits)) return NULL;
304    
305     /* If this studying is caseless, scan the created bit map and duplicate the
306     bits for any letters. */
307    
308     if (caseless)
309     {
310     register int c;
311     for (c = 0; c < 256; c++)
312     {
313     if ((start_bits[c/8] & (1 << (c&7))) != 0 &&
314     (pcre_ctypes[c] & ctype_letter) != 0)
315     {
316     int d = pcre_fcc[c];
317     start_bits[d/8] |= (1 << (d&7));
318     }
319     }
320     }
321    
322     /* Get an "extra" block and put the information therein. */
323    
324     extra = (real_pcre_extra *)(pcre_malloc)(sizeof(real_pcre_extra));
325    
326     if (extra == NULL)
327     {
328     *errorptr = "failed to get memory";
329     return NULL;
330     }
331    
332     extra->options = PCRE_STUDY_MAPPED | (caseless? PCRE_STUDY_CASELESS : 0);
333     memcpy(extra->start_bits, start_bits, sizeof(start_bits));
334    
335     return (pcre_extra *)extra;
336     }
337    
338     /* End of study.c */

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12