/[pcre]/code/tags/pcre-2.04/study.c
ViewVC logotype

Contents of /code/tags/pcre-2.04/study.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 3 - (hide annotations) (download)
Sat Feb 24 21:38:01 2007 UTC (7 years, 2 months ago) by nigel
Original Path: code/trunk/study.c
File MIME type: text/plain
File size: 8949 byte(s)
Load pcre-1.00 into code/trunk.

1 nigel 3 /*************************************************
2     * Perl-Compatible Regular Expressions *
3     *************************************************/
4    
5     /*
6     This is a library of functions to support regular expressions whose syntax
7     and semantics are as close as possible to those of the Perl 5 language. See
8     the file Tech.Notes for some information on the internals.
9    
10     Written by: Philip Hazel <ph10@cam.ac.uk>
11    
12     Copyright (c) 1997 University of Cambridge
13    
14     -----------------------------------------------------------------------------
15     Permission is granted to anyone to use this software for any purpose on any
16     computer system, and to redistribute it freely, subject to the following
17     restrictions:
18    
19     1. This software is distributed in the hope that it will be useful,
20     but WITHOUT ANY WARRANTY; without even the implied warranty of
21     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
22    
23     2. The origin of this software must not be misrepresented, either by
24     explicit claim or by omission.
25    
26     3. Altered versions must be plainly marked as such, and must not be
27     misrepresented as being the original software.
28     -----------------------------------------------------------------------------
29     */
30    
31    
32     /* Include the internals header, which itself includes Standard C headers plus
33     the external pcre header. */
34    
35     #include "internal.h"
36    
37    
38    
39     /*************************************************
40     * Create bitmap of starting chars *
41     *************************************************/
42    
43     /* This function scans a compiled unanchored expression and attempts to build a
44     bitmap of the set of initial characters. If it can't, it returns FALSE. As time
45     goes by, we may be able to get more clever at doing this.
46    
47     Arguments:
48     code points to an expression
49     start_bits points to a 32-byte table, initialized to 0
50    
51     Returns: TRUE if table built, FALSE otherwise
52     */
53    
54     static BOOL
55     set_start_bits(uschar *code, uschar *start_bits)
56     {
57     register int c;
58    
59     do
60     {
61     uschar *tcode = code + 3;
62     BOOL try_next = TRUE;
63    
64     while (try_next)
65     {
66     try_next = FALSE;
67    
68     if ((int)*tcode >= OP_BRA || *tcode == OP_ASSERT)
69     {
70     if (!set_start_bits(tcode, start_bits)) return FALSE;
71     }
72    
73     else switch(*tcode)
74     {
75     default:
76     return FALSE;
77    
78     /* BRAZERO does the bracket, but carries on. */
79    
80     case OP_BRAZERO:
81     case OP_BRAMINZERO:
82     if (!set_start_bits(++tcode, start_bits)) return FALSE;
83     do tcode += (tcode[1] << 8) + tcode[2]; while (*tcode == OP_ALT);
84     tcode += 3;
85     try_next = TRUE;
86     break;
87    
88     /* Single-char * or ? sets the bit and tries the next item */
89    
90     case OP_STAR:
91     case OP_MINSTAR:
92     case OP_QUERY:
93     case OP_MINQUERY:
94     start_bits[tcode[1]/8] |= (1 << (tcode[1]&7));
95     tcode += 2;
96     try_next = TRUE;
97     break;
98    
99     /* Single-char upto sets the bit and tries the next */
100    
101     case OP_UPTO:
102     case OP_MINUPTO:
103     start_bits[tcode[3]/8] |= (1 << (tcode[3]&7));
104     tcode += 4;
105     try_next = TRUE;
106     break;
107    
108     /* At least one single char sets the bit and stops */
109    
110     case OP_EXACT: /* Fall through */
111     tcode++;
112    
113     case OP_CHARS: /* Fall through */
114     tcode++;
115    
116     case OP_PLUS:
117     case OP_MINPLUS:
118     start_bits[tcode[1]/8] |= (1 << (tcode[1]&7));
119     break;
120    
121     /* Single character type sets the bits and stops */
122    
123     case OP_NOT_DIGIT:
124     for (c = 0; c < 32; c++) start_bits[c] |= ~pcre_cbits[c+cbit_digit];
125     break;
126    
127     case OP_DIGIT:
128     for (c = 0; c < 32; c++) start_bits[c] |= pcre_cbits[c+cbit_digit];
129     break;
130    
131     case OP_NOT_WHITESPACE:
132     for (c = 0; c < 32; c++) start_bits[c] |= ~pcre_cbits[c+cbit_space];
133     break;
134    
135     case OP_WHITESPACE:
136     for (c = 0; c < 32; c++) start_bits[c] |= pcre_cbits[c+cbit_space];
137     break;
138    
139     case OP_NOT_WORDCHAR:
140     for (c = 0; c < 32; c++)
141     start_bits[c] |= ~(pcre_cbits[c] | pcre_cbits[c+cbit_word]);
142     break;
143    
144     case OP_WORDCHAR:
145     for (c = 0; c < 32; c++)
146     start_bits[c] |= (pcre_cbits[c] | pcre_cbits[c+cbit_word]);
147     break;
148    
149     /* One or more character type fudges the pointer and restarts, knowing
150     it will hit a single character type and stop there. */
151    
152     case OP_TYPEPLUS:
153     case OP_TYPEMINPLUS:
154     tcode++;
155     try_next = TRUE;
156     break;
157    
158     case OP_TYPEEXACT:
159     tcode += 3;
160     try_next = TRUE;
161     break;
162    
163     /* Zero or more repeats of character types set the bits and then
164     try again. */
165    
166     case OP_TYPEUPTO:
167     case OP_TYPEMINUPTO:
168     tcode += 2; /* Fall through */
169    
170     case OP_TYPESTAR:
171     case OP_TYPEMINSTAR:
172     case OP_TYPEQUERY:
173     case OP_TYPEMINQUERY:
174     switch(tcode[1])
175     {
176     case OP_NOT_DIGIT:
177     for (c = 0; c < 32; c++) start_bits[c] |= ~pcre_cbits[c+cbit_digit];
178     break;
179    
180     case OP_DIGIT:
181     for (c = 0; c < 32; c++) start_bits[c] |= pcre_cbits[c+cbit_digit];
182     break;
183    
184     case OP_NOT_WHITESPACE:
185     for (c = 0; c < 32; c++) start_bits[c] |= ~pcre_cbits[c+cbit_space];
186     break;
187    
188     case OP_WHITESPACE:
189     for (c = 0; c < 32; c++) start_bits[c] |= pcre_cbits[c+cbit_space];
190     break;
191    
192     case OP_NOT_WORDCHAR:
193     for (c = 0; c < 32; c++)
194     start_bits[c] |= ~(pcre_cbits[c] | pcre_cbits[c+cbit_word]);
195     break;
196    
197     case OP_WORDCHAR:
198     for (c = 0; c < 32; c++)
199     start_bits[c] |= (pcre_cbits[c] | pcre_cbits[c+cbit_word]);
200     break;
201     }
202    
203     tcode += 2;
204     try_next = TRUE;
205     break;
206    
207     /* Character class: set the bits and either carry on or not,
208     according to the repeat count. */
209    
210     case OP_CLASS:
211     {
212     tcode++;
213     for (c = 0; c < 32; c++) start_bits[c] |= tcode[c];
214     tcode += 32;
215     switch (*tcode)
216     {
217     case OP_CRSTAR:
218     case OP_CRMINSTAR:
219     case OP_CRQUERY:
220     case OP_CRMINQUERY:
221     tcode++;
222     try_next = TRUE;
223     break;
224    
225     case OP_CRRANGE:
226     case OP_CRMINRANGE:
227     if (((tcode[1] << 8) + tcode[2]) == 0)
228     {
229     tcode += 5;
230     try_next = TRUE;
231     }
232     break;
233     }
234     }
235     break; /* End of class handling */
236    
237     } /* End of switch */
238     } /* End of try_next loop */
239    
240     code += (code[1] << 8) + code[2]; /* Advance to next branch */
241     }
242     while (*code == OP_ALT);
243     return TRUE;
244     }
245    
246    
247    
248     /*************************************************
249     * Study a compiled expression *
250     *************************************************/
251    
252     /* This function is handed a compiled expression that it must study to produce
253     information that will speed up the matching. It returns a pcre_extra block
254     which then gets handed back to pcre_exec().
255    
256     Arguments:
257     re points to the compiled expression
258     options contains option bits
259     errorptr points to where to place error messages;
260     set NULL unless error
261    
262     Returns: pointer to a pcre_extra block,
263     NULL on error or if no optimization possible
264     */
265    
266     pcre_extra *
267     pcre_study(const pcre *external_re, int options, char **errorptr)
268     {
269     BOOL caseless;
270     uschar start_bits[32];
271     real_pcre_extra *extra;
272     real_pcre *re = (real_pcre *)external_re;
273    
274     *errorptr = NULL;
275    
276     if (re == NULL || re->magic_number != MAGIC_NUMBER)
277     {
278     *errorptr = "argument is not a compiled regular expression";
279     return NULL;
280     }
281    
282     if ((options & ~PUBLIC_STUDY_OPTIONS) != 0)
283     {
284     *errorptr = "unknown or incorrect option bit(s) set";
285     return NULL;
286     }
287    
288     /* Caseless can either be from the compiled regex or from options. */
289    
290     caseless = ((re->options | options) & PCRE_CASELESS) != 0;
291    
292     /* For an anchored pattern, or an unchored pattern that has a first char, or a
293     multiline pattern that matches only at "line starts", no further processing at
294     present. */
295    
296     if ((re->options & (PCRE_ANCHORED|PCRE_FIRSTSET|PCRE_STARTLINE)) != 0)
297     return NULL;
298    
299     /* See if we can find a fixed set of initial characters for the pattern. */
300    
301     memset(start_bits, 0, 32 * sizeof(uschar));
302     if (!set_start_bits(re->code, start_bits)) return NULL;
303    
304     /* If this studying is caseless, scan the created bit map and duplicate the
305     bits for any letters. */
306    
307     if (caseless)
308     {
309     register int c;
310     for (c = 0; c < 256; c++)
311     {
312     if ((start_bits[c/8] & (1 << (c&7))) != 0 &&
313     (pcre_ctypes[c] & ctype_letter) != 0)
314     {
315     int d = pcre_fcc[c];
316     start_bits[d/8] |= (1 << (d&7));
317     }
318     }
319     }
320    
321     /* Get an "extra" block and put the information therein. */
322    
323     extra = (real_pcre_extra *)(pcre_malloc)(sizeof(real_pcre_extra));
324    
325     if (extra == NULL)
326     {
327     *errorptr = "failed to get memory";
328     return NULL;
329     }
330    
331     extra->options = PCRE_STUDY_MAPPED | (caseless? PCRE_STUDY_CASELESS : 0);
332     memcpy(extra->start_bits, start_bits, sizeof(start_bits));
333    
334     return (pcre_extra *)extra;
335     }
336    
337     /* End of study.c */

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12