/[pcre]/code/tags/pcre-4.0/pcreposix.c
ViewVC logotype

Contents of /code/tags/pcre-4.0/pcreposix.c

Parent Directory | Revision Log


Revision 64 - (hide annotations) (download)
Sat Feb 24 21:40:05 2007 UTC (7 years, 4 months ago) by nigel
File MIME type: text/plain
File size: 10096 byte(s)
Tag code/trunk as code/tags/pcre-4.0.

1 nigel 3 /*************************************************
2     * Perl-Compatible Regular Expressions *
3     *************************************************/
4    
5     /*
6     This is a library of functions to support regular expressions whose syntax
7     and semantics are as close as possible to those of the Perl 5 language. See
8     the file Tech.Notes for some information on the internals.
9    
10     This module is a wrapper that provides a POSIX API to the underlying PCRE
11     functions.
12    
13     Written by: Philip Hazel <ph10@cam.ac.uk>
14    
15 nigel 63 Copyright (c) 1997-2003 University of Cambridge
16 nigel 3
17     -----------------------------------------------------------------------------
18     Permission is granted to anyone to use this software for any purpose on any
19     computer system, and to redistribute it freely, subject to the following
20     restrictions:
21    
22     1. This software is distributed in the hope that it will be useful,
23     but WITHOUT ANY WARRANTY; without even the implied warranty of
24     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
25    
26     2. The origin of this software must not be misrepresented, either by
27     explicit claim or by omission.
28    
29     3. Altered versions must be plainly marked as such, and must not be
30     misrepresented as being the original software.
31 nigel 29
32     4. If PCRE is embedded in any software that is released under the GNU
33     General Purpose Licence (GPL), then the terms of that licence shall
34     supersede any condition above with which it is incompatible.
35 nigel 3 -----------------------------------------------------------------------------
36     */
37    
#include "internal.h"
#include "pcreposix.h"
#include "stdlib.h"
#include <limits.h>
41    
42    
43    
44     /* Corresponding tables of PCRE error messages and POSIX error codes. */
45    
46 nigel 7 static const char *estring[] = {
47 nigel 3 ERR1, ERR2, ERR3, ERR4, ERR5, ERR6, ERR7, ERR8, ERR9, ERR10,
48     ERR11, ERR12, ERR13, ERR14, ERR15, ERR16, ERR17, ERR18, ERR19, ERR20,
49 nigel 43 ERR21, ERR22, ERR23, ERR24, ERR25, ERR26, ERR27, ERR29, ERR29, ERR30,
50 nigel 63 ERR31, ERR32, ERR33, ERR34, ERR35, ERR36, ERR37, ERR38, ERR39, ERR40,
51     ERR41, ERR42, ERR43 };
52 nigel 3
53     static int eint[] = {
54     REG_EESCAPE, /* "\\ at end of pattern" */
55     REG_EESCAPE, /* "\\c at end of pattern" */
56     REG_EESCAPE, /* "unrecognized character follows \\" */
57     REG_BADBR, /* "numbers out of order in {} quantifier" */
58     REG_BADBR, /* "number too big in {} quantifier" */
59     REG_EBRACK, /* "missing terminating ] for character class" */
60     REG_ECTYPE, /* "invalid escape sequence in character class" */
61     REG_ERANGE, /* "range out of order in character class" */
62     REG_BADRPT, /* "nothing to repeat" */
63     REG_BADRPT, /* "operand of unlimited repeat could match the empty string" */
64     REG_ASSERT, /* "internal error: unexpected repeat" */
65     REG_BADPAT, /* "unrecognized character after (?" */
66 nigel 63 REG_BADPAT, /* "POSIX named classes are supported only within a class" */
67 nigel 3 REG_EPAREN, /* "missing )" */
68 nigel 63 REG_ESUBREG, /* "reference to non-existent subpattern" */
69 nigel 3 REG_INVARG, /* "erroffset passed as NULL" */
70     REG_INVARG, /* "unknown option bit(s) set" */
71     REG_EPAREN, /* "missing ) after comment" */
72 nigel 53 REG_ESIZE, /* "parentheses nested too deeply" */
73 nigel 3 REG_ESIZE, /* "regular expression too large" */
74     REG_ESPACE, /* "failed to get memory" */
75     REG_EPAREN, /* "unmatched brackets" */
76 nigel 23 REG_ASSERT, /* "internal error: code overflow" */
77     REG_BADPAT, /* "unrecognized character after (?<" */
78     REG_BADPAT, /* "lookbehind assertion is not fixed length" */
79     REG_BADPAT, /* "malformed number after (?(" */
80     REG_BADPAT, /* "conditional group containe more than two branches" */
81 nigel 43 REG_BADPAT, /* "assertion expected after (?(" */
82 nigel 63 REG_BADPAT, /* "(?R or (?digits must be followed by )" */
83 nigel 43 REG_ECTYPE, /* "unknown POSIX class name" */
84 nigel 49 REG_BADPAT, /* "POSIX collating elements are not supported" */
85     REG_INVARG, /* "this version of PCRE is not compiled with PCRE_UTF8 support" */
86 nigel 63 REG_BADPAT, /* "spare error" */
87 nigel 51 REG_BADPAT, /* "character value in \x{...} sequence is too large" */
88 nigel 63 REG_BADPAT, /* "invalid condition (?(0)" */
89     REG_BADPAT, /* "\\C not allowed in lookbehind assertion" */
90     REG_EESCAPE, /* "PCRE does not support \\L, \\l, \\N, \\P, \\p, \\U, \\u, or \\X" */
91     REG_BADPAT, /* "number after (?C is > 255" */
92     REG_BADPAT, /* "closing ) for (?C expected" */
93     REG_BADPAT, /* "recursive call could loop indefinitely" */
94     REG_BADPAT, /* "unrecognized character after (?P" */
95     REG_BADPAT, /* "syntax error after (?P" */
96     REG_BADPAT /* "two named groups have the same name" */
97 nigel 3 };
98    
/* Table of texts corresponding to POSIX error codes. regerror() indexes this
table directly with the error code, so the order of the entries must follow
the numeric order of the REG_xxx values named in the comments. Both the
strings and the array of pointers are read-only. */

static const char *const pstring[] = {
  "",                                /* Dummy for value 0 */
  "internal error",                  /* REG_ASSERT */
  "invalid repeat counts in {}",     /* BADBR */
  "pattern error",                   /* BADPAT */
  "? * + invalid",                   /* BADRPT */
  "unbalanced {}",                   /* EBRACE */
  "unbalanced []",                   /* EBRACK */
  "collation error - not relevant",  /* ECOLLATE */
  "bad class",                       /* ECTYPE */
  "bad escape sequence",             /* EESCAPE */
  "empty expression",                /* EMPTY */
  "unbalanced ()",                   /* EPAREN */
  "bad range inside []",             /* ERANGE */
  "expression too big",              /* ESIZE */
  "failed to get memory",            /* ESPACE */
  "bad back reference",              /* ESUBREG */
  "bad argument",                    /* INVARG */
  "match failed"                     /* NOMATCH */
};
121    
122    
123    
124    
125     /*************************************************
126     * Translate PCRE text code to int *
127     *************************************************/
128    
129     /* PCRE compile-time errors are given as strings defined as macros. We can just
130     look them up in a table to turn them into POSIX-style error codes. */
131    
132     static int
133     pcre_posix_error_code(const char *s)
134     {
135 nigel 7 size_t i;
136 nigel 3 for (i = 0; i < sizeof(estring)/sizeof(char *); i++)
137     if (strcmp(s, estring[i]) == 0) return eint[i];
138     return REG_ASSERT;
139     }
140    
141    
142    
143     /*************************************************
144     * Translate error code to string *
145     *************************************************/
146    
147     size_t
148     regerror(int errcode, const regex_t *preg, char *errbuf, size_t errbuf_size)
149     {
150 nigel 7 const char *message, *addmessage;
151     size_t length, addlength;
152 nigel 3
153 nigel 7 message = (errcode >= (int)(sizeof(pstring)/sizeof(char *)))?
154 nigel 3 "unknown error code" : pstring[errcode];
155 nigel 7 length = strlen(message) + 1;
156 nigel 3
157 nigel 7 addmessage = " at offset ";
158     addlength = (preg != NULL && (int)preg->re_erroffset != -1)?
159     strlen(addmessage) + 6 : 0;
160 nigel 3
161     if (errbuf_size > 0)
162     {
163 nigel 7 if (addlength > 0 && errbuf_size >= length + addlength)
164 nigel 23 sprintf(errbuf, "%s%s%-6d", message, addmessage, (int)preg->re_erroffset);
165 nigel 3 else
166     {
167     strncpy(errbuf, message, errbuf_size - 1);
168     errbuf[errbuf_size-1] = 0;
169     }
170     }
171    
172 nigel 7 return length + addlength;
173 nigel 3 }
174    
175    
176    
177    
178     /*************************************************
179     * Free store held by a regex *
180     *************************************************/
181    
182     void
183     regfree(regex_t *preg)
184     {
185     (pcre_free)(preg->re_pcre);
186     }
187    
188    
189    
190    
191     /*************************************************
192     * Compile a regular expression *
193     *************************************************/
194    
195     /*
196     Arguments:
197     preg points to a structure for recording the compiled expression
198     pattern the pattern to compile
199     cflags compilation flags
200    
201     Returns: 0 on success
202     various non-zero codes on failure
203     */
204    
205     int
206     regcomp(regex_t *preg, const char *pattern, int cflags)
207     {
208 nigel 7 const char *errorptr;
209 nigel 3 int erroffset;
210     int options = 0;
211    
212     if ((cflags & REG_ICASE) != 0) options |= PCRE_CASELESS;
213     if ((cflags & REG_NEWLINE) != 0) options |= PCRE_MULTILINE;
214    
215 nigel 25 preg->re_pcre = pcre_compile(pattern, options, &errorptr, &erroffset, NULL);
216 nigel 3 preg->re_erroffset = erroffset;
217    
218     if (preg->re_pcre == NULL) return pcre_posix_error_code(errorptr);
219    
220     preg->re_nsub = pcre_info(preg->re_pcre, NULL, NULL);
221     return 0;
222     }
223    
224    
225    
226    
227     /*************************************************
228     * Match a regular expression *
229     *************************************************/
230    
231 nigel 41 /* Unfortunately, PCRE requires 3 ints of working space for each captured
232     substring, so we have to get and release working store instead of just using
233     the POSIX structures as was done in earlier releases when PCRE needed only 2
234 nigel 63 ints. However, if the number of possible capturing brackets is small, use a
235     block of store on the stack, to reduce the use of malloc/free. The threshold is
236     in a macro that can be changed at configure time. */
237 nigel 41
238 nigel 3 int
239     regexec(regex_t *preg, const char *string, size_t nmatch,
240     regmatch_t pmatch[], int eflags)
241     {
242     int rc;
243     int options = 0;
244 nigel 41 int *ovector = NULL;
245 nigel 63 int small_ovector[POSIX_MALLOC_THRESHOLD * 3];
246     BOOL allocated_ovector = FALSE;
247 nigel 3
248     if ((eflags & REG_NOTBOL) != 0) options |= PCRE_NOTBOL;
249     if ((eflags & REG_NOTEOL) != 0) options |= PCRE_NOTEOL;
250    
251 nigel 15 preg->re_erroffset = (size_t)(-1); /* Only has meaning after compile */
252 nigel 3
253 nigel 41 if (nmatch > 0)
254     {
255 nigel 63 if (nmatch <= POSIX_MALLOC_THRESHOLD)
256     {
257     ovector = &(small_ovector[0]);
258     }
259     else
260     {
261     ovector = (int *)malloc(sizeof(int) * nmatch * 3);
262     if (ovector == NULL) return REG_ESPACE;
263     allocated_ovector = TRUE;
264     }
265 nigel 41 }
266    
267 nigel 35 rc = pcre_exec(preg->re_pcre, NULL, string, (int)strlen(string), 0, options,
268 nigel 41 ovector, nmatch * 3);
269 nigel 3
270 nigel 41 if (rc == 0) rc = nmatch; /* All captured slots were filled in */
271 nigel 3
272 nigel 41 if (rc >= 0)
273 nigel 3 {
274 nigel 7 size_t i;
275 nigel 63 for (i = 0; i < (size_t)rc; i++)
276 nigel 41 {
277     pmatch[i].rm_so = ovector[i*2];
278     pmatch[i].rm_eo = ovector[i*2+1];
279     }
280 nigel 63 if (allocated_ovector) free(ovector);
281 nigel 41 for (; i < nmatch; i++) pmatch[i].rm_so = pmatch[i].rm_eo = -1;
282 nigel 3 return 0;
283     }
284    
285 nigel 41 else
286 nigel 3 {
287 nigel 63 if (allocated_ovector) free(ovector);
288 nigel 41 switch(rc)
289     {
290     case PCRE_ERROR_NOMATCH: return REG_NOMATCH;
291     case PCRE_ERROR_NULL: return REG_INVARG;
292     case PCRE_ERROR_BADOPTION: return REG_INVARG;
293     case PCRE_ERROR_BADMAGIC: return REG_INVARG;
294     case PCRE_ERROR_UNKNOWN_NODE: return REG_ASSERT;
295     case PCRE_ERROR_NOMEMORY: return REG_ESPACE;
296     default: return REG_ASSERT;
297     }
298 nigel 3 }
299     }
300    
301     /* End of pcreposix.c */

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12