/[pcre]/code/trunk/pcreposix.c
ViewVC logotype

Contents of /code/trunk/pcreposix.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 71 - (hide annotations) (download)
Sat Feb 24 21:40:24 2007 UTC (7 years, 2 months ago) by nigel
File MIME type: text/plain
File size: 10193 byte(s)
Load pcre-4.4 into code/trunk.

1 nigel 3 /*************************************************
2     * Perl-Compatible Regular Expressions *
3     *************************************************/
4    
5     /*
6     This is a library of functions to support regular expressions whose syntax
7     and semantics are as close as possible to those of the Perl 5 language. See
8     the file Tech.Notes for some information on the internals.
9    
10     This module is a wrapper that provides a POSIX API to the underlying PCRE
11     functions.
12    
13     Written by: Philip Hazel <ph10@cam.ac.uk>
14    
15 nigel 63 Copyright (c) 1997-2003 University of Cambridge
16 nigel 3
17     -----------------------------------------------------------------------------
18     Permission is granted to anyone to use this software for any purpose on any
19     computer system, and to redistribute it freely, subject to the following
20     restrictions:
21    
22     1. This software is distributed in the hope that it will be useful,
23     but WITHOUT ANY WARRANTY; without even the implied warranty of
24     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
25    
26     2. The origin of this software must not be misrepresented, either by
27     explicit claim or by omission.
28    
29     3. Altered versions must be plainly marked as such, and must not be
30     misrepresented as being the original software.
31 nigel 29
32     4. If PCRE is embedded in any software that is released under the GNU
33     General Purpose Licence (GPL), then the terms of that licence shall
34     supersede any condition above with which it is incompatible.
35 nigel 3 -----------------------------------------------------------------------------
36     */
37    
38     #include "internal.h"
39     #include "pcreposix.h"
40     #include "stdlib.h"
41    
42    
43    
44     /* Corresponding tables of PCRE error messages and POSIX error codes. */
45    
46 nigel 7 static const char *estring[] = {
47 nigel 3 ERR1, ERR2, ERR3, ERR4, ERR5, ERR6, ERR7, ERR8, ERR9, ERR10,
48     ERR11, ERR12, ERR13, ERR14, ERR15, ERR16, ERR17, ERR18, ERR19, ERR20,
49 nigel 43 ERR21, ERR22, ERR23, ERR24, ERR25, ERR26, ERR27, ERR29, ERR29, ERR30,
50 nigel 63 ERR31, ERR32, ERR33, ERR34, ERR35, ERR36, ERR37, ERR38, ERR39, ERR40,
51 nigel 71 ERR41, ERR42, ERR43, ERR44 };
52 nigel 3
53     static int eint[] = {
54     REG_EESCAPE, /* "\\ at end of pattern" */
55     REG_EESCAPE, /* "\\c at end of pattern" */
56     REG_EESCAPE, /* "unrecognized character follows \\" */
57     REG_BADBR, /* "numbers out of order in {} quantifier" */
58     REG_BADBR, /* "number too big in {} quantifier" */
59     REG_EBRACK, /* "missing terminating ] for character class" */
60     REG_ECTYPE, /* "invalid escape sequence in character class" */
61     REG_ERANGE, /* "range out of order in character class" */
62     REG_BADRPT, /* "nothing to repeat" */
63     REG_BADRPT, /* "operand of unlimited repeat could match the empty string" */
64     REG_ASSERT, /* "internal error: unexpected repeat" */
65     REG_BADPAT, /* "unrecognized character after (?" */
66 nigel 63 REG_BADPAT, /* "POSIX named classes are supported only within a class" */
67 nigel 3 REG_EPAREN, /* "missing )" */
68 nigel 63 REG_ESUBREG, /* "reference to non-existent subpattern" */
69 nigel 3 REG_INVARG, /* "erroffset passed as NULL" */
70     REG_INVARG, /* "unknown option bit(s) set" */
71     REG_EPAREN, /* "missing ) after comment" */
72 nigel 53 REG_ESIZE, /* "parentheses nested too deeply" */
73 nigel 3 REG_ESIZE, /* "regular expression too large" */
74     REG_ESPACE, /* "failed to get memory" */
75     REG_EPAREN, /* "unmatched brackets" */
76 nigel 23 REG_ASSERT, /* "internal error: code overflow" */
77     REG_BADPAT, /* "unrecognized character after (?<" */
78     REG_BADPAT, /* "lookbehind assertion is not fixed length" */
79     REG_BADPAT, /* "malformed number after (?(" */
80     REG_BADPAT, /* "conditional group containe more than two branches" */
81 nigel 43 REG_BADPAT, /* "assertion expected after (?(" */
82 nigel 63 REG_BADPAT, /* "(?R or (?digits must be followed by )" */
83 nigel 43 REG_ECTYPE, /* "unknown POSIX class name" */
84 nigel 49 REG_BADPAT, /* "POSIX collating elements are not supported" */
85     REG_INVARG, /* "this version of PCRE is not compiled with PCRE_UTF8 support" */
86 nigel 63 REG_BADPAT, /* "spare error" */
87 nigel 51 REG_BADPAT, /* "character value in \x{...} sequence is too large" */
88 nigel 63 REG_BADPAT, /* "invalid condition (?(0)" */
89     REG_BADPAT, /* "\\C not allowed in lookbehind assertion" */
90     REG_EESCAPE, /* "PCRE does not support \\L, \\l, \\N, \\P, \\p, \\U, \\u, or \\X" */
91     REG_BADPAT, /* "number after (?C is > 255" */
92     REG_BADPAT, /* "closing ) for (?C expected" */
93     REG_BADPAT, /* "recursive call could loop indefinitely" */
94     REG_BADPAT, /* "unrecognized character after (?P" */
95     REG_BADPAT, /* "syntax error after (?P" */
96 nigel 71 REG_BADPAT, /* "two named groups have the same name" */
97     REG_BADPAT /* "invalid UTF-8 string" */
98 nigel 3 };
99    
100     /* Table of texts corresponding to POSIX error codes */
101    
/* Message texts indexed directly by POSIX error code; regerror() reads
pstring[errcode]. Slot 0 is a placeholder because no error has the value 0.
Both the strings and the table of pointers are read-only. */

static const char *const pstring[] = {
  "",                                /* Dummy for value 0 */
  "internal error",                  /* REG_ASSERT */
  "invalid repeat counts in {}",     /* REG_BADBR */
  "pattern error",                   /* REG_BADPAT */
  "? * + invalid",                   /* REG_BADRPT */
  "unbalanced {}",                   /* REG_EBRACE */
  "unbalanced []",                   /* REG_EBRACK */
  "collation error - not relevant",  /* REG_ECOLLATE */
  "bad class",                       /* REG_ECTYPE */
  "bad escape sequence",             /* REG_EESCAPE */
  "empty expression",                /* REG_EMPTY */
  "unbalanced ()",                   /* REG_EPAREN */
  "bad range inside []",             /* REG_ERANGE */
  "expression too big",              /* REG_ESIZE */
  "failed to get memory",            /* REG_ESPACE */
  "bad back reference",              /* REG_ESUBREG */
  "bad argument",                    /* REG_INVARG */
  "match failed"                     /* REG_NOMATCH */
};
122    
123    
124    
125    
126     /*************************************************
127     * Translate PCRE text code to int *
128     *************************************************/
129    
130     /* PCRE compile-time errors are given as strings defined as macros. We can just
131     look them up in a table to turn them into POSIX-style error codes. */
132    
133     static int
134     pcre_posix_error_code(const char *s)
135     {
136 nigel 7 size_t i;
137 nigel 3 for (i = 0; i < sizeof(estring)/sizeof(char *); i++)
138     if (strcmp(s, estring[i]) == 0) return eint[i];
139     return REG_ASSERT;
140     }
141    
142    
143    
144     /*************************************************
145     * Translate error code to string *
146     *************************************************/
147    
148     size_t
149     regerror(int errcode, const regex_t *preg, char *errbuf, size_t errbuf_size)
150     {
151 nigel 7 const char *message, *addmessage;
152     size_t length, addlength;
153 nigel 3
154 nigel 7 message = (errcode >= (int)(sizeof(pstring)/sizeof(char *)))?
155 nigel 3 "unknown error code" : pstring[errcode];
156 nigel 7 length = strlen(message) + 1;
157 nigel 3
158 nigel 7 addmessage = " at offset ";
159     addlength = (preg != NULL && (int)preg->re_erroffset != -1)?
160     strlen(addmessage) + 6 : 0;
161 nigel 3
162     if (errbuf_size > 0)
163     {
164 nigel 7 if (addlength > 0 && errbuf_size >= length + addlength)
165 nigel 23 sprintf(errbuf, "%s%s%-6d", message, addmessage, (int)preg->re_erroffset);
166 nigel 3 else
167     {
168     strncpy(errbuf, message, errbuf_size - 1);
169     errbuf[errbuf_size-1] = 0;
170     }
171     }
172    
173 nigel 7 return length + addlength;
174 nigel 3 }
175    
176    
177    
178    
179     /*************************************************
180     * Free store held by a regex *
181     *************************************************/
182    
183     void
184     regfree(regex_t *preg)
185     {
186     (pcre_free)(preg->re_pcre);
187     }
188    
189    
190    
191    
192     /*************************************************
193     * Compile a regular expression *
194     *************************************************/
195    
196     /*
197     Arguments:
198     preg points to a structure for recording the compiled expression
199     pattern the pattern to compile
200     cflags compilation flags
201    
202     Returns: 0 on success
203     various non-zero codes on failure
204     */
205    
206     int
207     regcomp(regex_t *preg, const char *pattern, int cflags)
208     {
209 nigel 7 const char *errorptr;
210 nigel 3 int erroffset;
211     int options = 0;
212    
213     if ((cflags & REG_ICASE) != 0) options |= PCRE_CASELESS;
214     if ((cflags & REG_NEWLINE) != 0) options |= PCRE_MULTILINE;
215    
216 nigel 25 preg->re_pcre = pcre_compile(pattern, options, &errorptr, &erroffset, NULL);
217 nigel 3 preg->re_erroffset = erroffset;
218    
219     if (preg->re_pcre == NULL) return pcre_posix_error_code(errorptr);
220    
221 nigel 71 preg->re_nsub = pcre_info((const pcre *)preg->re_pcre, NULL, NULL);
222 nigel 3 return 0;
223     }
224    
225    
226    
227    
228     /*************************************************
229     * Match a regular expression *
230     *************************************************/
231    
232 nigel 41 /* Unfortunately, PCRE requires 3 ints of working space for each captured
233     substring, so we have to get and release working store instead of just using
234     the POSIX structures as was done in earlier releases when PCRE needed only 2
235 nigel 63 ints. However, if the number of possible capturing brackets is small, use a
236     block of store on the stack, to reduce the use of malloc/free. The threshold is
237     in a macro that can be changed at configure time. */
238 nigel 41
239 nigel 3 int
240 nigel 69 regexec(const regex_t *preg, const char *string, size_t nmatch,
241 nigel 3 regmatch_t pmatch[], int eflags)
242     {
243     int rc;
244     int options = 0;
245 nigel 41 int *ovector = NULL;
246 nigel 63 int small_ovector[POSIX_MALLOC_THRESHOLD * 3];
247     BOOL allocated_ovector = FALSE;
248 nigel 3
249     if ((eflags & REG_NOTBOL) != 0) options |= PCRE_NOTBOL;
250     if ((eflags & REG_NOTEOL) != 0) options |= PCRE_NOTEOL;
251    
252 nigel 69 ((regex_t *)preg)->re_erroffset = (size_t)(-1); /* Only has meaning after compile */
253 nigel 3
254 nigel 41 if (nmatch > 0)
255     {
256 nigel 63 if (nmatch <= POSIX_MALLOC_THRESHOLD)
257     {
258     ovector = &(small_ovector[0]);
259     }
260     else
261     {
262     ovector = (int *)malloc(sizeof(int) * nmatch * 3);
263     if (ovector == NULL) return REG_ESPACE;
264     allocated_ovector = TRUE;
265     }
266 nigel 41 }
267    
268 nigel 71 rc = pcre_exec((const pcre *)preg->re_pcre, NULL, string, (int)strlen(string),
269     0, options, ovector, nmatch * 3);
270 nigel 3
271 nigel 41 if (rc == 0) rc = nmatch; /* All captured slots were filled in */
272 nigel 3
273 nigel 41 if (rc >= 0)
274 nigel 3 {
275 nigel 7 size_t i;
276 nigel 63 for (i = 0; i < (size_t)rc; i++)
277 nigel 41 {
278     pmatch[i].rm_so = ovector[i*2];
279     pmatch[i].rm_eo = ovector[i*2+1];
280     }
281 nigel 63 if (allocated_ovector) free(ovector);
282 nigel 41 for (; i < nmatch; i++) pmatch[i].rm_so = pmatch[i].rm_eo = -1;
283 nigel 3 return 0;
284     }
285    
286 nigel 41 else
287 nigel 3 {
288 nigel 63 if (allocated_ovector) free(ovector);
289 nigel 41 switch(rc)
290     {
291     case PCRE_ERROR_NOMATCH: return REG_NOMATCH;
292     case PCRE_ERROR_NULL: return REG_INVARG;
293     case PCRE_ERROR_BADOPTION: return REG_INVARG;
294     case PCRE_ERROR_BADMAGIC: return REG_INVARG;
295     case PCRE_ERROR_UNKNOWN_NODE: return REG_ASSERT;
296     case PCRE_ERROR_NOMEMORY: return REG_ESPACE;
297     default: return REG_ASSERT;
298     }
299 nigel 3 }
300     }
301    
302     /* End of pcreposix.c */

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12