/[pcre]/code/trunk/pcreposix.c
ViewVC logotype

Contents of /code/trunk/pcreposix.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 73 - (show annotations) (download)
Sat Feb 24 21:40:30 2007 UTC (7 years, 7 months ago) by nigel
File MIME type: text/plain
File size: 10393 byte(s)
Load pcre-4.5 into code/trunk.

1 /*************************************************
2 * Perl-Compatible Regular Expressions *
3 *************************************************/
4
5 /*
6 This is a library of functions to support regular expressions whose syntax
7 and semantics are as close as possible to those of the Perl 5 language. See
8 the file Tech.Notes for some information on the internals.
9
10 This module is a wrapper that provides a POSIX API to the underlying PCRE
11 functions.
12
13 Written by: Philip Hazel <ph10@cam.ac.uk>
14
15 Copyright (c) 1997-2003 University of Cambridge
16
17 -----------------------------------------------------------------------------
18 Permission is granted to anyone to use this software for any purpose on any
19 computer system, and to redistribute it freely, subject to the following
20 restrictions:
21
22 1. This software is distributed in the hope that it will be useful,
23 but WITHOUT ANY WARRANTY; without even the implied warranty of
24 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
25
26 2. The origin of this software must not be misrepresented, either by
27 explicit claim or by omission.
28
29 3. Altered versions must be plainly marked as such, and must not be
30 misrepresented as being the original software.
31
32 4. If PCRE is embedded in any software that is released under the GNU
33 General Purpose Licence (GPL), then the terms of that licence shall
34 supersede any condition above with which it is incompatible.
35 -----------------------------------------------------------------------------
36 */
37
#include "internal.h"
#include "pcreposix.h"
#include "stdlib.h"
#include <limits.h>
41
42
43
44 /* Corresponding tables of PCRE error messages and POSIX error codes. */
45
46 static const char *const estring[] = {
47 ERR1, ERR2, ERR3, ERR4, ERR5, ERR6, ERR7, ERR8, ERR9, ERR10,
48 ERR11, ERR12, ERR13, ERR14, ERR15, ERR16, ERR17, ERR18, ERR19, ERR20,
49 ERR21, ERR22, ERR23, ERR24, ERR25, ERR26, ERR27, ERR29, ERR29, ERR30,
50 ERR31, ERR32, ERR33, ERR34, ERR35, ERR36, ERR37, ERR38, ERR39, ERR40,
51 ERR41, ERR42, ERR43, ERR44 };
52
53 static const int eint[] = {
54 REG_EESCAPE, /* "\\ at end of pattern" */
55 REG_EESCAPE, /* "\\c at end of pattern" */
56 REG_EESCAPE, /* "unrecognized character follows \\" */
57 REG_BADBR, /* "numbers out of order in {} quantifier" */
58 REG_BADBR, /* "number too big in {} quantifier" */
59 REG_EBRACK, /* "missing terminating ] for character class" */
60 REG_ECTYPE, /* "invalid escape sequence in character class" */
61 REG_ERANGE, /* "range out of order in character class" */
62 REG_BADRPT, /* "nothing to repeat" */
63 REG_BADRPT, /* "operand of unlimited repeat could match the empty string" */
64 REG_ASSERT, /* "internal error: unexpected repeat" */
65 REG_BADPAT, /* "unrecognized character after (?" */
66 REG_BADPAT, /* "POSIX named classes are supported only within a class" */
67 REG_EPAREN, /* "missing )" */
68 REG_ESUBREG, /* "reference to non-existent subpattern" */
69 REG_INVARG, /* "erroffset passed as NULL" */
70 REG_INVARG, /* "unknown option bit(s) set" */
71 REG_EPAREN, /* "missing ) after comment" */
72 REG_ESIZE, /* "parentheses nested too deeply" */
73 REG_ESIZE, /* "regular expression too large" */
74 REG_ESPACE, /* "failed to get memory" */
75 REG_EPAREN, /* "unmatched brackets" */
76 REG_ASSERT, /* "internal error: code overflow" */
77 REG_BADPAT, /* "unrecognized character after (?<" */
78 REG_BADPAT, /* "lookbehind assertion is not fixed length" */
79 REG_BADPAT, /* "malformed number after (?(" */
80 REG_BADPAT, /* "conditional group containe more than two branches" */
81 REG_BADPAT, /* "assertion expected after (?(" */
82 REG_BADPAT, /* "(?R or (?digits must be followed by )" */
83 REG_ECTYPE, /* "unknown POSIX class name" */
84 REG_BADPAT, /* "POSIX collating elements are not supported" */
85 REG_INVARG, /* "this version of PCRE is not compiled with PCRE_UTF8 support" */
86 REG_BADPAT, /* "spare error" */
87 REG_BADPAT, /* "character value in \x{...} sequence is too large" */
88 REG_BADPAT, /* "invalid condition (?(0)" */
89 REG_BADPAT, /* "\\C not allowed in lookbehind assertion" */
90 REG_EESCAPE, /* "PCRE does not support \\L, \\l, \\N, \\P, \\p, \\U, \\u, or \\X" */
91 REG_BADPAT, /* "number after (?C is > 255" */
92 REG_BADPAT, /* "closing ) for (?C expected" */
93 REG_BADPAT, /* "recursive call could loop indefinitely" */
94 REG_BADPAT, /* "unrecognized character after (?P" */
95 REG_BADPAT, /* "syntax error after (?P" */
96 REG_BADPAT, /* "two named groups have the same name" */
97 REG_BADPAT /* "invalid UTF-8 string" */
98 };
99
/* Message text for each POSIX error code. regerror() indexes this table
directly with the error code, so the position of each string must equal the
numeric value of the code named beside it. Position 0 is not an error and
holds an empty placeholder. */

static const char *const pstring[] = {
  /*  0  (no error)   */  "",
  /*  1  REG_ASSERT   */  "internal error",
  /*  2  REG_BADBR    */  "invalid repeat counts in {}",
  /*  3  REG_BADPAT   */  "pattern error",
  /*  4  REG_BADRPT   */  "? * + invalid",
  /*  5  REG_EBRACE   */  "unbalanced {}",
  /*  6  REG_EBRACK   */  "unbalanced []",
  /*  7  REG_ECOLLATE */  "collation error - not relevant",
  /*  8  REG_ECTYPE   */  "bad class",
  /*  9  REG_EESCAPE  */  "bad escape sequence",
  /* 10  REG_EMPTY    */  "empty expression",
  /* 11  REG_EPAREN   */  "unbalanced ()",
  /* 12  REG_ERANGE   */  "bad range inside []",
  /* 13  REG_ESIZE    */  "expression too big",
  /* 14  REG_ESPACE   */  "failed to get memory",
  /* 15  REG_ESUBREG  */  "bad back reference",
  /* 16  REG_INVARG   */  "bad argument",
  /* 17  REG_NOMATCH  */  "match failed"
};
122
123
124
125
126 /*************************************************
127 * Translate PCRE text code to int *
128 *************************************************/
129
130 /* PCRE compile-time errors are given as strings defined as macros. We can just
131 look them up in a table to turn them into POSIX-style error codes. */
132
133 static int
134 pcre_posix_error_code(const char *s)
135 {
136 size_t i;
137 for (i = 0; i < sizeof(estring)/sizeof(char *); i++)
138 if (strcmp(s, estring[i]) == 0) return eint[i];
139 return REG_ASSERT;
140 }
141
142
143
144 /*************************************************
145 * Translate error code to string *
146 *************************************************/
147
148 EXPORT size_t
149 regerror(int errcode, const regex_t *preg, char *errbuf, size_t errbuf_size)
150 {
151 const char *message, *addmessage;
152 size_t length, addlength;
153
154 message = (errcode >= (int)(sizeof(pstring)/sizeof(char *)))?
155 "unknown error code" : pstring[errcode];
156 length = strlen(message) + 1;
157
158 addmessage = " at offset ";
159 addlength = (preg != NULL && (int)preg->re_erroffset != -1)?
160 strlen(addmessage) + 6 : 0;
161
162 if (errbuf_size > 0)
163 {
164 if (addlength > 0 && errbuf_size >= length + addlength)
165 sprintf(errbuf, "%s%s%-6d", message, addmessage, (int)preg->re_erroffset);
166 else
167 {
168 strncpy(errbuf, message, errbuf_size - 1);
169 errbuf[errbuf_size-1] = 0;
170 }
171 }
172
173 return length + addlength;
174 }
175
176
177
178
179 /*************************************************
180 * Free store held by a regex *
181 *************************************************/
182
183 EXPORT void
184 regfree(regex_t *preg)
185 {
186 (pcre_free)(preg->re_pcre);
187 }
188
189
190
191
192 /*************************************************
193 * Compile a regular expression *
194 *************************************************/
195
196 /*
197 Arguments:
198 preg points to a structure for recording the compiled expression
199 pattern the pattern to compile
200 cflags compilation flags
201
202 Returns: 0 on success
203 various non-zero codes on failure
204 */
205
206 EXPORT int
207 regcomp(regex_t *preg, const char *pattern, int cflags)
208 {
209 const char *errorptr;
210 int erroffset;
211 int options = 0;
212
213 if ((cflags & REG_ICASE) != 0) options |= PCRE_CASELESS;
214 if ((cflags & REG_NEWLINE) != 0) options |= PCRE_MULTILINE;
215
216 preg->re_pcre = pcre_compile(pattern, options, &errorptr, &erroffset, NULL);
217 preg->re_erroffset = erroffset;
218
219 if (preg->re_pcre == NULL) return pcre_posix_error_code(errorptr);
220
221 preg->re_nsub = pcre_info((const pcre *)preg->re_pcre, NULL, NULL);
222 return 0;
223 }
224
225
226
227
228 /*************************************************
229 * Match a regular expression *
230 *************************************************/
231
232 /* Unfortunately, PCRE requires 3 ints of working space for each captured
233 substring, so we have to get and release working store instead of just using
234 the POSIX structures as was done in earlier releases when PCRE needed only 2
235 ints. However, if the number of possible capturing brackets is small, use a
236 block of store on the stack, to reduce the use of malloc/free. The threshold is
237 in a macro that can be changed at configure time. */
238
239 EXPORT int
240 regexec(const regex_t *preg, const char *string, size_t nmatch,
241 regmatch_t pmatch[], int eflags)
242 {
243 int rc;
244 int options = 0;
245 int *ovector = NULL;
246 int small_ovector[POSIX_MALLOC_THRESHOLD * 3];
247 BOOL allocated_ovector = FALSE;
248
249 if ((eflags & REG_NOTBOL) != 0) options |= PCRE_NOTBOL;
250 if ((eflags & REG_NOTEOL) != 0) options |= PCRE_NOTEOL;
251
252 ((regex_t *)preg)->re_erroffset = (size_t)(-1); /* Only has meaning after compile */
253
254 if (nmatch > 0)
255 {
256 if (nmatch <= POSIX_MALLOC_THRESHOLD)
257 {
258 ovector = &(small_ovector[0]);
259 }
260 else
261 {
262 ovector = (int *)malloc(sizeof(int) * nmatch * 3);
263 if (ovector == NULL) return REG_ESPACE;
264 allocated_ovector = TRUE;
265 }
266 }
267
268 rc = pcre_exec((const pcre *)preg->re_pcre, NULL, string, (int)strlen(string),
269 0, options, ovector, nmatch * 3);
270
271 if (rc == 0) rc = nmatch; /* All captured slots were filled in */
272
273 if (rc >= 0)
274 {
275 size_t i;
276 for (i = 0; i < (size_t)rc; i++)
277 {
278 pmatch[i].rm_so = ovector[i*2];
279 pmatch[i].rm_eo = ovector[i*2+1];
280 }
281 if (allocated_ovector) free(ovector);
282 for (; i < nmatch; i++) pmatch[i].rm_so = pmatch[i].rm_eo = -1;
283 return 0;
284 }
285
286 else
287 {
288 if (allocated_ovector) free(ovector);
289 switch(rc)
290 {
291 case PCRE_ERROR_NOMATCH: return REG_NOMATCH;
292 case PCRE_ERROR_NULL: return REG_INVARG;
293 case PCRE_ERROR_BADOPTION: return REG_INVARG;
294 case PCRE_ERROR_BADMAGIC: return REG_INVARG;
295 case PCRE_ERROR_UNKNOWN_NODE: return REG_ASSERT;
296 case PCRE_ERROR_NOMEMORY: return REG_ESPACE;
297 case PCRE_ERROR_MATCHLIMIT: return REG_ESPACE;
298 case PCRE_ERROR_BADUTF8: return REG_INVARG;
299 case PCRE_ERROR_BADUTF8_OFFSET: return REG_INVARG;
300 default: return REG_ASSERT;
301 }
302 }
303 }
304
305 /* End of pcreposix.c */

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12