/[pcre]/code/tags/pcre-4.2/pcreposix.c
ViewVC logotype

Contents of /code/tags/pcre-4.2/pcreposix.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 68 - (show annotations) (download)
Sat Feb 24 21:40:15 2007 UTC (7 years, 6 months ago) by nigel
File MIME type: text/plain
File size: 10096 byte(s)
Tag code/trunk as code/tags/pcre-4.2.

1 /*************************************************
2 * Perl-Compatible Regular Expressions *
3 *************************************************/
4
5 /*
6 This is a library of functions to support regular expressions whose syntax
7 and semantics are as close as possible to those of the Perl 5 language. See
8 the file Tech.Notes for some information on the internals.
9
10 This module is a wrapper that provides a POSIX API to the underlying PCRE
11 functions.
12
13 Written by: Philip Hazel <ph10@cam.ac.uk>
14
15 Copyright (c) 1997-2003 University of Cambridge
16
17 -----------------------------------------------------------------------------
18 Permission is granted to anyone to use this software for any purpose on any
19 computer system, and to redistribute it freely, subject to the following
20 restrictions:
21
22 1. This software is distributed in the hope that it will be useful,
23 but WITHOUT ANY WARRANTY; without even the implied warranty of
24 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
25
26 2. The origin of this software must not be misrepresented, either by
27 explicit claim or by omission.
28
29 3. Altered versions must be plainly marked as such, and must not be
30 misrepresented as being the original software.
31
32 4. If PCRE is embedded in any software that is released under the GNU
33 General Purpose Licence (GPL), then the terms of that licence shall
34 supersede any condition above with which it is incompatible.
35 -----------------------------------------------------------------------------
36 */
37
38 #include "internal.h"
39 #include "pcreposix.h"
40 #include "stdlib.h"
41
42
43
44 /* Corresponding tables of PCRE error messages and POSIX error codes. */
45
46 static const char *estring[] = {
47 ERR1, ERR2, ERR3, ERR4, ERR5, ERR6, ERR7, ERR8, ERR9, ERR10,
48 ERR11, ERR12, ERR13, ERR14, ERR15, ERR16, ERR17, ERR18, ERR19, ERR20,
49 ERR21, ERR22, ERR23, ERR24, ERR25, ERR26, ERR27, ERR29, ERR29, ERR30,
50 ERR31, ERR32, ERR33, ERR34, ERR35, ERR36, ERR37, ERR38, ERR39, ERR40,
51 ERR41, ERR42, ERR43 };
52
53 static int eint[] = {
54 REG_EESCAPE, /* "\\ at end of pattern" */
55 REG_EESCAPE, /* "\\c at end of pattern" */
56 REG_EESCAPE, /* "unrecognized character follows \\" */
57 REG_BADBR, /* "numbers out of order in {} quantifier" */
58 REG_BADBR, /* "number too big in {} quantifier" */
59 REG_EBRACK, /* "missing terminating ] for character class" */
60 REG_ECTYPE, /* "invalid escape sequence in character class" */
61 REG_ERANGE, /* "range out of order in character class" */
62 REG_BADRPT, /* "nothing to repeat" */
63 REG_BADRPT, /* "operand of unlimited repeat could match the empty string" */
64 REG_ASSERT, /* "internal error: unexpected repeat" */
65 REG_BADPAT, /* "unrecognized character after (?" */
66 REG_BADPAT, /* "POSIX named classes are supported only within a class" */
67 REG_EPAREN, /* "missing )" */
68 REG_ESUBREG, /* "reference to non-existent subpattern" */
69 REG_INVARG, /* "erroffset passed as NULL" */
70 REG_INVARG, /* "unknown option bit(s) set" */
71 REG_EPAREN, /* "missing ) after comment" */
72 REG_ESIZE, /* "parentheses nested too deeply" */
73 REG_ESIZE, /* "regular expression too large" */
74 REG_ESPACE, /* "failed to get memory" */
75 REG_EPAREN, /* "unmatched brackets" */
76 REG_ASSERT, /* "internal error: code overflow" */
77 REG_BADPAT, /* "unrecognized character after (?<" */
78 REG_BADPAT, /* "lookbehind assertion is not fixed length" */
79 REG_BADPAT, /* "malformed number after (?(" */
80 REG_BADPAT, /* "conditional group containe more than two branches" */
81 REG_BADPAT, /* "assertion expected after (?(" */
82 REG_BADPAT, /* "(?R or (?digits must be followed by )" */
83 REG_ECTYPE, /* "unknown POSIX class name" */
84 REG_BADPAT, /* "POSIX collating elements are not supported" */
85 REG_INVARG, /* "this version of PCRE is not compiled with PCRE_UTF8 support" */
86 REG_BADPAT, /* "spare error" */
87 REG_BADPAT, /* "character value in \x{...} sequence is too large" */
88 REG_BADPAT, /* "invalid condition (?(0)" */
89 REG_BADPAT, /* "\\C not allowed in lookbehind assertion" */
90 REG_EESCAPE, /* "PCRE does not support \\L, \\l, \\N, \\P, \\p, \\U, \\u, or \\X" */
91 REG_BADPAT, /* "number after (?C is > 255" */
92 REG_BADPAT, /* "closing ) for (?C expected" */
93 REG_BADPAT, /* "recursive call could loop indefinitely" */
94 REG_BADPAT, /* "unrecognized character after (?P" */
95 REG_BADPAT, /* "syntax error after (?P" */
96 REG_BADPAT /* "two named groups have the same name" */
97 };
98
99 /* Table of texts corresponding to POSIX error codes */
100
/* Message texts indexed directly by POSIX error code; regerror() uses the
error code as the subscript. The number in each comment is the array index,
followed by the code the entry stands for. */

static const char *pstring[] = {
  /*  0: no error (dummy) */ "",
  /*  1: REG_ASSERT       */ "internal error",
  /*  2: BADBR            */ "invalid repeat counts in {}",
  /*  3: BADPAT           */ "pattern error",
  /*  4: BADRPT           */ "? * + invalid",
  /*  5: EBRACE           */ "unbalanced {}",
  /*  6: EBRACK           */ "unbalanced []",
  /*  7: ECOLLATE         */ "collation error - not relevant",
  /*  8: ECTYPE           */ "bad class",
  /*  9: EESCAPE          */ "bad escape sequence",
  /* 10: EMPTY            */ "empty expression",
  /* 11: EPAREN           */ "unbalanced ()",
  /* 12: ERANGE           */ "bad range inside []",
  /* 13: ESIZE            */ "expression too big",
  /* 14: ESPACE           */ "failed to get memory",
  /* 15: ESUBREG          */ "bad back reference",
  /* 16: INVARG           */ "bad argument",
  /* 17: NOMATCH          */ "match failed"
};
121
122
123
124
125 /*************************************************
126 * Translate PCRE text code to int *
127 *************************************************/
128
129 /* PCRE compile-time errors are given as strings defined as macros. We can just
130 look them up in a table to turn them into POSIX-style error codes. */
131
132 static int
133 pcre_posix_error_code(const char *s)
134 {
135 size_t i;
136 for (i = 0; i < sizeof(estring)/sizeof(char *); i++)
137 if (strcmp(s, estring[i]) == 0) return eint[i];
138 return REG_ASSERT;
139 }
140
141
142
143 /*************************************************
144 * Translate error code to string *
145 *************************************************/
146
147 size_t
148 regerror(int errcode, const regex_t *preg, char *errbuf, size_t errbuf_size)
149 {
150 const char *message, *addmessage;
151 size_t length, addlength;
152
153 message = (errcode >= (int)(sizeof(pstring)/sizeof(char *)))?
154 "unknown error code" : pstring[errcode];
155 length = strlen(message) + 1;
156
157 addmessage = " at offset ";
158 addlength = (preg != NULL && (int)preg->re_erroffset != -1)?
159 strlen(addmessage) + 6 : 0;
160
161 if (errbuf_size > 0)
162 {
163 if (addlength > 0 && errbuf_size >= length + addlength)
164 sprintf(errbuf, "%s%s%-6d", message, addmessage, (int)preg->re_erroffset);
165 else
166 {
167 strncpy(errbuf, message, errbuf_size - 1);
168 errbuf[errbuf_size-1] = 0;
169 }
170 }
171
172 return length + addlength;
173 }
174
175
176
177
178 /*************************************************
179 * Free store held by a regex *
180 *************************************************/
181
182 void
183 regfree(regex_t *preg)
184 {
185 (pcre_free)(preg->re_pcre);
186 }
187
188
189
190
191 /*************************************************
192 * Compile a regular expression *
193 *************************************************/
194
195 /*
196 Arguments:
197 preg points to a structure for recording the compiled expression
198 pattern the pattern to compile
199 cflags compilation flags
200
201 Returns: 0 on success
202 various non-zero codes on failure
203 */
204
205 int
206 regcomp(regex_t *preg, const char *pattern, int cflags)
207 {
208 const char *errorptr;
209 int erroffset;
210 int options = 0;
211
212 if ((cflags & REG_ICASE) != 0) options |= PCRE_CASELESS;
213 if ((cflags & REG_NEWLINE) != 0) options |= PCRE_MULTILINE;
214
215 preg->re_pcre = pcre_compile(pattern, options, &errorptr, &erroffset, NULL);
216 preg->re_erroffset = erroffset;
217
218 if (preg->re_pcre == NULL) return pcre_posix_error_code(errorptr);
219
220 preg->re_nsub = pcre_info(preg->re_pcre, NULL, NULL);
221 return 0;
222 }
223
224
225
226
227 /*************************************************
228 * Match a regular expression *
229 *************************************************/
230
231 /* Unfortunately, PCRE requires 3 ints of working space for each captured
232 substring, so we have to get and release working store instead of just using
233 the POSIX structures as was done in earlier releases when PCRE needed only 2
234 ints. However, if the number of possible capturing brackets is small, use a
235 block of store on the stack, to reduce the use of malloc/free. The threshold is
236 in a macro that can be changed at configure time. */
237
238 int
239 regexec(regex_t *preg, const char *string, size_t nmatch,
240 regmatch_t pmatch[], int eflags)
241 {
242 int rc;
243 int options = 0;
244 int *ovector = NULL;
245 int small_ovector[POSIX_MALLOC_THRESHOLD * 3];
246 BOOL allocated_ovector = FALSE;
247
248 if ((eflags & REG_NOTBOL) != 0) options |= PCRE_NOTBOL;
249 if ((eflags & REG_NOTEOL) != 0) options |= PCRE_NOTEOL;
250
251 preg->re_erroffset = (size_t)(-1); /* Only has meaning after compile */
252
253 if (nmatch > 0)
254 {
255 if (nmatch <= POSIX_MALLOC_THRESHOLD)
256 {
257 ovector = &(small_ovector[0]);
258 }
259 else
260 {
261 ovector = (int *)malloc(sizeof(int) * nmatch * 3);
262 if (ovector == NULL) return REG_ESPACE;
263 allocated_ovector = TRUE;
264 }
265 }
266
267 rc = pcre_exec(preg->re_pcre, NULL, string, (int)strlen(string), 0, options,
268 ovector, nmatch * 3);
269
270 if (rc == 0) rc = nmatch; /* All captured slots were filled in */
271
272 if (rc >= 0)
273 {
274 size_t i;
275 for (i = 0; i < (size_t)rc; i++)
276 {
277 pmatch[i].rm_so = ovector[i*2];
278 pmatch[i].rm_eo = ovector[i*2+1];
279 }
280 if (allocated_ovector) free(ovector);
281 for (; i < nmatch; i++) pmatch[i].rm_so = pmatch[i].rm_eo = -1;
282 return 0;
283 }
284
285 else
286 {
287 if (allocated_ovector) free(ovector);
288 switch(rc)
289 {
290 case PCRE_ERROR_NOMATCH: return REG_NOMATCH;
291 case PCRE_ERROR_NULL: return REG_INVARG;
292 case PCRE_ERROR_BADOPTION: return REG_INVARG;
293 case PCRE_ERROR_BADMAGIC: return REG_INVARG;
294 case PCRE_ERROR_UNKNOWN_NODE: return REG_ASSERT;
295 case PCRE_ERROR_NOMEMORY: return REG_ESPACE;
296 default: return REG_ASSERT;
297 }
298 }
299 }
300
301 /* End of pcreposix.c */

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12