/[pcre]/code/tags/pcre-7.1/pcreposix.c
ViewVC logotype

Contents of /code/tags/pcre-7.1/pcreposix.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 49 - (show annotations) (download)
Sat Feb 24 21:39:33 2007 UTC (7 years, 1 month ago) by nigel
Original Path: code/trunk/pcreposix.c
File MIME type: text/plain
File size: 9094 byte(s)
Load pcre-3.3 into code/trunk.

1 /*************************************************
2 * Perl-Compatible Regular Expressions *
3 *************************************************/
4
5 /*
6 This is a library of functions to support regular expressions whose syntax
7 and semantics are as close as possible to those of the Perl 5 language. See
8 the file Tech.Notes for some information on the internals.
9
10 This module is a wrapper that provides a POSIX API to the underlying PCRE
11 functions.
12
13 Written by: Philip Hazel <ph10@cam.ac.uk>
14
15 Copyright (c) 1997-2000 University of Cambridge
16
17 -----------------------------------------------------------------------------
18 Permission is granted to anyone to use this software for any purpose on any
19 computer system, and to redistribute it freely, subject to the following
20 restrictions:
21
22 1. This software is distributed in the hope that it will be useful,
23 but WITHOUT ANY WARRANTY; without even the implied warranty of
24 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
25
26 2. The origin of this software must not be misrepresented, either by
27 explicit claim or by omission.
28
29 3. Altered versions must be plainly marked as such, and must not be
30 misrepresented as being the original software.
31
32 4. If PCRE is embedded in any software that is released under the GNU
33 General Purpose Licence (GPL), then the terms of that licence shall
34 supersede any condition above with which it is incompatible.
35 -----------------------------------------------------------------------------
36 */
37
#include "internal.h"
#include "pcreposix.h"
#include "stdlib.h"
#include <limits.h>
41
42
43
44 /* Corresponding tables of PCRE error messages and POSIX error codes. */
45
46 static const char *estring[] = {
47 ERR1, ERR2, ERR3, ERR4, ERR5, ERR6, ERR7, ERR8, ERR9, ERR10,
48 ERR11, ERR12, ERR13, ERR14, ERR15, ERR16, ERR17, ERR18, ERR19, ERR20,
49 ERR21, ERR22, ERR23, ERR24, ERR25, ERR26, ERR27, ERR29, ERR29, ERR30,
50 ERR31 };
51
52 static int eint[] = {
53 REG_EESCAPE, /* "\\ at end of pattern" */
54 REG_EESCAPE, /* "\\c at end of pattern" */
55 REG_EESCAPE, /* "unrecognized character follows \\" */
56 REG_BADBR, /* "numbers out of order in {} quantifier" */
57 REG_BADBR, /* "number too big in {} quantifier" */
58 REG_EBRACK, /* "missing terminating ] for character class" */
59 REG_ECTYPE, /* "invalid escape sequence in character class" */
60 REG_ERANGE, /* "range out of order in character class" */
61 REG_BADRPT, /* "nothing to repeat" */
62 REG_BADRPT, /* "operand of unlimited repeat could match the empty string" */
63 REG_ASSERT, /* "internal error: unexpected repeat" */
64 REG_BADPAT, /* "unrecognized character after (?" */
65 REG_ESIZE, /* "too many capturing parenthesized sub-patterns" */
66 REG_EPAREN, /* "missing )" */
67 REG_ESUBREG, /* "back reference to non-existent subpattern" */
68 REG_INVARG, /* "erroffset passed as NULL" */
69 REG_INVARG, /* "unknown option bit(s) set" */
70 REG_EPAREN, /* "missing ) after comment" */
71 REG_ESIZE, /* "too many sets of parentheses" */
72 REG_ESIZE, /* "regular expression too large" */
73 REG_ESPACE, /* "failed to get memory" */
74 REG_EPAREN, /* "unmatched brackets" */
75 REG_ASSERT, /* "internal error: code overflow" */
76 REG_BADPAT, /* "unrecognized character after (?<" */
77 REG_BADPAT, /* "lookbehind assertion is not fixed length" */
78 REG_BADPAT, /* "malformed number after (?(" */
79 REG_BADPAT, /* "conditional group containe more than two branches" */
80 REG_BADPAT, /* "assertion expected after (?(" */
81 REG_BADPAT, /* "(?p must be followed by )" */
82 REG_ECTYPE, /* "unknown POSIX class name" */
83 REG_BADPAT, /* "POSIX collating elements are not supported" */
84 REG_INVARG, /* "this version of PCRE is not compiled with PCRE_UTF8 support" */
85 REG_BADPAT, /* "characters with values > 255 are not yet supported in classes" */
86 REG_BADPAT /* "character value in \x{...} sequence is too large" */
87 };
88
89 /* Table of texts corresponding to POSIX error codes */
90
/* Message text for each POSIX error code, indexed directly by the code value.
Slot 0 is a placeholder because 0 means success. Both the strings and the
pointer array itself are read-only. */

static const char * const pstring[] = {
  "",                                /* Dummy for value 0 */
  "internal error",                  /* REG_ASSERT */
  "invalid repeat counts in {}",     /* REG_BADBR */
  "pattern error",                   /* REG_BADPAT */
  "? * + invalid",                   /* REG_BADRPT */
  "unbalanced {}",                   /* REG_EBRACE */
  "unbalanced []",                   /* REG_EBRACK */
  "collation error - not relevant",  /* REG_ECOLLATE */
  "bad class",                       /* REG_ECTYPE */
  "bad escape sequence",             /* REG_EESCAPE */
  "empty expression",                /* REG_EMPTY */
  "unbalanced ()",                   /* REG_EPAREN */
  "bad range inside []",             /* REG_ERANGE */
  "expression too big",              /* REG_ESIZE */
  "failed to get memory",            /* REG_ESPACE */
  "bad back reference",              /* REG_ESUBREG */
  "bad argument",                    /* REG_INVARG */
  "match failed"                     /* REG_NOMATCH */
};
111
112
113
114
115 /*************************************************
116 * Translate PCRE text code to int *
117 *************************************************/
118
119 /* PCRE compile-time errors are given as strings defined as macros. We can just
120 look them up in a table to turn them into POSIX-style error codes. */
121
122 static int
123 pcre_posix_error_code(const char *s)
124 {
125 size_t i;
126 for (i = 0; i < sizeof(estring)/sizeof(char *); i++)
127 if (strcmp(s, estring[i]) == 0) return eint[i];
128 return REG_ASSERT;
129 }
130
131
132
133 /*************************************************
134 * Translate error code to string *
135 *************************************************/
136
137 size_t
138 regerror(int errcode, const regex_t *preg, char *errbuf, size_t errbuf_size)
139 {
140 const char *message, *addmessage;
141 size_t length, addlength;
142
143 message = (errcode >= (int)(sizeof(pstring)/sizeof(char *)))?
144 "unknown error code" : pstring[errcode];
145 length = strlen(message) + 1;
146
147 addmessage = " at offset ";
148 addlength = (preg != NULL && (int)preg->re_erroffset != -1)?
149 strlen(addmessage) + 6 : 0;
150
151 if (errbuf_size > 0)
152 {
153 if (addlength > 0 && errbuf_size >= length + addlength)
154 sprintf(errbuf, "%s%s%-6d", message, addmessage, (int)preg->re_erroffset);
155 else
156 {
157 strncpy(errbuf, message, errbuf_size - 1);
158 errbuf[errbuf_size-1] = 0;
159 }
160 }
161
162 return length + addlength;
163 }
164
165
166
167
168 /*************************************************
169 * Free store held by a regex *
170 *************************************************/
171
172 void
173 regfree(regex_t *preg)
174 {
175 (pcre_free)(preg->re_pcre);
176 }
177
178
179
180
181 /*************************************************
182 * Compile a regular expression *
183 *************************************************/
184
185 /*
186 Arguments:
187 preg points to a structure for recording the compiled expression
188 pattern the pattern to compile
189 cflags compilation flags
190
191 Returns: 0 on success
192 various non-zero codes on failure
193 */
194
195 int
196 regcomp(regex_t *preg, const char *pattern, int cflags)
197 {
198 const char *errorptr;
199 int erroffset;
200 int options = 0;
201
202 if ((cflags & REG_ICASE) != 0) options |= PCRE_CASELESS;
203 if ((cflags & REG_NEWLINE) != 0) options |= PCRE_MULTILINE;
204
205 preg->re_pcre = pcre_compile(pattern, options, &errorptr, &erroffset, NULL);
206 preg->re_erroffset = erroffset;
207
208 if (preg->re_pcre == NULL) return pcre_posix_error_code(errorptr);
209
210 preg->re_nsub = pcre_info(preg->re_pcre, NULL, NULL);
211 return 0;
212 }
213
214
215
216
217 /*************************************************
218 * Match a regular expression *
219 *************************************************/
220
221 /* Unfortunately, PCRE requires 3 ints of working space for each captured
222 substring, so we have to get and release working store instead of just using
223 the POSIX structures as was done in earlier releases when PCRE needed only 2
224 ints. */
225
226 int
227 regexec(regex_t *preg, const char *string, size_t nmatch,
228 regmatch_t pmatch[], int eflags)
229 {
230 int rc;
231 int options = 0;
232 int *ovector = NULL;
233
234 if ((eflags & REG_NOTBOL) != 0) options |= PCRE_NOTBOL;
235 if ((eflags & REG_NOTEOL) != 0) options |= PCRE_NOTEOL;
236
237 preg->re_erroffset = (size_t)(-1); /* Only has meaning after compile */
238
239 if (nmatch > 0)
240 {
241 ovector = (int *)malloc(sizeof(int) * nmatch * 3);
242 if (ovector == NULL) return REG_ESPACE;
243 }
244
245 rc = pcre_exec(preg->re_pcre, NULL, string, (int)strlen(string), 0, options,
246 ovector, nmatch * 3);
247
248 if (rc == 0) rc = nmatch; /* All captured slots were filled in */
249
250 if (rc >= 0)
251 {
252 size_t i;
253 for (i = 0; i < rc; i++)
254 {
255 pmatch[i].rm_so = ovector[i*2];
256 pmatch[i].rm_eo = ovector[i*2+1];
257 }
258 if (ovector != NULL) free(ovector);
259 for (; i < nmatch; i++) pmatch[i].rm_so = pmatch[i].rm_eo = -1;
260 return 0;
261 }
262
263 else
264 {
265 if (ovector != NULL) free(ovector);
266 switch(rc)
267 {
268 case PCRE_ERROR_NOMATCH: return REG_NOMATCH;
269 case PCRE_ERROR_NULL: return REG_INVARG;
270 case PCRE_ERROR_BADOPTION: return REG_INVARG;
271 case PCRE_ERROR_BADMAGIC: return REG_INVARG;
272 case PCRE_ERROR_UNKNOWN_NODE: return REG_ASSERT;
273 case PCRE_ERROR_NOMEMORY: return REG_ESPACE;
274 default: return REG_ASSERT;
275 }
276 }
277 }
278
279 /* End of pcreposix.c */

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12