/[pcre]/code/trunk/pcredemo.c
ViewVC logotype

Contents of /code/trunk/pcredemo.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 442 - (show annotations) (download)
Fri Sep 11 10:21:02 2009 UTC (4 years, 10 months ago) by ph10
File MIME type: text/plain
File size: 12277 byte(s)
Added PCRE_NOTEMPTY_ATSTART to fix /g bug when \K is present.

1 /*************************************************
2 * PCRE DEMONSTRATION PROGRAM *
3 *************************************************/
4
5 /* This is a demonstration program to illustrate the most straightforward ways
6 of calling the PCRE regular expression library from a C program. See the
7 pcresample documentation for a short discussion ("man pcresample" if you have
8 the PCRE man pages installed).
9
10 In Unix-like environments, compile this program thuswise:
11
12 gcc -Wall pcredemo.c -I/usr/local/include -L/usr/local/lib \
13 -R/usr/local/lib -lpcre
14
15 Replace "/usr/local/include" and "/usr/local/lib" with wherever the include and
16 library files for PCRE are installed on your system. You don't need -I and -L
17 if PCRE is installed in the standard system libraries. Only some operating
18 systems (e.g. Solaris) use the -R option.
19
20 Building under Windows:
21
22 If you want to statically link this program against a non-dll .a file, you must
23 define PCRE_STATIC before including pcre.h, otherwise the pcre_malloc() and
24 pcre_free() exported functions will be declared __declspec(dllimport), with
25 unwanted results. So in this environment, uncomment the following line. */
26
27 /* #define PCRE_STATIC */
28
29 #include <stdio.h>
30 #include <string.h>
31 #include <pcre.h>
32
33 #define OVECCOUNT 30 /* size of ovector; should be a multiple of 3 (main() treats OVECCOUNT/3 as the number of offset pairs it can report) */
34
35
36 int main(int argc, char **argv) /* args: [-g] pattern subject; returns 0 when the first match succeeds and no later matching error occurs, 1 otherwise */
37 {
38 pcre *re;
39 const char *error;
40 char *pattern;
41 char *subject;
42 unsigned char *name_table;
43 int erroffset;
44 int find_all;
45 int namecount;
46 int name_entry_size;
47 int ovector[OVECCOUNT];
48 int subject_length;
49 int rc, i;
50
51
52 /**************************************************************************
53 * First, sort out the command line. There is only one possible option at *
54 * the moment, "-g" to request repeated matching to find all occurrences, *
55 * like Perl's /g option. We set the variable find_all to a non-zero value *
56 * if the -g option is present. Apart from that, there must be exactly two *
57 * arguments. *
58 **************************************************************************/
59
60 find_all = 0;
61 for (i = 1; i < argc; i++)
62 {
63 if (strcmp(argv[i], "-g") == 0) find_all = 1;
64 else break;
65 }
66
67 /* After the options, we require exactly two arguments, which are the pattern,
68 and the subject string. */
69
70 if (argc - i != 2)
71 {
72 printf("Two arguments required: a regex and a subject string\n");
73 return 1;
74 }
75
76 pattern = argv[i];
77 subject = argv[i+1];
78 subject_length = (int)strlen(subject);
79
80
81 /*************************************************************************
82 * Now we are going to compile the regular expression pattern, and handle *
83 * any errors that are detected. *
84 *************************************************************************/
85
86 re = pcre_compile(
87 pattern, /* the pattern */
88 0, /* default options */
89 &error, /* for error message */
90 &erroffset, /* for error offset */
91 NULL); /* use default character tables */
92
93 /* Compilation failed: print the error message and exit */
94
95 if (re == NULL)
96 {
97 printf("PCRE compilation failed at offset %d: %s\n", erroffset, error);
98 return 1;
99 }
100
101
102 /*************************************************************************
103 * If the compilation succeeded, we call PCRE again, in order to do a *
104 * pattern match against the subject string. This does just ONE match. If *
105 * further matching is needed, it will be done below. *
106 *************************************************************************/
107
108 rc = pcre_exec(
109 re, /* the compiled pattern */
110 NULL, /* no extra data - we didn't study the pattern */
111 subject, /* the subject string */
112 subject_length, /* the length of the subject */
113 0, /* start at offset 0 in the subject */
114 0, /* default options */
115 ovector, /* output vector for substring information */
116 OVECCOUNT); /* number of elements in the output vector */
117
118 /* Matching failed: handle error cases */
119
120 if (rc < 0)
121 {
122 switch(rc)
123 {
124 case PCRE_ERROR_NOMATCH: printf("No match\n"); break;
125 /*
126 Handle other special cases if you like
127 */
128 default: printf("Matching error %d\n", rc); break;
129 }
130 pcre_free(re); /* Release memory used for the compiled pattern */
131 return 1;
132 }
133
134 /* Match succeeded */
135
136 printf("\nMatch succeeded at offset %d\n", ovector[0]);
137
138
139 /*************************************************************************
140 * We have found the first match within the subject string. If the output *
141 * vector wasn't big enough, say so. Then output any substrings that were *
142 * captured. *
143 *************************************************************************/
144
145 /* The output vector wasn't big enough */
146
147 if (rc == 0)
148 {
149 rc = OVECCOUNT/3;
150 printf("ovector only has room for %d captured substrings\n", rc - 1);
151 }
152
153 /* Show substrings stored in the output vector by number. Obviously, in a real
154 application you might want to do things other than print them. */
155
156 for (i = 0; i < rc; i++)
157 {
158 char *substring_start = subject + ovector[2*i];
159 int substring_length = ovector[2*i+1] - ovector[2*i];
160 printf("%2d: %.*s\n", i, substring_length, substring_start);
161 }
162
163
164 /**************************************************************************
165 * That concludes the basic part of this demonstration program. We have *
166 * compiled a pattern, and performed a single match. The code that follows *
167 * shows first how to access named substrings, and then how to code for *
168 * repeated matches on the same subject. *
169 **************************************************************************/
170
171 /* See if there are any named substrings, and if so, show them by name. First
172 we have to extract the count of named parentheses from the pattern. */
173
174 (void)pcre_fullinfo( /* NOTE(review): result discarded; namecount has no initializer, so it is only meaningful if this call stores a value */
175 re, /* the compiled pattern */
176 NULL, /* no extra data - we didn't study the pattern */
177 PCRE_INFO_NAMECOUNT, /* number of named substrings */
178 &namecount); /* where to put the answer */
179
180 if (namecount <= 0) printf("No named substrings\n"); else
181 {
182 unsigned char *tabptr;
183 printf("Named substrings\n");
184
185 /* Before we can access the substrings, we must extract the table for
186 translating names to numbers, and the size of each entry in the table. */
187
188 (void)pcre_fullinfo(
189 re, /* the compiled pattern */
190 NULL, /* no extra data - we didn't study the pattern */
191 PCRE_INFO_NAMETABLE, /* address of the table */
192 &name_table); /* where to put the answer */
193
194 (void)pcre_fullinfo(
195 re, /* the compiled pattern */
196 NULL, /* no extra data - we didn't study the pattern */
197 PCRE_INFO_NAMEENTRYSIZE, /* size of each entry in the table */
198 &name_entry_size); /* where to put the answer */
199
200 /* Now we can scan the table and, for each entry, print the number, the name,
201 and the substring itself. */
202
203 tabptr = name_table;
204 for (i = 0; i < namecount; i++)
205 {
206 int n = (tabptr[0] << 8) | tabptr[1]; /* group number: first two bytes of the entry, high byte first; the name starts at byte 2 */
207 printf("(%d) %*s: %.*s\n", n, name_entry_size - 3, tabptr + 2,
208 ovector[2*n+1] - ovector[2*n], subject + ovector[2*n]); /* NOTE(review): n is not checked against OVECCOUNT before indexing ovector */
209 tabptr += name_entry_size;
210 }
211 }
212
213
214 /*************************************************************************
215 * If the "-g" option was given on the command line, we want to continue *
216 * to search for additional matches in the subject string, in a similar *
217 * way to the /g option in Perl. This turns out to be trickier than you *
218 * might think because of the possibility of matching an empty string. *
219 * What happens is as follows: *
220 * *
221 * If the previous match was NOT for an empty string, we can just start *
222 * the next match at the end of the previous one. *
223 * *
224 * If the previous match WAS for an empty string, we can't do that, as it *
225 * would lead to an infinite loop. Instead, a special call of pcre_exec() *
226 * is made with the PCRE_NOTEMPTY_ATSTART and PCRE_ANCHORED flags set. *
227 * The first of these tells PCRE that an empty string at the start of the *
228 * subject is not a valid match; other possibilities must be tried. The *
229 * second flag restricts PCRE to one match attempt at the initial string *
230 * position. If this match succeeds, an alternative to the empty string *
231 * match has been found, and we can proceed round the loop. *
232 *************************************************************************/
233
234 if (!find_all)
235 {
236 pcre_free(re); /* Release the memory used for the compiled pattern */
237 return 0; /* Finish unless -g was given */
238 }
239
240 /* Loop for second and subsequent matches */
241
242 for (;;)
243 {
244 int options = 0; /* Normally no options */
245 int start_offset = ovector[1]; /* Start at end of previous match */
246
247 /* If the previous match was for an empty string, we are finished if we are
248 at the end of the subject. Otherwise, arrange to run another match at the
249 same point to see if a non-empty match can be found. */
250
251 if (ovector[0] == ovector[1])
252 {
253 if (ovector[0] == subject_length) break;
254 options = PCRE_NOTEMPTY_ATSTART | PCRE_ANCHORED;
255 }
256
257 /* Run the next matching operation */
258
259 rc = pcre_exec(
260 re, /* the compiled pattern */
261 NULL, /* no extra data - we didn't study the pattern */
262 subject, /* the subject string */
263 subject_length, /* the length of the subject */
264 start_offset, /* starting offset in the subject */
265 options, /* options */
266 ovector, /* output vector for substring information */
267 OVECCOUNT); /* number of elements in the output vector */
268
269 /* This time, a result of NOMATCH isn't an error. If the value in "options"
270 is zero, it just means we have found all possible matches, so the loop ends.
271 Otherwise, it means we have failed to find a non-empty-string match at a
272 point where there was a previous empty-string match. In this case, we do what
273 Perl does: advance the matching position by one, and continue. We do this by
274 setting the "end of previous match" offset, because that is picked up at the
275 top of the loop as the point at which to start again. */
276
277 if (rc == PCRE_ERROR_NOMATCH)
278 {
279 if (options == 0) break;
280 ovector[1] = start_offset + 1;
281 continue; /* Go round the loop again */
282 }
283
284 /* Other matching errors are not recoverable. */
285
286 if (rc < 0)
287 {
288 printf("Matching error %d\n", rc);
289 pcre_free(re); /* Release memory used for the compiled pattern */
290 return 1;
291 }
292
293 /* Match succeeded */
294
295 printf("\nMatch succeeded again at offset %d\n", ovector[0]);
296
297 /* The match succeeded, but the output vector wasn't big enough. */
298
299 if (rc == 0)
300 {
301 rc = OVECCOUNT/3;
302 printf("ovector only has room for %d captured substrings\n", rc - 1);
303 }
304
305 /* As before, show substrings stored in the output vector by number, and then
306 also any named substrings. */
307
308 for (i = 0; i < rc; i++)
309 {
310 char *substring_start = subject + ovector[2*i];
311 int substring_length = ovector[2*i+1] - ovector[2*i];
312 printf("%2d: %.*s\n", i, substring_length, substring_start);
313 }
314
315 if (namecount <= 0) printf("No named substrings\n"); else
316 {
317 unsigned char *tabptr = name_table; /* name_table and name_entry_size were fetched after the first match */
318 printf("Named substrings\n");
319 for (i = 0; i < namecount; i++)
320 {
321 int n = (tabptr[0] << 8) | tabptr[1]; /* group number from the first two bytes of the entry */
322 printf("(%d) %*s: %.*s\n", n, name_entry_size - 3, tabptr + 2,
323 ovector[2*n+1] - ovector[2*n], subject + ovector[2*n]);
324 tabptr += name_entry_size;
325 }
326 }
327 } /* End of loop to find second and subsequent matches */
328
329 printf("\n");
330 pcre_free(re); /* Release memory used for the compiled pattern */
331 return 0;
332 }
333
334 /* End of pcredemo.c */

Properties

Name Value
svn:eol-style native
svn:keywords "Author Date Id Revision Url"

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12