/[pcre]/code/trunk/doc/pcredemo.3
ViewVC logotype

Contents of /code/trunk/doc/pcredemo.3

Parent Directory Parent Directory | Revision Log Revision Log


Revision 453 - (show annotations) (download)
Fri Sep 18 19:12:35 2009 UTC (5 years ago) by ph10
File size: 12437 byte(s)
Add more explanation about recursive subpatterns, and make it possible to 
process the documentation without building a whole release.

1 .\" Start example: save the current font in register mE, then turn off filling and hyphenation and switch to the CW font.
2 .de EX
3 . nr mE \\n(.f
4 . nf
5 . nh
6 . ft CW
7 ..
8 .
9 .
10 .\" End example: restore the font saved in register mE, turn filling back on, and reset hyphenation from register HY.
11 .de EE
12 . ft \\n(mE
13 . fi
14 . hy \\n(HY
15 ..
16 .
17 .EX
18 /*************************************************
19 * PCRE DEMONSTRATION PROGRAM *
20 *************************************************/
21
22 /* This is a demonstration program to illustrate the most straightforward ways
23 of calling the PCRE regular expression library from a C program. See the
24 pcresample documentation for a short discussion ("man pcresample" if you have
25 the PCRE man pages installed).
26
27 In Unix-like environments, compile this program thuswise:
28
29 gcc -Wall pcredemo.c -I/usr/local/include -L/usr/local/lib \e
30 -R/usr/local/lib -lpcre
31
32 Replace "/usr/local/include" and "/usr/local/lib" with wherever the include and
33 library files for PCRE are installed on your system. You don't need -I and -L
34 if PCRE is installed in the standard system libraries. Only some operating
35 systems (e.g. Solaris) use the -R option.
36
37 Building under Windows:
38
39 If you want to statically link this program against a non-dll .a file, you must
40 define PCRE_STATIC before including pcre.h, otherwise the pcre_malloc() and
41 pcre_free() exported functions will be declared __declspec(dllimport), with
42 unwanted results. So in this environment, uncomment the following line. */
43
44 /* #define PCRE_STATIC */
45
46 #include <stdio.h>
47 #include <string.h>
48 #include <pcre.h>
49
50 #define OVECCOUNT 30 /* size of the ovector array passed to pcre_exec(); should be a multiple of 3 */
51
52
53 int main(int argc, char **argv)
54 {
55 pcre *re;
56 const char *error;
57 char *pattern;
58 char *subject;
59 unsigned char *name_table;
60 int erroffset;
61 int find_all;
62 int namecount;
63 int name_entry_size;
64 int ovector[OVECCOUNT];
65 int subject_length;
66 int rc, i;
67 /* Overview: parse an optional "-g", compile the pattern argument, match it once against the subject argument, and with -g keep matching through the rest of the subject. Returns 0 on success and 1 on a usage error, a compile failure, or when the first match fails. */
68
69 /**************************************************************************
70 * First, sort out the command line. There is only one possible option at *
71 * the moment, "-g" to request repeated matching to find all occurrences, *
72 * like Perl's /g option. We set the variable find_all to a non-zero value *
73 * if the -g option is present. Apart from that, there must be exactly two *
74 * arguments. *
75 **************************************************************************/
76
77 find_all = 0;
78 for (i = 1; i < argc; i++)
79 {
80 if (strcmp(argv[i], "-g") == 0) find_all = 1;
81 else break;
82 }
83
84 /* After the options, we require exactly two arguments, which are the pattern,
85 and the subject string. */
86
87 if (argc - i != 2)
88 {
89 printf("Two arguments required: a regex and a subject string\en");
90 return 1;
91 }
92
93 pattern = argv[i];
94 subject = argv[i+1];
95 subject_length = (int)strlen(subject);
96
97
98 /*************************************************************************
99 * Now we are going to compile the regular expression pattern, and handle *
100 * any errors that are detected. *
101 *************************************************************************/
102
103 re = pcre_compile(
104 pattern, /* the pattern */
105 0, /* default options */
106 &error, /* for error message */
107 &erroffset, /* for error offset */
108 NULL); /* use default character tables */
109
110 /* Compilation failed: print the error message and exit */
111
112 if (re == NULL)
113 {
114 printf("PCRE compilation failed at offset %d: %s\en", erroffset, error);
115 return 1;
116 }
117
118
119 /*************************************************************************
120 * If the compilation succeeded, we call PCRE again, in order to do a *
121 * pattern match against the subject string. This does just ONE match. If *
122 * further matching is needed, it will be done below. *
123 *************************************************************************/
124
125 rc = pcre_exec(
126 re, /* the compiled pattern */
127 NULL, /* no extra data - we didn't study the pattern */
128 subject, /* the subject string */
129 subject_length, /* the length of the subject */
130 0, /* start at offset 0 in the subject */
131 0, /* default options */
132 ovector, /* output vector for substring information */
133 OVECCOUNT); /* number of elements in the output vector */
134
135 /* Matching failed: handle error cases */
136
137 if (rc < 0)
138 {
139 switch(rc)
140 {
141 case PCRE_ERROR_NOMATCH: printf("No match\en"); break;
142 /*
143 Handle other special cases if you like
144 */
145 default: printf("Matching error %d\en", rc); break;
146 }
147 pcre_free(re); /* Release memory used for the compiled pattern */
148 return 1;
149 }
150
151 /* Match succeeded */
152
153 printf("\enMatch succeeded at offset %d\en", ovector[0]);
154
155
156 /*************************************************************************
157 * We have found the first match within the subject string. If the output *
158 * vector wasn't big enough, say so. Then output any substrings that were *
159 * captured. *
160 *************************************************************************/
161
162 /* A return of 0 means the output vector wasn't big enough; clamp rc to the number of pairs we treat as usable (OVECCOUNT/3) so the loop below stays within them */
163
164 if (rc == 0)
165 {
166 rc = OVECCOUNT/3;
167 printf("ovector only has room for %d captured substrings\en", rc - 1);
168 }
169
170 /* Show substrings stored in the output vector by number. Obviously, in a real
171 application you might want to do things other than print them. */
172
173 for (i = 0; i < rc; i++)
174 {
175 char *substring_start = subject + ovector[2*i];
176 int substring_length = ovector[2*i+1] - ovector[2*i];
177 printf("%2d: %.*s\en", i, substring_length, substring_start);
178 }
179
180
181 /**************************************************************************
182 * That concludes the basic part of this demonstration program. We have *
183 * compiled a pattern, and performed a single match. The code that follows *
184 * shows first how to access named substrings, and then how to code for *
185 * repeated matches on the same subject. *
186 **************************************************************************/
187
188 /* See if there are any named substrings, and if so, show them by name. First
189 we have to extract the count of named parentheses from the pattern. */
190
191 (void)pcre_fullinfo(
192 re, /* the compiled pattern */
193 NULL, /* no extra data - we didn't study the pattern */
194 PCRE_INFO_NAMECOUNT, /* number of named substrings */
195 &namecount); /* where to put the answer */
196
197 if (namecount <= 0) printf("No named substrings\en"); else
198 {
199 unsigned char *tabptr;
200 printf("Named substrings\en");
201
202 /* Before we can access the substrings, we must extract the table for
203 translating names to numbers, and the size of each entry in the table. */
204
205 (void)pcre_fullinfo(
206 re, /* the compiled pattern */
207 NULL, /* no extra data - we didn't study the pattern */
208 PCRE_INFO_NAMETABLE, /* address of the table */
209 &name_table); /* where to put the answer */
210
211 (void)pcre_fullinfo(
212 re, /* the compiled pattern */
213 NULL, /* no extra data - we didn't study the pattern */
214 PCRE_INFO_NAMEENTRYSIZE, /* size of each entry in the table */
215 &name_entry_size); /* where to put the answer */
216
217 /* Now we can scan the table and, for each entry, print the number, the name,
218 and the substring itself. NOTE(review): n is used to index ovector without a range check against OVECCOUNT - confirm this is safe for patterns with many groups. */
219
220 tabptr = name_table;
221 for (i = 0; i < namecount; i++)
222 {
223 int n = (tabptr[0] << 8) | tabptr[1]; /* group number: first two bytes of the entry, high byte first */
224 printf("(%d) %*s: %.*s\en", n, name_entry_size - 3, tabptr + 2,
225 ovector[2*n+1] - ovector[2*n], subject + ovector[2*n]);
226 tabptr += name_entry_size;
227 }
228 }
229
230
231 /*************************************************************************
232 * If the "-g" option was given on the command line, we want to continue *
233 * to search for additional matches in the subject string, in a similar *
234 * way to the /g option in Perl. This turns out to be trickier than you *
235 * might think because of the possibility of matching an empty string. *
236 * What happens is as follows: *
237 * *
238 * If the previous match was NOT for an empty string, we can just start *
239 * the next match at the end of the previous one. *
240 * *
241 * If the previous match WAS for an empty string, we can't do that, as it *
242 * would lead to an infinite loop. Instead, a special call of pcre_exec() *
243 * is made with the PCRE_NOTEMPTY_ATSTART and PCRE_ANCHORED flags set. *
244 * The first of these tells PCRE that an empty string at the start of the *
245 * subject is not a valid match; other possibilities must be tried. The *
246 * second flag restricts PCRE to one match attempt at the initial string *
247 * position. If this match succeeds, an alternative to the empty string *
248 * match has been found, and we can proceed round the loop. *
249 *************************************************************************/
250
251 if (!find_all)
252 {
253 pcre_free(re); /* Release the memory used for the compiled pattern */
254 return 0; /* Finish unless -g was given */
255 }
256
257 /* Loop for second and subsequent matches */
258
259 for (;;)
260 {
261 int options = 0; /* Normally no options */
262 int start_offset = ovector[1]; /* Start at end of previous match */
263
264 /* If the previous match was for an empty string, we are finished if we are
265 at the end of the subject. Otherwise, arrange to run another match at the
266 same point to see if a non-empty match can be found. */
267
268 if (ovector[0] == ovector[1])
269 {
270 if (ovector[0] == subject_length) break;
271 options = PCRE_NOTEMPTY_ATSTART | PCRE_ANCHORED;
272 }
273
274 /* Run the next matching operation */
275
276 rc = pcre_exec(
277 re, /* the compiled pattern */
278 NULL, /* no extra data - we didn't study the pattern */
279 subject, /* the subject string */
280 subject_length, /* the length of the subject */
281 start_offset, /* starting offset in the subject */
282 options, /* options */
283 ovector, /* output vector for substring information */
284 OVECCOUNT); /* number of elements in the output vector */
285
286 /* This time, a result of NOMATCH isn't an error. If the value in "options"
287 is zero, it just means we have found all possible matches, so the loop ends.
288 Otherwise, it means we have failed to find a non-empty-string match at a
289 point where there was a previous empty-string match. In this case, we do what
290 Perl does: advance the matching position by one, and continue. We do this by
291 setting the "end of previous match" offset, because that is picked up at the
292 top of the loop as the point at which to start again. */
293
294 if (rc == PCRE_ERROR_NOMATCH)
295 {
296 if (options == 0) break;
297 ovector[1] = start_offset + 1; /* NOTE(review): advances by a single byte - confirm this is adequate for UTF-8 subjects or CRLF newlines */
298 continue; /* Go round the loop again */
299 }
300
301 /* Other matching errors are not recoverable. */
302
303 if (rc < 0)
304 {
305 printf("Matching error %d\en", rc);
306 pcre_free(re); /* Release memory used for the compiled pattern */
307 return 1;
308 }
309
310 /* Match succeeded */
311
312 printf("\enMatch succeeded again at offset %d\en", ovector[0]);
313
314 /* The match succeeded, but the output vector wasn't big enough. */
315
316 if (rc == 0)
317 {
318 rc = OVECCOUNT/3;
319 printf("ovector only has room for %d captured substrings\en", rc - 1);
320 }
321
322 /* As before, show substrings stored in the output vector by number, and then
323 also any named substrings. */
324
325 for (i = 0; i < rc; i++)
326 {
327 char *substring_start = subject + ovector[2*i];
328 int substring_length = ovector[2*i+1] - ovector[2*i];
329 printf("%2d: %.*s\en", i, substring_length, substring_start);
330 }
331
332 if (namecount <= 0) printf("No named substrings\en"); else
333 {
334 unsigned char *tabptr = name_table;
335 printf("Named substrings\en");
336 for (i = 0; i < namecount; i++)
337 {
338 int n = (tabptr[0] << 8) | tabptr[1]; /* group number: first two bytes of the entry, high byte first */
339 printf("(%d) %*s: %.*s\en", n, name_entry_size - 3, tabptr + 2,
340 ovector[2*n+1] - ovector[2*n], subject + ovector[2*n]);
341 tabptr += name_entry_size;
342 }
343 }
344 } /* End of loop to find second and subsequent matches */
345
346 printf("\en");
347 pcre_free(re); /* Release memory used for the compiled pattern */
348 return 0;
349 }
350
351 /* End of pcredemo.c */
352 .EE

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12