/[pcre]/code/trunk/pcredemo.c
ViewVC logotype

Contents of /code/trunk/pcredemo.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 487 - (show annotations) (download)
Wed Jan 6 10:26:55 2010 UTC (4 years, 7 months ago) by ph10
File MIME type: text/plain
File size: 12655 byte(s)
Tidying updates for 8.01-RC1 release.

1 /*************************************************
2 * PCRE DEMONSTRATION PROGRAM *
3 *************************************************/
4
5 /* This is a demonstration program to illustrate the most straightforward ways
6 of calling the PCRE regular expression library from a C program. See the
7 pcresample documentation for a short discussion ("man pcresample" if you have
8 the PCRE man pages installed).
9
10 In Unix-like environments, if PCRE is installed in your standard system
11 libraries, you should be able to compile this program using this command:
12
13 gcc -Wall pcredemo.c -lpcre -o pcredemo
14
15 If PCRE is not installed in a standard place, it is likely to be installed with
16 support for the pkg-config mechanism. If you have pkg-config, you can compile
17 this program using this command:
18
19 gcc -Wall pcredemo.c `pkg-config --cflags --libs libpcre` -o pcredemo
20
21 If you do not have pkg-config, you may have to use this:
22
23 gcc -Wall pcredemo.c -I/usr/local/include -L/usr/local/lib \
24 -R/usr/local/lib -lpcre -o pcredemo
25
26 Replace "/usr/local/include" and "/usr/local/lib" with wherever the include and
27 library files for PCRE are installed on your system. Only some operating
28 systems (e.g. Solaris) use the -R option.
29
30 Building under Windows:
31
32 If you want to statically link this program against a non-dll .a file, you must
33 define PCRE_STATIC before including pcre.h, otherwise the pcre_malloc() and
34 pcre_free() exported functions will be declared __declspec(dllimport), with
35 unwanted results. So in this environment, uncomment the following line. */
36
37 /* #define PCRE_STATIC */
38
39 #include <stdio.h>
40 #include <string.h>
41 #include <pcre.h>
42
43 #define OVECCOUNT 30 /* should be a multiple of 3 */
44
45
46 int main(int argc, char **argv)
47 {
48 pcre *re;
49 const char *error;
50 char *pattern;
51 char *subject;
52 unsigned char *name_table;
53 int erroffset;
54 int find_all;
55 int namecount;
56 int name_entry_size;
57 int ovector[OVECCOUNT];
58 int subject_length;
59 int rc, i;
60
61
62 /**************************************************************************
63 * First, sort out the command line. There is only one possible option at *
64 * the moment, "-g" to request repeated matching to find all occurrences, *
65 * like Perl's /g option. We set the variable find_all to a non-zero value *
66 * if the -g option is present. Apart from that, there must be exactly two *
67 * arguments. *
68 **************************************************************************/
69
70 find_all = 0;
71 for (i = 1; i < argc; i++)
72 {
73 if (strcmp(argv[i], "-g") == 0) find_all = 1;
74 else break;
75 }
76
77 /* After the options, we require exactly two arguments, which are the pattern,
78 and the subject string. */
79
80 if (argc - i != 2)
81 {
82 printf("Two arguments required: a regex and a subject string\n");
83 return 1;
84 }
85
86 pattern = argv[i];
87 subject = argv[i+1];
88 subject_length = (int)strlen(subject);
89
90
91 /*************************************************************************
92 * Now we are going to compile the regular expression pattern, and handle *
93 * and errors that are detected. *
94 *************************************************************************/
95
96 re = pcre_compile(
97 pattern, /* the pattern */
98 0, /* default options */
99 &error, /* for error message */
100 &erroffset, /* for error offset */
101 NULL); /* use default character tables */
102
103 /* Compilation failed: print the error message and exit */
104
105 if (re == NULL)
106 {
107 printf("PCRE compilation failed at offset %d: %s\n", erroffset, error);
108 return 1;
109 }
110
111
112 /*************************************************************************
113 * If the compilation succeeded, we call PCRE again, in order to do a *
114 * pattern match against the subject string. This does just ONE match. If *
115 * further matching is needed, it will be done below. *
116 *************************************************************************/
117
118 rc = pcre_exec(
119 re, /* the compiled pattern */
120 NULL, /* no extra data - we didn't study the pattern */
121 subject, /* the subject string */
122 subject_length, /* the length of the subject */
123 0, /* start at offset 0 in the subject */
124 0, /* default options */
125 ovector, /* output vector for substring information */
126 OVECCOUNT); /* number of elements in the output vector */
127
128 /* Matching failed: handle error cases */
129
130 if (rc < 0)
131 {
132 switch(rc)
133 {
134 case PCRE_ERROR_NOMATCH: printf("No match\n"); break;
135 /*
136 Handle other special cases if you like
137 */
138 default: printf("Matching error %d\n", rc); break;
139 }
140 pcre_free(re); /* Release memory used for the compiled pattern */
141 return 1;
142 }
143
144 /* Match succeded */
145
146 printf("\nMatch succeeded at offset %d\n", ovector[0]);
147
148
149 /*************************************************************************
150 * We have found the first match within the subject string. If the output *
151 * vector wasn't big enough, say so. Then output any substrings that were *
152 * captured. *
153 *************************************************************************/
154
155 /* The output vector wasn't big enough */
156
157 if (rc == 0)
158 {
159 rc = OVECCOUNT/3;
160 printf("ovector only has room for %d captured substrings\n", rc - 1);
161 }
162
163 /* Show substrings stored in the output vector by number. Obviously, in a real
164 application you might want to do things other than print them. */
165
166 for (i = 0; i < rc; i++)
167 {
168 char *substring_start = subject + ovector[2*i];
169 int substring_length = ovector[2*i+1] - ovector[2*i];
170 printf("%2d: %.*s\n", i, substring_length, substring_start);
171 }
172
173
174 /**************************************************************************
175 * That concludes the basic part of this demonstration program. We have *
176 * compiled a pattern, and performed a single match. The code that follows *
177 * shows first how to access named substrings, and then how to code for *
178 * repeated matches on the same subject. *
179 **************************************************************************/
180
181 /* See if there are any named substrings, and if so, show them by name. First
182 we have to extract the count of named parentheses from the pattern. */
183
184 (void)pcre_fullinfo(
185 re, /* the compiled pattern */
186 NULL, /* no extra data - we didn't study the pattern */
187 PCRE_INFO_NAMECOUNT, /* number of named substrings */
188 &namecount); /* where to put the answer */
189
190 if (namecount <= 0) printf("No named substrings\n"); else
191 {
192 unsigned char *tabptr;
193 printf("Named substrings\n");
194
195 /* Before we can access the substrings, we must extract the table for
196 translating names to numbers, and the size of each entry in the table. */
197
198 (void)pcre_fullinfo(
199 re, /* the compiled pattern */
200 NULL, /* no extra data - we didn't study the pattern */
201 PCRE_INFO_NAMETABLE, /* address of the table */
202 &name_table); /* where to put the answer */
203
204 (void)pcre_fullinfo(
205 re, /* the compiled pattern */
206 NULL, /* no extra data - we didn't study the pattern */
207 PCRE_INFO_NAMEENTRYSIZE, /* size of each entry in the table */
208 &name_entry_size); /* where to put the answer */
209
210 /* Now we can scan the table and, for each entry, print the number, the name,
211 and the substring itself. */
212
213 tabptr = name_table;
214 for (i = 0; i < namecount; i++)
215 {
216 int n = (tabptr[0] << 8) | tabptr[1];
217 printf("(%d) %*s: %.*s\n", n, name_entry_size - 3, tabptr + 2,
218 ovector[2*n+1] - ovector[2*n], subject + ovector[2*n]);
219 tabptr += name_entry_size;
220 }
221 }
222
223
224 /*************************************************************************
225 * If the "-g" option was given on the command line, we want to continue *
226 * to search for additional matches in the subject string, in a similar *
227 * way to the /g option in Perl. This turns out to be trickier than you *
228 * might think because of the possibility of matching an empty string. *
229 * What happens is as follows: *
230 * *
231 * If the previous match was NOT for an empty string, we can just start *
232 * the next match at the end of the previous one. *
233 * *
234 * If the previous match WAS for an empty string, we can't do that, as it *
235 * would lead to an infinite loop. Instead, a special call of pcre_exec() *
236 * is made with the PCRE_NOTEMPTY_ATSTART and PCRE_ANCHORED flags set. *
237 * The first of these tells PCRE that an empty string at the start of the *
238 * subject is not a valid match; other possibilities must be tried. The *
239 * second flag restricts PCRE to one match attempt at the initial string *
240 * position. If this match succeeds, an alternative to the empty string *
241 * match has been found, and we can proceed round the loop. *
242 *************************************************************************/
243
244 if (!find_all)
245 {
246 pcre_free(re); /* Release the memory used for the compiled pattern */
247 return 0; /* Finish unless -g was given */
248 }
249
250 /* Loop for second and subsequent matches */
251
252 for (;;)
253 {
254 int options = 0; /* Normally no options */
255 int start_offset = ovector[1]; /* Start at end of previous match */
256
257 /* If the previous match was for an empty string, we are finished if we are
258 at the end of the subject. Otherwise, arrange to run another match at the
259 same point to see if a non-empty match can be found. */
260
261 if (ovector[0] == ovector[1])
262 {
263 if (ovector[0] == subject_length) break;
264 options = PCRE_NOTEMPTY_ATSTART | PCRE_ANCHORED;
265 }
266
267 /* Run the next matching operation */
268
269 rc = pcre_exec(
270 re, /* the compiled pattern */
271 NULL, /* no extra data - we didn't study the pattern */
272 subject, /* the subject string */
273 subject_length, /* the length of the subject */
274 start_offset, /* starting offset in the subject */
275 options, /* options */
276 ovector, /* output vector for substring information */
277 OVECCOUNT); /* number of elements in the output vector */
278
279 /* This time, a result of NOMATCH isn't an error. If the value in "options"
280 is zero, it just means we have found all possible matches, so the loop ends.
281 Otherwise, it means we have failed to find a non-empty-string match at a
282 point where there was a previous empty-string match. In this case, we do what
283 Perl does: advance the matching position by one, and continue. We do this by
284 setting the "end of previous match" offset, because that is picked up at the
285 top of the loop as the point at which to start again. */
286
287 if (rc == PCRE_ERROR_NOMATCH)
288 {
289 if (options == 0) break;
290 ovector[1] = start_offset + 1;
291 continue; /* Go round the loop again */
292 }
293
294 /* Other matching errors are not recoverable. */
295
296 if (rc < 0)
297 {
298 printf("Matching error %d\n", rc);
299 pcre_free(re); /* Release memory used for the compiled pattern */
300 return 1;
301 }
302
303 /* Match succeded */
304
305 printf("\nMatch succeeded again at offset %d\n", ovector[0]);
306
307 /* The match succeeded, but the output vector wasn't big enough. */
308
309 if (rc == 0)
310 {
311 rc = OVECCOUNT/3;
312 printf("ovector only has room for %d captured substrings\n", rc - 1);
313 }
314
315 /* As before, show substrings stored in the output vector by number, and then
316 also any named substrings. */
317
318 for (i = 0; i < rc; i++)
319 {
320 char *substring_start = subject + ovector[2*i];
321 int substring_length = ovector[2*i+1] - ovector[2*i];
322 printf("%2d: %.*s\n", i, substring_length, substring_start);
323 }
324
325 if (namecount <= 0) printf("No named substrings\n"); else
326 {
327 unsigned char *tabptr = name_table;
328 printf("Named substrings\n");
329 for (i = 0; i < namecount; i++)
330 {
331 int n = (tabptr[0] << 8) | tabptr[1];
332 printf("(%d) %*s: %.*s\n", n, name_entry_size - 3, tabptr + 2,
333 ovector[2*n+1] - ovector[2*n], subject + ovector[2*n]);
334 tabptr += name_entry_size;
335 }
336 }
337 } /* End of loop to find second and subsequent matches */
338
339 printf("\n");
340 pcre_free(re); /* Release memory used for the compiled pattern */
341 return 0;
342 }
343
344 /* End of pcredemo.c */

Properties

Name Value
svn:eol-style native
svn:keywords "Author Date Id Revision Url"

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12