/[pcre]/code/trunk/pcredemo.c
ViewVC logotype

Contents of /code/trunk/pcredemo.c

Parent Directory | Revision Log


Revision 97 - (show annotations) (download)
Mon Mar 5 12:36:47 2007 UTC (7 years, 9 months ago) by ph10
File MIME type: text/plain
File size: 11783 byte(s)
Applied Bob and Daniel's patches to convert the build system to automake. Added 
the maintain directory, containing files that are used for maintenance, but are 
not distributed. This is an intermediate step.

1 /*************************************************
2 * PCRE DEMONSTRATION PROGRAM *
3 *************************************************/
4
5 /* This is a demonstration program to illustrate the most straightforward ways
6 of calling the PCRE regular expression library from a C program. See the
7 pcresample documentation for a short discussion.
8
9 Compile thuswise:
10 gcc -Wall pcredemo.c -I/usr/local/include -L/usr/local/lib \
11 -R/usr/local/lib -lpcre
12
13 Replace "/usr/local/include" and "/usr/local/lib" with wherever the include and
14 library files for PCRE are installed on your system. Only some operating
15 systems (e.g. Solaris) use the -R option.
16 */
17
18
19 #ifdef HAVE_CONFIG_H
20 # include <config.h>
21 #endif
22
23 #include <stdio.h>
24 #include <string.h>
25 #include <pcre.h>
26
27 #define OVECCOUNT 30 /* should be a multiple of 3 */
28
29
30 int main(int argc, char **argv)
31 {
32 pcre *re;
33 const char *error;
34 char *pattern;
35 char *subject;
36 unsigned char *name_table;
37 int erroffset;
38 int find_all;
39 int namecount;
40 int name_entry_size;
41 int ovector[OVECCOUNT];
42 int subject_length;
43 int rc, i;
44
45
46 /**************************************************************************
47 * First, sort out the command line. There is only one possible option at *
48 * the moment, "-g" to request repeated matching to find all occurrences, *
49 * like Perl's /g option. We set the variable find_all to a non-zero value *
50 * if the -g option is present. Apart from that, there must be exactly two *
51 * arguments. *
52 **************************************************************************/
53
54 find_all = 0;
55 for (i = 1; i < argc; i++)
56 {
57 if (strcmp(argv[i], "-g") == 0) find_all = 1;
58 else break;
59 }
60
61 /* After the options, we require exactly two arguments, which are the pattern,
62 and the subject string. */
63
64 if (argc - i != 2)
65 {
66 printf("Two arguments required: a regex and a subject string\n");
67 return 1;
68 }
69
70 pattern = argv[i];
71 subject = argv[i+1];
72 subject_length = (int)strlen(subject);
73
74
75 /*************************************************************************
76 * Now we are going to compile the regular expression pattern, and handle *
77 * and errors that are detected. *
78 *************************************************************************/
79
80 re = pcre_compile(
81 pattern, /* the pattern */
82 0, /* default options */
83 &error, /* for error message */
84 &erroffset, /* for error offset */
85 NULL); /* use default character tables */
86
87 /* Compilation failed: print the error message and exit */
88
89 if (re == NULL)
90 {
91 printf("PCRE compilation failed at offset %d: %s\n", erroffset, error);
92 return 1;
93 }
94
95
96 /*************************************************************************
97 * If the compilation succeeded, we call PCRE again, in order to do a *
98 * pattern match against the subject string. This does just ONE match. If *
99 * further matching is needed, it will be done below. *
100 *************************************************************************/
101
102 rc = pcre_exec(
103 re, /* the compiled pattern */
104 NULL, /* no extra data - we didn't study the pattern */
105 subject, /* the subject string */
106 subject_length, /* the length of the subject */
107 0, /* start at offset 0 in the subject */
108 0, /* default options */
109 ovector, /* output vector for substring information */
110 OVECCOUNT); /* number of elements in the output vector */
111
112 /* Matching failed: handle error cases */
113
114 if (rc < 0)
115 {
116 switch(rc)
117 {
118 case PCRE_ERROR_NOMATCH: printf("No match\n"); break;
119 /*
120 Handle other special cases if you like
121 */
122 default: printf("Matching error %d\n", rc); break;
123 }
124 pcre_free(re); /* Release memory used for the compiled pattern */
125 return 1;
126 }
127
128 /* Match succeded */
129
130 printf("\nMatch succeeded at offset %d\n", ovector[0]);
131
132
133 /*************************************************************************
134 * We have found the first match within the subject string. If the output *
135 * vector wasn't big enough, set its size to the maximum. Then output any *
136 * substrings that were captured. *
137 *************************************************************************/
138
139 /* The output vector wasn't big enough */
140
141 if (rc == 0)
142 {
143 rc = OVECCOUNT/3;
144 printf("ovector only has room for %d captured substrings\n", rc - 1);
145 }
146
147 /* Show substrings stored in the output vector by number. Obviously, in a real
148 application you might want to do things other than print them. */
149
150 for (i = 0; i < rc; i++)
151 {
152 char *substring_start = subject + ovector[2*i];
153 int substring_length = ovector[2*i+1] - ovector[2*i];
154 printf("%2d: %.*s\n", i, substring_length, substring_start);
155 }
156
157
158 /**************************************************************************
159 * That concludes the basic part of this demonstration program. We have *
160 * compiled a pattern, and performed a single match. The code that follows *
161 * first shows how to access named substrings, and then how to code for *
162 * repeated matches on the same subject. *
163 **************************************************************************/
164
165 /* See if there are any named substrings, and if so, show them by name. First
166 we have to extract the count of named parentheses from the pattern. */
167
168 (void)pcre_fullinfo(
169 re, /* the compiled pattern */
170 NULL, /* no extra data - we didn't study the pattern */
171 PCRE_INFO_NAMECOUNT, /* number of named substrings */
172 &namecount); /* where to put the answer */
173
174 if (namecount <= 0) printf("No named substrings\n"); else
175 {
176 unsigned char *tabptr;
177 printf("Named substrings\n");
178
179 /* Before we can access the substrings, we must extract the table for
180 translating names to numbers, and the size of each entry in the table. */
181
182 (void)pcre_fullinfo(
183 re, /* the compiled pattern */
184 NULL, /* no extra data - we didn't study the pattern */
185 PCRE_INFO_NAMETABLE, /* address of the table */
186 &name_table); /* where to put the answer */
187
188 (void)pcre_fullinfo(
189 re, /* the compiled pattern */
190 NULL, /* no extra data - we didn't study the pattern */
191 PCRE_INFO_NAMEENTRYSIZE, /* size of each entry in the table */
192 &name_entry_size); /* where to put the answer */
193
194 /* Now we can scan the table and, for each entry, print the number, the name,
195 and the substring itself. */
196
197 tabptr = name_table;
198 for (i = 0; i < namecount; i++)
199 {
200 int n = (tabptr[0] << 8) | tabptr[1];
201 printf("(%d) %*s: %.*s\n", n, name_entry_size - 3, tabptr + 2,
202 ovector[2*n+1] - ovector[2*n], subject + ovector[2*n]);
203 tabptr += name_entry_size;
204 }
205 }
206
207
208 /*************************************************************************
209 * If the "-g" option was given on the command line, we want to continue *
210 * to search for additional matches in the subject string, in a similar *
211 * way to the /g option in Perl. This turns out to be trickier than you *
212 * might think because of the possibility of matching an empty string. *
213 * What happens is as follows: *
214 * *
215 * If the previous match was NOT for an empty string, we can just start *
216 * the next match at the end of the previous one. *
217 * *
218 * If the previous match WAS for an empty string, we can't do that, as it *
219 * would lead to an infinite loop. Instead, a special call of pcre_exec() *
220 * is made with the PCRE_NOTEMPTY and PCRE_ANCHORED flags set. The first *
221 * of these tells PCRE that an empty string is not a valid match; other *
222 * possibilities must be tried. The second flag restricts PCRE to one *
223 * match attempt at the initial string position. If this match succeeds, *
224 * an alternative to the empty string match has been found, and we can *
225 * proceed round the loop. *
226 *************************************************************************/
227
228 if (!find_all)
229 {
230 pcre_free(re); /* Release the memory used for the compiled pattern */
231 return 0; /* Finish unless -g was given */
232 }
233
234 /* Loop for second and subsequent matches */
235
236 for (;;)
237 {
238 int options = 0; /* Normally no options */
239 int start_offset = ovector[1]; /* Start at end of previous match */
240
241 /* If the previous match was for an empty string, we are finished if we are
242 at the end of the subject. Otherwise, arrange to run another match at the
243 same point to see if a non-empty match can be found. */
244
245 if (ovector[0] == ovector[1])
246 {
247 if (ovector[0] == subject_length) break;
248 options = PCRE_NOTEMPTY | PCRE_ANCHORED;
249 }
250
251 /* Run the next matching operation */
252
253 rc = pcre_exec(
254 re, /* the compiled pattern */
255 NULL, /* no extra data - we didn't study the pattern */
256 subject, /* the subject string */
257 subject_length, /* the length of the subject */
258 start_offset, /* starting offset in the subject */
259 options, /* options */
260 ovector, /* output vector for substring information */
261 OVECCOUNT); /* number of elements in the output vector */
262
263 /* This time, a result of NOMATCH isn't an error. If the value in "options"
264 is zero, it just means we have found all possible matches, so the loop ends.
265 Otherwise, it means we have failed to find a non-empty-string match at a
266 point where there was a previous empty-string match. In this case, we do what
267 Perl does: advance the matching position by one, and continue. We do this by
268 setting the "end of previous match" offset, because that is picked up at the
269 top of the loop as the point at which to start again. */
270
271 if (rc == PCRE_ERROR_NOMATCH)
272 {
273 if (options == 0) break;
274 ovector[1] = start_offset + 1;
275 continue; /* Go round the loop again */
276 }
277
278 /* Other matching errors are not recoverable. */
279
280 if (rc < 0)
281 {
282 printf("Matching error %d\n", rc);
283 pcre_free(re); /* Release memory used for the compiled pattern */
284 return 1;
285 }
286
287 /* Match succeded */
288
289 printf("\nMatch succeeded again at offset %d\n", ovector[0]);
290
291 /* The match succeeded, but the output vector wasn't big enough. */
292
293 if (rc == 0)
294 {
295 rc = OVECCOUNT/3;
296 printf("ovector only has room for %d captured substrings\n", rc - 1);
297 }
298
299 /* As before, show substrings stored in the output vector by number, and then
300 also any named substrings. */
301
302 for (i = 0; i < rc; i++)
303 {
304 char *substring_start = subject + ovector[2*i];
305 int substring_length = ovector[2*i+1] - ovector[2*i];
306 printf("%2d: %.*s\n", i, substring_length, substring_start);
307 }
308
309 if (namecount <= 0) printf("No named substrings\n"); else
310 {
311 unsigned char *tabptr = name_table;
312 printf("Named substrings\n");
313 for (i = 0; i < namecount; i++)
314 {
315 int n = (tabptr[0] << 8) | tabptr[1];
316 printf("(%d) %*s: %.*s\n", n, name_entry_size - 3, tabptr + 2,
317 ovector[2*n+1] - ovector[2*n], subject + ovector[2*n]);
318 tabptr += name_entry_size;
319 }
320 }
321 } /* End of loop to find second and subsequent matches */
322
323 printf("\n");
324 pcre_free(re); /* Release memory used for the compiled pattern */
325 return 0;
326 }
327
328 /* End of pcredemo.c */

Properties

Name Value
svn:eol-style native
svn:keywords "Author Date Id Revision Url"

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12