/[pcre]/code/tags/pcre-6.0/pcredemo.c
ViewVC logotype

Contents of /code/tags/pcre-6.0/pcredemo.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 78 - (show annotations) (download)
Sat Feb 24 21:40:47 2007 UTC (7 years, 7 months ago) by nigel
File MIME type: text/plain
File size: 11707 byte(s)
Tag code/trunk as code/tags/pcre-6.0.

1 /*************************************************
2 * PCRE DEMONSTRATION PROGRAM *
3 *************************************************/
4
5 /* This is a demonstration program to illustrate the most straightforward ways
6 of calling the PCRE regular expression library from a C program. See the
7 pcresample documentation for a short discussion.
8
9 Compile thuswise:
10 gcc -Wall pcredemo.c -I/usr/local/include -L/usr/local/lib \
11 -R/usr/local/lib -lpcre
12
13 Replace "/usr/local/include" and "/usr/local/lib" with wherever the include and
14 library files for PCRE are installed on your system. Only some operating
15 systems (e.g. Solaris) use the -R option.
16 */
17
18
19 #include <stdio.h>
20 #include <string.h>
21 #include <pcre.h>
22
23 #define OVECCOUNT 30 /* should be a multiple of 3 */
24
25
26 int main(int argc, char **argv)
27 {
28 pcre *re;
29 const char *error;
30 char *pattern;
31 char *subject;
32 unsigned char *name_table;
33 int erroffset;
34 int find_all;
35 int namecount;
36 int name_entry_size;
37 int ovector[OVECCOUNT];
38 int subject_length;
39 int rc, i;
40
41
42 /**************************************************************************
43 * First, sort out the command line. There is only one possible option at *
44 * the moment, "-g" to request repeated matching to find all occurrences, *
45 * like Perl's /g option. We set the variable find_all to a non-zero value *
46 * if the -g option is present. Apart from that, there must be exactly two *
47 * arguments. *
48 **************************************************************************/
49
50 find_all = 0;
51 for (i = 1; i < argc; i++)
52 {
53 if (strcmp(argv[i], "-g") == 0) find_all = 1;
54 else break;
55 }
56
57 /* After the options, we require exactly two arguments, which are the pattern,
58 and the subject string. */
59
60 if (argc - i != 2)
61 {
62 printf("Two arguments required: a regex and a subject string\n");
63 return 1;
64 }
65
66 pattern = argv[i];
67 subject = argv[i+1];
68 subject_length = (int)strlen(subject);
69
70
71 /*************************************************************************
72 * Now we are going to compile the regular expression pattern, and handle *
73 * and errors that are detected. *
74 *************************************************************************/
75
76 re = pcre_compile(
77 pattern, /* the pattern */
78 0, /* default options */
79 &error, /* for error message */
80 &erroffset, /* for error offset */
81 NULL); /* use default character tables */
82
83 /* Compilation failed: print the error message and exit */
84
85 if (re == NULL)
86 {
87 printf("PCRE compilation failed at offset %d: %s\n", erroffset, error);
88 return 1;
89 }
90
91
92 /*************************************************************************
93 * If the compilation succeeded, we call PCRE again, in order to do a *
94 * pattern match against the subject string. This does just ONE match. If *
95 * further matching is needed, it will be done below. *
96 *************************************************************************/
97
98 rc = pcre_exec(
99 re, /* the compiled pattern */
100 NULL, /* no extra data - we didn't study the pattern */
101 subject, /* the subject string */
102 subject_length, /* the length of the subject */
103 0, /* start at offset 0 in the subject */
104 0, /* default options */
105 ovector, /* output vector for substring information */
106 OVECCOUNT); /* number of elements in the output vector */
107
108 /* Matching failed: handle error cases */
109
110 if (rc < 0)
111 {
112 switch(rc)
113 {
114 case PCRE_ERROR_NOMATCH: printf("No match\n"); break;
115 /*
116 Handle other special cases if you like
117 */
118 default: printf("Matching error %d\n", rc); break;
119 }
120 free(re); /* Release memory used for the compiled pattern */
121 return 1;
122 }
123
124 /* Match succeded */
125
126 printf("\nMatch succeeded at offset %d\n", ovector[0]);
127
128
129 /*************************************************************************
130 * We have found the first match within the subject string. If the output *
131 * vector wasn't big enough, set its size to the maximum. Then output any *
132 * substrings that were captured. *
133 *************************************************************************/
134
135 /* The output vector wasn't big enough */
136
137 if (rc == 0)
138 {
139 rc = OVECCOUNT/3;
140 printf("ovector only has room for %d captured substrings\n", rc - 1);
141 }
142
143 /* Show substrings stored in the output vector by number. Obviously, in a real
144 application you might want to do things other than print them. */
145
146 for (i = 0; i < rc; i++)
147 {
148 char *substring_start = subject + ovector[2*i];
149 int substring_length = ovector[2*i+1] - ovector[2*i];
150 printf("%2d: %.*s\n", i, substring_length, substring_start);
151 }
152
153
154 /**************************************************************************
155 * That concludes the basic part of this demonstration program. We have *
156 * compiled a pattern, and performed a single match. The code that follows *
157 * first shows how to access named substrings, and then how to code for *
158 * repeated matches on the same subject. *
159 **************************************************************************/
160
161 /* See if there are any named substrings, and if so, show them by name. First
162 we have to extract the count of named parentheses from the pattern. */
163
164 (void)pcre_fullinfo(
165 re, /* the compiled pattern */
166 NULL, /* no extra data - we didn't study the pattern */
167 PCRE_INFO_NAMECOUNT, /* number of named substrings */
168 &namecount); /* where to put the answer */
169
170 if (namecount <= 0) printf("No named substrings\n"); else
171 {
172 unsigned char *tabptr;
173 printf("Named substrings\n");
174
175 /* Before we can access the substrings, we must extract the table for
176 translating names to numbers, and the size of each entry in the table. */
177
178 (void)pcre_fullinfo(
179 re, /* the compiled pattern */
180 NULL, /* no extra data - we didn't study the pattern */
181 PCRE_INFO_NAMETABLE, /* address of the table */
182 &name_table); /* where to put the answer */
183
184 (void)pcre_fullinfo(
185 re, /* the compiled pattern */
186 NULL, /* no extra data - we didn't study the pattern */
187 PCRE_INFO_NAMEENTRYSIZE, /* size of each entry in the table */
188 &name_entry_size); /* where to put the answer */
189
190 /* Now we can scan the table and, for each entry, print the number, the name,
191 and the substring itself. */
192
193 tabptr = name_table;
194 for (i = 0; i < namecount; i++)
195 {
196 int n = (tabptr[0] << 8) | tabptr[1];
197 printf("(%d) %*s: %.*s\n", n, name_entry_size - 3, tabptr + 2,
198 ovector[2*n+1] - ovector[2*n], subject + ovector[2*n]);
199 tabptr += name_entry_size;
200 }
201 }
202
203
204 /*************************************************************************
205 * If the "-g" option was given on the command line, we want to continue *
206 * to search for additional matches in the subject string, in a similar *
207 * way to the /g option in Perl. This turns out to be trickier than you *
208 * might think because of the possibility of matching an empty string. *
209 * What happens is as follows: *
210 * *
211 * If the previous match was NOT for an empty string, we can just start *
212 * the next match at the end of the previous one. *
213 * *
214 * If the previous match WAS for an empty string, we can't do that, as it *
215 * would lead to an infinite loop. Instead, a special call of pcre_exec() *
216 * is made with the PCRE_NOTEMPTY and PCRE_ANCHORED flags set. The first *
217 * of these tells PCRE that an empty string is not a valid match; other *
218 * possibilities must be tried. The second flag restricts PCRE to one *
219 * match attempt at the initial string position. If this match succeeds, *
220 * an alternative to the empty string match has been found, and we can *
221 * proceed round the loop. *
222 *************************************************************************/
223
224 if (!find_all)
225 {
226 free(re); /* Release the memory used for the compiled pattern */
227 return 0; /* Finish unless -g was given */
228 }
229
230 /* Loop for second and subsequent matches */
231
232 for (;;)
233 {
234 int options = 0; /* Normally no options */
235 int start_offset = ovector[1]; /* Start at end of previous match */
236
237 /* If the previous match was for an empty string, we are finished if we are
238 at the end of the subject. Otherwise, arrange to run another match at the
239 same point to see if a non-empty match can be found. */
240
241 if (ovector[0] == ovector[1])
242 {
243 if (ovector[0] == subject_length) break;
244 options = PCRE_NOTEMPTY | PCRE_ANCHORED;
245 }
246
247 /* Run the next matching operation */
248
249 rc = pcre_exec(
250 re, /* the compiled pattern */
251 NULL, /* no extra data - we didn't study the pattern */
252 subject, /* the subject string */
253 subject_length, /* the length of the subject */
254 start_offset, /* starting offset in the subject */
255 options, /* options */
256 ovector, /* output vector for substring information */
257 OVECCOUNT); /* number of elements in the output vector */
258
259 /* This time, a result of NOMATCH isn't an error. If the value in "options"
260 is zero, it just means we have found all possible matches, so the loop ends.
261 Otherwise, it means we have failed to find a non-empty-string match at a
262 point where there was a previous empty-string match. In this case, we do what
263 Perl does: advance the matching position by one, and continue. We do this by
264 setting the "end of previous match" offset, because that is picked up at the
265 top of the loop as the point at which to start again. */
266
267 if (rc == PCRE_ERROR_NOMATCH)
268 {
269 if (options == 0) break;
270 ovector[1] = start_offset + 1;
271 continue; /* Go round the loop again */
272 }
273
274 /* Other matching errors are not recoverable. */
275
276 if (rc < 0)
277 {
278 printf("Matching error %d\n", rc);
279 free(re); /* Release memory used for the compiled pattern */
280 return 1;
281 }
282
283 /* Match succeded */
284
285 printf("\nMatch succeeded again at offset %d\n", ovector[0]);
286
287 /* The match succeeded, but the output vector wasn't big enough. */
288
289 if (rc == 0)
290 {
291 rc = OVECCOUNT/3;
292 printf("ovector only has room for %d captured substrings\n", rc - 1);
293 }
294
295 /* As before, show substrings stored in the output vector by number, and then
296 also any named substrings. */
297
298 for (i = 0; i < rc; i++)
299 {
300 char *substring_start = subject + ovector[2*i];
301 int substring_length = ovector[2*i+1] - ovector[2*i];
302 printf("%2d: %.*s\n", i, substring_length, substring_start);
303 }
304
305 if (namecount <= 0) printf("No named substrings\n"); else
306 {
307 unsigned char *tabptr = name_table;
308 printf("Named substrings\n");
309 for (i = 0; i < namecount; i++)
310 {
311 int n = (tabptr[0] << 8) | tabptr[1];
312 printf("(%d) %*s: %.*s\n", n, name_entry_size - 3, tabptr + 2,
313 ovector[2*n+1] - ovector[2*n], subject + ovector[2*n]);
314 tabptr += name_entry_size;
315 }
316 }
317 } /* End of loop to find second and subsequent matches */
318
319 printf("\n");
320 free(re); /* Release memory used for the compiled pattern */
321 return 0;
322 }
323
324 /* End of pcredemo.c */

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12