/[pcre]/code/tags/pcre-4.5/pcredemo.c
ViewVC logotype

Contents of /code/tags/pcre-4.5/pcredemo.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 63 - (show annotations) (download)
Sat Feb 24 21:40:03 2007 UTC (7 years, 9 months ago) by nigel
Original Path: code/trunk/pcredemo.c
File MIME type: text/plain
File size: 11338 byte(s)
Load pcre-4.0 into code/trunk.

1 /*************************************************
2 * PCRE DEMONSTRATION PROGRAM *
3 *************************************************/
4
5 /* This is a demonstration program to illustrate the most straightforward ways
6 of calling the PCRE regular expression library from a C program. See the
7 pcresample documentation for a short discussion.
8
9 Compile thuswise:
10 gcc -Wall pcredemo.c -I/opt/local/include -L/opt/local/lib \
11 -R/opt/local/lib -lpcre
12
13 Replace "/opt/local/include" and "/opt/local/lib" with wherever the include and
14 library files for PCRE are installed on your system. Only some operating
15 systems (e.g. Solaris) use the -R option.
16 */
17
18
19 #include <stdio.h>
20 #include <string.h>
21 #include <pcre.h>
22
23 #define OVECCOUNT 30 /* size of the pcre_exec() output vector; should be a multiple of 3 (main treats OVECCOUNT/3 as the number of substring slots, slot 0 being the whole match) */
24
25
26 int main(int argc, char **argv)
27 {
28 pcre *re;
29 const char *error;
30 char *pattern;
31 char *subject;
32 unsigned char *name_table;
33 int erroffset;
34 int find_all;
35 int namecount;
36 int name_entry_size;
37 int ovector[OVECCOUNT];
38 int subject_length;
39 int rc, i;
40
41
42 /*************************************************************************
43 * First, sort out the command line. There is only one possible option at *
44 * the moment, "-g" to request repeated matching to find all occurrences, *
45 * like Perl's /g option. We set the variable find_all non-zero if it is *
46 * present. Apart from that, there must be exactly two arguments. *
47 *************************************************************************/
48
49 find_all = 0;
50 for (i = 1; i < argc; i++)
51 {
52 if (strcmp(argv[i], "-g") == 0) find_all = 1;
53 else break;
54 }
55
56 /* After the options, we require exactly two arguments, which are the pattern,
57 and the subject string. */
58
59 if (argc - i != 2)
60 {
61 printf("Two arguments required: a regex and a subject string\n");
62 return 1;
63 }
64
65 pattern = argv[i];
66 subject = argv[i+1];
67 subject_length = (int)strlen(subject);
68
69
70 /*************************************************************************
71 * Now we are going to compile the regular expression pattern, and handle *
72 * and errors that are detected. *
73 *************************************************************************/
74
75 re = pcre_compile(
76 pattern, /* the pattern */
77 0, /* default options */
78 &error, /* for error message */
79 &erroffset, /* for error offset */
80 NULL); /* use default character tables */
81
82 /* Compilation failed: print the error message and exit */
83
84 if (re == NULL)
85 {
86 printf("PCRE compilation failed at offset %d: %s\n", erroffset, error);
87 return 1;
88 }
89
90
91 /*************************************************************************
92 * If the compilation succeeded, we call PCRE again, in order to do a *
93 * pattern match against the subject string. This just does ONE match. If *
94 * further matching is needed, it will be done below. *
95 *************************************************************************/
96
97 rc = pcre_exec(
98 re, /* the compiled pattern */
99 NULL, /* no extra data - we didn't study the pattern */
100 subject, /* the subject string */
101 subject_length, /* the length of the subject */
102 0, /* start at offset 0 in the subject */
103 0, /* default options */
104 ovector, /* output vector for substring information */
105 OVECCOUNT); /* number of elements in the output vector */
106
107 /* Matching failed: handle error cases */
108
109 if (rc < 0)
110 {
111 switch(rc)
112 {
113 case PCRE_ERROR_NOMATCH: printf("No match\n"); break;
114 /*
115 Handle other special cases if you like
116 */
117 default: printf("Matching error %d\n", rc); break;
118 }
119 return 1;
120 }
121
122 /* Match succeded */
123
124 printf("\nMatch succeeded at offset %d\n", ovector[0]);
125
126
127 /*************************************************************************
128 * We have found the first match within the subject string. If the output *
129 * vector wasn't big enough, set its size to the maximum. Then output any *
130 * substrings that were captured. *
131 *************************************************************************/
132
133 /* The output vector wasn't big enough */
134
135 if (rc == 0)
136 {
137 rc = OVECCOUNT/3;
138 printf("ovector only has room for %d captured substrings\n", rc - 1);
139 }
140
141 /* Show substrings stored in the output vector by number. Obviously, in a real
142 application you might want to do things other than print them. */
143
144 for (i = 0; i < rc; i++)
145 {
146 char *substring_start = subject + ovector[2*i];
147 int substring_length = ovector[2*i+1] - ovector[2*i];
148 printf("%2d: %.*s\n", i, substring_length, substring_start);
149 }
150
151
152 /*************************************************************************
153 * That concludes the basic part of this demonstration program. We have *
154 * compiled a pattern, and performed a single match. The code that follows*
155 * first shows how to access named substrings, and then how to code for *
156 * repeated matches on the same subject. *
157 *************************************************************************/
158
159 /* See if there are any named substrings, and if so, show them by name. First
160 we have to extract the count of named parentheses from the pattern. */
161
162 (void)pcre_fullinfo(
163 re, /* the compiled pattern */
164 NULL, /* no extra data - we didn't study the pattern */
165 PCRE_INFO_NAMECOUNT, /* number of named substrings */
166 &namecount); /* where to put the answer */
167
168 if (namecount <= 0) printf("No named substrings\n"); else
169 {
170 unsigned char *tabptr;
171 printf("Named substrings\n");
172
173 /* Before we can access the substrings, we must extract the table for
174 translating names to numbers, and the size of each entry in the table. */
175
176 (void)pcre_fullinfo(
177 re, /* the compiled pattern */
178 NULL, /* no extra data - we didn't study the pattern */
179 PCRE_INFO_NAMETABLE, /* address of the table */
180 &name_table); /* where to put the answer */
181
182 (void)pcre_fullinfo(
183 re, /* the compiled pattern */
184 NULL, /* no extra data - we didn't study the pattern */
185 PCRE_INFO_NAMEENTRYSIZE, /* size of each entry in the table */
186 &name_entry_size); /* where to put the answer */
187
188 /* Now we can scan the table and, for each entry, print the number, the name,
189 and the substring itself. */
190
191 tabptr = name_table;
192 for (i = 0; i < namecount; i++)
193 {
194 int n = (tabptr[0] << 8) | tabptr[1];
195 printf("(%d) %*s: %.*s\n", n, name_entry_size - 3, tabptr + 2,
196 ovector[2*n+1] - ovector[2*n], subject + ovector[2*n]);
197 tabptr += name_entry_size;
198 }
199 }
200
201
202 /*************************************************************************
203 * If the "-g" option was given on the command line, we want to continue *
204 * to search for additional matches in the subject string, in a similar *
205 * way to the /g option in Perl. This turns out to be trickier than you *
206 * might think because of the possibility of matching an empty string. *
207 * What happens is as follows: *
208 * *
209 * If the previous match was NOT for an empty string, we can just start *
210 * the next match at the end of the previous one. *
211 * *
212 * If the previous match WAS for an empty string, we can't do that, as it *
213 * would lead to an infinite loop. Instead, a special call of pcre_exec() *
214 * is made with the PCRE_NOTEMPTY and PCRE_ANCHORED flags set. The first *
215 * of these tells PCRE that an empty string is not a valid match; other *
216 * possibilities must be tried. The second flag restricts PCRE to one *
217 * match attempt at the initial string position. If this match succeeds, *
218 * an alternative to the empty string match has been found, and we can *
219 * proceed round the loop. *
220 *************************************************************************/
221
222 if (!find_all) return 0; /* Finish unless -g was given */
223
224 /* Loop for second and subsequent matches */
225
226 for (;;)
227 {
228 int options = 0; /* Normally no options */
229 int start_offset = ovector[1]; /* Start at end of previous match */
230
231 /* If the previous match was for an empty string, we are finished if we are
232 at the end of the subject. Otherwise, arrange to run another match at the
233 same point to see if a non-empty match can be found. */
234
235 if (ovector[0] == ovector[1])
236 {
237 if (ovector[0] == subject_length) break;
238 options = PCRE_NOTEMPTY | PCRE_ANCHORED;
239 }
240
241 /* Run the next matching operation */
242
243 rc = pcre_exec(
244 re, /* the compiled pattern */
245 NULL, /* no extra data - we didn't study the pattern */
246 subject, /* the subject string */
247 subject_length, /* the length of the subject */
248 start_offset, /* starting offset in the subject */
249 options, /* options */
250 ovector, /* output vector for substring information */
251 OVECCOUNT); /* number of elements in the output vector */
252
253 /* This time, a result of NOMATCH isn't an error. If the value in "options"
254 is zero, it just means we have found all possible matches, so the loop ends.
255 Otherwise, it means we have failed to find a non-empty-string match at a
256 point where there was a previous empty-string match. In this case, we do what
257 Perl does: advance the matching position by one, and continue. We do this by
258 setting the "end of previous match" offset, because that is picked up at the
259 top of the loop as the point at which to start again. */
260
261 if (rc == PCRE_ERROR_NOMATCH)
262 {
263 if (options == 0) break;
264 ovector[1] = start_offset + 1;
265 continue; /* Go round the loop again */
266 }
267
268 /* Other matching errors are not recoverable. */
269
270 if (rc < 0)
271 {
272 printf("Matching error %d\n", rc);
273 return 1;
274 }
275
276 /* Match succeded */
277
278 printf("\nMatch succeeded again at offset %d\n", ovector[0]);
279
280 /* The match succeeded, but the output vector wasn't big enough. */
281
282 if (rc == 0)
283 {
284 rc = OVECCOUNT/3;
285 printf("ovector only has room for %d captured substrings\n", rc - 1);
286 }
287
288 /* As before, show substrings stored in the output vector by number, and then
289 also any named substrings. */
290
291 for (i = 0; i < rc; i++)
292 {
293 char *substring_start = subject + ovector[2*i];
294 int substring_length = ovector[2*i+1] - ovector[2*i];
295 printf("%2d: %.*s\n", i, substring_length, substring_start);
296 }
297
298 if (namecount <= 0) printf("No named substrings\n"); else
299 {
300 unsigned char *tabptr = name_table;
301 printf("Named substrings\n");
302 for (i = 0; i < namecount; i++)
303 {
304 int n = (tabptr[0] << 8) | tabptr[1];
305 printf("(%d) %*s: %.*s\n", n, name_entry_size - 3, tabptr + 2,
306 ovector[2*n+1] - ovector[2*n], subject + ovector[2*n]);
307 tabptr += name_entry_size;
308 }
309 }
310 } /* End of loop to find second and subsequent matches */
311
312 printf("\n");
313 return 0;
314 }
315
316 /* End of pcredemo.c */

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12