/[pcre]/code/trunk/pcredemo.c
ViewVC logotype

Contents of /code/trunk/pcredemo.c

Parent Directory | Revision Log


Revision 200 - (show annotations) (download)
Wed Aug 1 09:10:40 2007 UTC (7 years, 3 months ago) by ph10
File MIME type: text/plain
File size: 11812 byte(s)
Correct errors in previous patch; tidy for test release.

1 /*************************************************
2 * PCRE DEMONSTRATION PROGRAM *
3 *************************************************/
4
5 /* This is a demonstration program to illustrate the most straightforward ways
6 of calling the PCRE regular expression library from a C program. See the
7 pcresample documentation for a short discussion.
8
9 Compile thuswise:
10 gcc -Wall pcredemo.c -I/usr/local/include -L/usr/local/lib \
11 -R/usr/local/lib -lpcre
12
13 Replace "/usr/local/include" and "/usr/local/lib" with wherever the include and
14 library files for PCRE are installed on your system. You don't need -I and -L
15 if PCRE is installed in the standard system libraries. Only some operating
16 systems (e.g. Solaris) use the -R option.
17 */
18
19
20 #include <stdio.h>
21 #include <string.h>
22 #include <pcre.h>
23
24 #define OVECCOUNT 30 /* should be a multiple of 3 */
25
26
27 int main(int argc, char **argv)
28 {
29 pcre *re;
30 const char *error;
31 char *pattern;
32 char *subject;
33 unsigned char *name_table;
34 int erroffset;
35 int find_all;
36 int namecount;
37 int name_entry_size;
38 int ovector[OVECCOUNT];
39 int subject_length;
40 int rc, i;
41
42
43 /**************************************************************************
44 * First, sort out the command line. There is only one possible option at *
45 * the moment, "-g" to request repeated matching to find all occurrences, *
46 * like Perl's /g option. We set the variable find_all to a non-zero value *
47 * if the -g option is present. Apart from that, there must be exactly two *
48 * arguments. *
49 **************************************************************************/
50
51 find_all = 0;
52 for (i = 1; i < argc; i++)
53 {
54 if (strcmp(argv[i], "-g") == 0) find_all = 1;
55 else break;
56 }
57
58 /* After the options, we require exactly two arguments, which are the pattern,
59 and the subject string. */
60
61 if (argc - i != 2)
62 {
63 printf("Two arguments required: a regex and a subject string\n");
64 return 1;
65 }
66
67 pattern = argv[i];
68 subject = argv[i+1];
69 subject_length = (int)strlen(subject);
70
71
72 /*************************************************************************
73 * Now we are going to compile the regular expression pattern, and handle *
74 * and errors that are detected. *
75 *************************************************************************/
76
77 re = pcre_compile(
78 pattern, /* the pattern */
79 0, /* default options */
80 &error, /* for error message */
81 &erroffset, /* for error offset */
82 NULL); /* use default character tables */
83
84 /* Compilation failed: print the error message and exit */
85
86 if (re == NULL)
87 {
88 printf("PCRE compilation failed at offset %d: %s\n", erroffset, error);
89 return 1;
90 }
91
92
93 /*************************************************************************
94 * If the compilation succeeded, we call PCRE again, in order to do a *
95 * pattern match against the subject string. This does just ONE match. If *
96 * further matching is needed, it will be done below. *
97 *************************************************************************/
98
99 rc = pcre_exec(
100 re, /* the compiled pattern */
101 NULL, /* no extra data - we didn't study the pattern */
102 subject, /* the subject string */
103 subject_length, /* the length of the subject */
104 0, /* start at offset 0 in the subject */
105 0, /* default options */
106 ovector, /* output vector for substring information */
107 OVECCOUNT); /* number of elements in the output vector */
108
109 /* Matching failed: handle error cases */
110
111 if (rc < 0)
112 {
113 switch(rc)
114 {
115 case PCRE_ERROR_NOMATCH: printf("No match\n"); break;
116 /*
117 Handle other special cases if you like
118 */
119 default: printf("Matching error %d\n", rc); break;
120 }
121 pcre_free(re); /* Release memory used for the compiled pattern */
122 return 1;
123 }
124
125 /* Match succeded */
126
127 printf("\nMatch succeeded at offset %d\n", ovector[0]);
128
129
130 /*************************************************************************
131 * We have found the first match within the subject string. If the output *
132 * vector wasn't big enough, set its size to the maximum. Then output any *
133 * substrings that were captured. *
134 *************************************************************************/
135
136 /* The output vector wasn't big enough */
137
138 if (rc == 0)
139 {
140 rc = OVECCOUNT/3;
141 printf("ovector only has room for %d captured substrings\n", rc - 1);
142 }
143
144 /* Show substrings stored in the output vector by number. Obviously, in a real
145 application you might want to do things other than print them. */
146
147 for (i = 0; i < rc; i++)
148 {
149 char *substring_start = subject + ovector[2*i];
150 int substring_length = ovector[2*i+1] - ovector[2*i];
151 printf("%2d: %.*s\n", i, substring_length, substring_start);
152 }
153
154
155 /**************************************************************************
156 * That concludes the basic part of this demonstration program. We have *
157 * compiled a pattern, and performed a single match. The code that follows *
158 * first shows how to access named substrings, and then how to code for *
159 * repeated matches on the same subject. *
160 **************************************************************************/
161
162 /* See if there are any named substrings, and if so, show them by name. First
163 we have to extract the count of named parentheses from the pattern. */
164
165 (void)pcre_fullinfo(
166 re, /* the compiled pattern */
167 NULL, /* no extra data - we didn't study the pattern */
168 PCRE_INFO_NAMECOUNT, /* number of named substrings */
169 &namecount); /* where to put the answer */
170
171 if (namecount <= 0) printf("No named substrings\n"); else
172 {
173 unsigned char *tabptr;
174 printf("Named substrings\n");
175
176 /* Before we can access the substrings, we must extract the table for
177 translating names to numbers, and the size of each entry in the table. */
178
179 (void)pcre_fullinfo(
180 re, /* the compiled pattern */
181 NULL, /* no extra data - we didn't study the pattern */
182 PCRE_INFO_NAMETABLE, /* address of the table */
183 &name_table); /* where to put the answer */
184
185 (void)pcre_fullinfo(
186 re, /* the compiled pattern */
187 NULL, /* no extra data - we didn't study the pattern */
188 PCRE_INFO_NAMEENTRYSIZE, /* size of each entry in the table */
189 &name_entry_size); /* where to put the answer */
190
191 /* Now we can scan the table and, for each entry, print the number, the name,
192 and the substring itself. */
193
194 tabptr = name_table;
195 for (i = 0; i < namecount; i++)
196 {
197 int n = (tabptr[0] << 8) | tabptr[1];
198 printf("(%d) %*s: %.*s\n", n, name_entry_size - 3, tabptr + 2,
199 ovector[2*n+1] - ovector[2*n], subject + ovector[2*n]);
200 tabptr += name_entry_size;
201 }
202 }
203
204
205 /*************************************************************************
206 * If the "-g" option was given on the command line, we want to continue *
207 * to search for additional matches in the subject string, in a similar *
208 * way to the /g option in Perl. This turns out to be trickier than you *
209 * might think because of the possibility of matching an empty string. *
210 * What happens is as follows: *
211 * *
212 * If the previous match was NOT for an empty string, we can just start *
213 * the next match at the end of the previous one. *
214 * *
215 * If the previous match WAS for an empty string, we can't do that, as it *
216 * would lead to an infinite loop. Instead, a special call of pcre_exec() *
217 * is made with the PCRE_NOTEMPTY and PCRE_ANCHORED flags set. The first *
218 * of these tells PCRE that an empty string is not a valid match; other *
219 * possibilities must be tried. The second flag restricts PCRE to one *
220 * match attempt at the initial string position. If this match succeeds, *
221 * an alternative to the empty string match has been found, and we can *
222 * proceed round the loop. *
223 *************************************************************************/
224
225 if (!find_all)
226 {
227 pcre_free(re); /* Release the memory used for the compiled pattern */
228 return 0; /* Finish unless -g was given */
229 }
230
231 /* Loop for second and subsequent matches */
232
233 for (;;)
234 {
235 int options = 0; /* Normally no options */
236 int start_offset = ovector[1]; /* Start at end of previous match */
237
238 /* If the previous match was for an empty string, we are finished if we are
239 at the end of the subject. Otherwise, arrange to run another match at the
240 same point to see if a non-empty match can be found. */
241
242 if (ovector[0] == ovector[1])
243 {
244 if (ovector[0] == subject_length) break;
245 options = PCRE_NOTEMPTY | PCRE_ANCHORED;
246 }
247
248 /* Run the next matching operation */
249
250 rc = pcre_exec(
251 re, /* the compiled pattern */
252 NULL, /* no extra data - we didn't study the pattern */
253 subject, /* the subject string */
254 subject_length, /* the length of the subject */
255 start_offset, /* starting offset in the subject */
256 options, /* options */
257 ovector, /* output vector for substring information */
258 OVECCOUNT); /* number of elements in the output vector */
259
260 /* This time, a result of NOMATCH isn't an error. If the value in "options"
261 is zero, it just means we have found all possible matches, so the loop ends.
262 Otherwise, it means we have failed to find a non-empty-string match at a
263 point where there was a previous empty-string match. In this case, we do what
264 Perl does: advance the matching position by one, and continue. We do this by
265 setting the "end of previous match" offset, because that is picked up at the
266 top of the loop as the point at which to start again. */
267
268 if (rc == PCRE_ERROR_NOMATCH)
269 {
270 if (options == 0) break;
271 ovector[1] = start_offset + 1;
272 continue; /* Go round the loop again */
273 }
274
275 /* Other matching errors are not recoverable. */
276
277 if (rc < 0)
278 {
279 printf("Matching error %d\n", rc);
280 pcre_free(re); /* Release memory used for the compiled pattern */
281 return 1;
282 }
283
284 /* Match succeded */
285
286 printf("\nMatch succeeded again at offset %d\n", ovector[0]);
287
288 /* The match succeeded, but the output vector wasn't big enough. */
289
290 if (rc == 0)
291 {
292 rc = OVECCOUNT/3;
293 printf("ovector only has room for %d captured substrings\n", rc - 1);
294 }
295
296 /* As before, show substrings stored in the output vector by number, and then
297 also any named substrings. */
298
299 for (i = 0; i < rc; i++)
300 {
301 char *substring_start = subject + ovector[2*i];
302 int substring_length = ovector[2*i+1] - ovector[2*i];
303 printf("%2d: %.*s\n", i, substring_length, substring_start);
304 }
305
306 if (namecount <= 0) printf("No named substrings\n"); else
307 {
308 unsigned char *tabptr = name_table;
309 printf("Named substrings\n");
310 for (i = 0; i < namecount; i++)
311 {
312 int n = (tabptr[0] << 8) | tabptr[1];
313 printf("(%d) %*s: %.*s\n", n, name_entry_size - 3, tabptr + 2,
314 ovector[2*n+1] - ovector[2*n], subject + ovector[2*n]);
315 tabptr += name_entry_size;
316 }
317 }
318 } /* End of loop to find second and subsequent matches */
319
320 printf("\n");
321 pcre_free(re); /* Release memory used for the compiled pattern */
322 return 0;
323 }
324
325 /* End of pcredemo.c */

Properties

Name Value
svn:eol-style native
svn:keywords "Author Date Id Revision Url"

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12