/[pcre]/code/trunk/pcredemo.c
ViewVC logotype

Contents of /code/trunk/pcredemo.c

Parent Directory | Revision Log


Revision 319 - (hide annotations) (download)
Mon Jan 28 15:27:45 2008 UTC (6 years, 7 months ago) by ph10
File MIME type: text/plain
File size: 12269 byte(s)
File tidy-up for 7.6 release.

1 nigel 63 /*************************************************
2     * PCRE DEMONSTRATION PROGRAM *
3     *************************************************/
4 nigel 53
5 nigel 63 /* This is a demonstration program to illustrate the most straightforward ways
6     of calling the PCRE regular expression library from a C program. See the
7 ph10 319 pcresample documentation for a short discussion ("man pcresample" if you have
8 ph10 315 the PCRE man pages installed).
9 nigel 63
10 ph10 315 In Unix-like environments, compile this program thuswise:
11    
12 nigel 75 gcc -Wall pcredemo.c -I/usr/local/include -L/usr/local/lib \
13     -R/usr/local/lib -lpcre
14 nigel 63
15 nigel 75 Replace "/usr/local/include" and "/usr/local/lib" with wherever the include and
16 ph10 200 library files for PCRE are installed on your system. You don't need -I and -L
17 ph10 199 if PCRE is installed in the standard system libraries. Only some operating
18 nigel 63 systems (e.g. Solaris) use the -R option.
19 nigel 53
20 ph10 315 Building under Windows:
21 nigel 63
22 ph10 315 If you want to statically link this program against a non-dll .a file, you must
23     define PCRE_STATIC before including pcre.h, otherwise the pcre_malloc() and
24     pcre_free() exported functions will be declared __declspec(dllimport), with
25     unwanted results. So in this environment, uncomment the following line. */
26    
27     /* #define PCRE_STATIC */
28    
29 nigel 63 #include <stdio.h>
30     #include <string.h>
31     #include <pcre.h>
32    
/* Number of ints in the output vector that main() hands to pcre_exec().
Keep it a multiple of 3: when pcre_exec() returns 0, main() treats
OVECCOUNT/3 as the number of substring offset pairs it can report. */
33 nigel 53 #define OVECCOUNT 30 /* should be a multiple of 3 */
34    
35 nigel 63
36 nigel 53 int main(int argc, char **argv)
37     {
38     pcre *re;
39     const char *error;
40 nigel 63 char *pattern;
41     char *subject;
42     unsigned char *name_table;
43 nigel 53 int erroffset;
44 nigel 63 int find_all;
45     int namecount;
46     int name_entry_size;
47 nigel 53 int ovector[OVECCOUNT];
48 nigel 63 int subject_length;
49 nigel 53 int rc, i;
50    
51 nigel 63
52 nigel 75 /**************************************************************************
53     * First, sort out the command line. There is only one possible option at *
54     * the moment, "-g" to request repeated matching to find all occurrences, *
55     * like Perl's /g option. We set the variable find_all to a non-zero value *
56     * if the -g option is present. Apart from that, there must be exactly two *
57     * arguments. *
58     **************************************************************************/
59 nigel 63
60     find_all = 0;
61     for (i = 1; i < argc; i++)
62 nigel 53 {
63 nigel 63 if (strcmp(argv[i], "-g") == 0) find_all = 1;
64     else break;
65     }
66    
67     /* After the options, we require exactly two arguments, which are the pattern,
68     and the subject string. */
69    
70     if (argc - i != 2)
71     {
72 nigel 53 printf("Two arguments required: a regex and a subject string\n");
73     return 1;
74     }
75    
76 nigel 63 pattern = argv[i];
77     subject = argv[i+1];
78     subject_length = (int)strlen(subject);
79 nigel 53
80 nigel 63
81     /*************************************************************************
82     * Now we are going to compile the regular expression pattern, and handle *
83     * and errors that are detected. *
84     *************************************************************************/
85    
86 nigel 53 re = pcre_compile(
87 nigel 63 pattern, /* the pattern */
88 nigel 53 0, /* default options */
89     &error, /* for error message */
90     &erroffset, /* for error offset */
91     NULL); /* use default character tables */
92    
93     /* Compilation failed: print the error message and exit */
94    
95     if (re == NULL)
96     {
97     printf("PCRE compilation failed at offset %d: %s\n", erroffset, error);
98     return 1;
99     }
100    
101    
102 nigel 63 /*************************************************************************
103     * If the compilation succeeded, we call PCRE again, in order to do a *
104 nigel 75 * pattern match against the subject string. This does just ONE match. If *
105 nigel 63 * further matching is needed, it will be done below. *
106     *************************************************************************/
107    
108 nigel 53 rc = pcre_exec(
109     re, /* the compiled pattern */
110     NULL, /* no extra data - we didn't study the pattern */
111 nigel 63 subject, /* the subject string */
112     subject_length, /* the length of the subject */
113 nigel 53 0, /* start at offset 0 in the subject */
114     0, /* default options */
115     ovector, /* output vector for substring information */
116     OVECCOUNT); /* number of elements in the output vector */
117    
118     /* Matching failed: handle error cases */
119    
120     if (rc < 0)
121     {
122     switch(rc)
123     {
124     case PCRE_ERROR_NOMATCH: printf("No match\n"); break;
125     /*
126     Handle other special cases if you like
127     */
128     default: printf("Matching error %d\n", rc); break;
129     }
130 nigel 91 pcre_free(re); /* Release memory used for the compiled pattern */
131 nigel 53 return 1;
132     }
133    
134     /* Match succeded */
135    
136 nigel 63 printf("\nMatch succeeded at offset %d\n", ovector[0]);
137 nigel 53
138 nigel 63
139     /*************************************************************************
140     * We have found the first match within the subject string. If the output *
141 ph10 315 * vector wasn't big enough, say so. Then output any substrings that were *
142     * captured. *
143 nigel 63 *************************************************************************/
144    
145 nigel 53 /* The output vector wasn't big enough */
146    
147     if (rc == 0)
148     {
149     rc = OVECCOUNT/3;
150     printf("ovector only has room for %d captured substrings\n", rc - 1);
151     }
152    
153 nigel 63 /* Show substrings stored in the output vector by number. Obviously, in a real
154     application you might want to do things other than print them. */
155 nigel 53
156     for (i = 0; i < rc; i++)
157     {
158 nigel 63 char *substring_start = subject + ovector[2*i];
159 nigel 53 int substring_length = ovector[2*i+1] - ovector[2*i];
160     printf("%2d: %.*s\n", i, substring_length, substring_start);
161     }
162    
163 nigel 63
164 nigel 75 /**************************************************************************
165     * That concludes the basic part of this demonstration program. We have *
166     * compiled a pattern, and performed a single match. The code that follows *
167 ph10 315 * shows first how to access named substrings, and then how to code for *
168 nigel 75 * repeated matches on the same subject. *
169     **************************************************************************/
170 nigel 63
171     /* See if there are any named substrings, and if so, show them by name. First
172     we have to extract the count of named parentheses from the pattern. */
173    
174     (void)pcre_fullinfo(
175     re, /* the compiled pattern */
176     NULL, /* no extra data - we didn't study the pattern */
177     PCRE_INFO_NAMECOUNT, /* number of named substrings */
178     &namecount); /* where to put the answer */
179    
180     if (namecount <= 0) printf("No named substrings\n"); else
181     {
182     unsigned char *tabptr;
183     printf("Named substrings\n");
184    
185     /* Before we can access the substrings, we must extract the table for
186     translating names to numbers, and the size of each entry in the table. */
187    
188     (void)pcre_fullinfo(
189     re, /* the compiled pattern */
190     NULL, /* no extra data - we didn't study the pattern */
191     PCRE_INFO_NAMETABLE, /* address of the table */
192     &name_table); /* where to put the answer */
193    
194     (void)pcre_fullinfo(
195     re, /* the compiled pattern */
196     NULL, /* no extra data - we didn't study the pattern */
197     PCRE_INFO_NAMEENTRYSIZE, /* size of each entry in the table */
198     &name_entry_size); /* where to put the answer */
199    
200     /* Now we can scan the table and, for each entry, print the number, the name,
201     and the substring itself. */
202    
203     tabptr = name_table;
204     for (i = 0; i < namecount; i++)
205     {
206     int n = (tabptr[0] << 8) | tabptr[1];
207     printf("(%d) %*s: %.*s\n", n, name_entry_size - 3, tabptr + 2,
208     ovector[2*n+1] - ovector[2*n], subject + ovector[2*n]);
209     tabptr += name_entry_size;
210     }
211     }
212    
213    
214     /*************************************************************************
215     * If the "-g" option was given on the command line, we want to continue *
216     * to search for additional matches in the subject string, in a similar *
217     * way to the /g option in Perl. This turns out to be trickier than you *
218     * might think because of the possibility of matching an empty string. *
219     * What happens is as follows: *
220     * *
221     * If the previous match was NOT for an empty string, we can just start *
222     * the next match at the end of the previous one. *
223     * *
224     * If the previous match WAS for an empty string, we can't do that, as it *
225     * would lead to an infinite loop. Instead, a special call of pcre_exec() *
226     * is made with the PCRE_NOTEMPTY and PCRE_ANCHORED flags set. The first *
227     * of these tells PCRE that an empty string is not a valid match; other *
228     * possibilities must be tried. The second flag restricts PCRE to one *
229     * match attempt at the initial string position. If this match succeeds, *
230     * an alternative to the empty string match has been found, and we can *
231     * proceed round the loop. *
232     *************************************************************************/
233    
234 nigel 75 if (!find_all)
235     {
236 nigel 91 pcre_free(re); /* Release the memory used for the compiled pattern */
237     return 0; /* Finish unless -g was given */
238 nigel 75 }
239 nigel 63
240     /* Loop for second and subsequent matches */
241    
242     for (;;)
243     {
244     int options = 0; /* Normally no options */
245     int start_offset = ovector[1]; /* Start at end of previous match */
246    
247     /* If the previous match was for an empty string, we are finished if we are
248     at the end of the subject. Otherwise, arrange to run another match at the
249     same point to see if a non-empty match can be found. */
250    
251     if (ovector[0] == ovector[1])
252     {
253     if (ovector[0] == subject_length) break;
254     options = PCRE_NOTEMPTY | PCRE_ANCHORED;
255     }
256    
257     /* Run the next matching operation */
258    
259     rc = pcre_exec(
260     re, /* the compiled pattern */
261     NULL, /* no extra data - we didn't study the pattern */
262     subject, /* the subject string */
263     subject_length, /* the length of the subject */
264     start_offset, /* starting offset in the subject */
265     options, /* options */
266     ovector, /* output vector for substring information */
267     OVECCOUNT); /* number of elements in the output vector */
268    
269     /* This time, a result of NOMATCH isn't an error. If the value in "options"
270     is zero, it just means we have found all possible matches, so the loop ends.
271     Otherwise, it means we have failed to find a non-empty-string match at a
272     point where there was a previous empty-string match. In this case, we do what
273     Perl does: advance the matching position by one, and continue. We do this by
274     setting the "end of previous match" offset, because that is picked up at the
275     top of the loop as the point at which to start again. */
276    
277     if (rc == PCRE_ERROR_NOMATCH)
278     {
279     if (options == 0) break;
280     ovector[1] = start_offset + 1;
281     continue; /* Go round the loop again */
282     }
283    
284     /* Other matching errors are not recoverable. */
285    
286     if (rc < 0)
287     {
288     printf("Matching error %d\n", rc);
289 nigel 91 pcre_free(re); /* Release memory used for the compiled pattern */
290 nigel 63 return 1;
291     }
292    
293     /* Match succeded */
294    
295     printf("\nMatch succeeded again at offset %d\n", ovector[0]);
296    
297     /* The match succeeded, but the output vector wasn't big enough. */
298    
299     if (rc == 0)
300     {
301     rc = OVECCOUNT/3;
302     printf("ovector only has room for %d captured substrings\n", rc - 1);
303     }
304    
305     /* As before, show substrings stored in the output vector by number, and then
306     also any named substrings. */
307    
308     for (i = 0; i < rc; i++)
309     {
310     char *substring_start = subject + ovector[2*i];
311     int substring_length = ovector[2*i+1] - ovector[2*i];
312     printf("%2d: %.*s\n", i, substring_length, substring_start);
313     }
314    
315     if (namecount <= 0) printf("No named substrings\n"); else
316     {
317     unsigned char *tabptr = name_table;
318     printf("Named substrings\n");
319     for (i = 0; i < namecount; i++)
320     {
321     int n = (tabptr[0] << 8) | tabptr[1];
322     printf("(%d) %*s: %.*s\n", n, name_entry_size - 3, tabptr + 2,
323     ovector[2*n+1] - ovector[2*n], subject + ovector[2*n]);
324     tabptr += name_entry_size;
325     }
326     }
327     } /* End of loop to find second and subsequent matches */
328    
329     printf("\n");
330 nigel 91 pcre_free(re); /* Release memory used for the compiled pattern */
331 nigel 53 return 0;
332     }
333    
334 nigel 63 /* End of pcredemo.c */

Properties

Name Value
svn:eol-style native
svn:keywords "Author Date Id Revision Url"

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12