/[pcre]/code/tags/pcre-7.7/pcredemo.c
ViewVC logotype

Contents of /code/tags/pcre-7.7/pcredemo.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 63 - (hide annotations) (download)
Sat Feb 24 21:40:03 2007 UTC (7 years, 2 months ago) by nigel
Original Path: code/trunk/pcredemo.c
File MIME type: text/plain
File size: 11338 byte(s)
Load pcre-4.0 into code/trunk.

1 nigel 63 /*************************************************
2     * PCRE DEMONSTRATION PROGRAM *
3     *************************************************/
4 nigel 53
5 nigel 63 /* This is a demonstration program to illustrate the most straightforward ways
6     of calling the PCRE regular expression library from a C program. See the
7     pcresample documentation for a short discussion.
8    
9     Compile thuswise:
10 nigel 53 gcc -Wall pcredemo.c -I/opt/local/include -L/opt/local/lib \
11     -R/opt/local/lib -lpcre
12 nigel 63
13     Replace "/opt/local/include" and "/opt/local/lib" with wherever the include and
14     library files for PCRE are installed on your system. Only some operating
15     systems (e.g. Solaris) use the -R option.
16 nigel 53 */
17    
18 nigel 63
19     #include <stdio.h>
20     #include <string.h>
21     #include <pcre.h>
22    
23 nigel 53 #define OVECCOUNT 30 /* should be a multiple of 3 */
24    
25 nigel 63
26 nigel 53 int main(int argc, char **argv)
27     {
28     pcre *re;
29     const char *error;
30 nigel 63 char *pattern;
31     char *subject;
32     unsigned char *name_table;
33 nigel 53 int erroffset;
34 nigel 63 int find_all;
35     int namecount;
36     int name_entry_size;
37 nigel 53 int ovector[OVECCOUNT];
38 nigel 63 int subject_length;
39 nigel 53 int rc, i;
40    
41 nigel 63
42     /*************************************************************************
43     * First, sort out the command line. There is only one possible option at *
44     * the moment, "-g" to request repeated matching to find all occurrences, *
45     * like Perl's /g option. We set the variable find_all non-zero if it is *
46     * present. Apart from that, there must be exactly two arguments. *
47     *************************************************************************/
48    
49     find_all = 0;
50     for (i = 1; i < argc; i++)
51 nigel 53 {
52 nigel 63 if (strcmp(argv[i], "-g") == 0) find_all = 1;
53     else break;
54     }
55    
56     /* After the options, we require exactly two arguments, which are the pattern,
57     and the subject string. */
58    
59     if (argc - i != 2)
60     {
61 nigel 53 printf("Two arguments required: a regex and a subject string\n");
62     return 1;
63     }
64    
65 nigel 63 pattern = argv[i];
66     subject = argv[i+1];
67     subject_length = (int)strlen(subject);
68 nigel 53
69 nigel 63
70     /*************************************************************************
71     * Now we are going to compile the regular expression pattern, and handle *
72     * and errors that are detected. *
73     *************************************************************************/
74    
75 nigel 53 re = pcre_compile(
76 nigel 63 pattern, /* the pattern */
77 nigel 53 0, /* default options */
78     &error, /* for error message */
79     &erroffset, /* for error offset */
80     NULL); /* use default character tables */
81    
82     /* Compilation failed: print the error message and exit */
83    
84     if (re == NULL)
85     {
86     printf("PCRE compilation failed at offset %d: %s\n", erroffset, error);
87     return 1;
88     }
89    
90    
91 nigel 63 /*************************************************************************
92     * If the compilation succeeded, we call PCRE again, in order to do a *
93     * pattern match against the subject string. This just does ONE match. If *
94     * further matching is needed, it will be done below. *
95     *************************************************************************/
96    
97 nigel 53 rc = pcre_exec(
98     re, /* the compiled pattern */
99     NULL, /* no extra data - we didn't study the pattern */
100 nigel 63 subject, /* the subject string */
101     subject_length, /* the length of the subject */
102 nigel 53 0, /* start at offset 0 in the subject */
103     0, /* default options */
104     ovector, /* output vector for substring information */
105     OVECCOUNT); /* number of elements in the output vector */
106    
107     /* Matching failed: handle error cases */
108    
109     if (rc < 0)
110     {
111     switch(rc)
112     {
113     case PCRE_ERROR_NOMATCH: printf("No match\n"); break;
114     /*
115     Handle other special cases if you like
116     */
117     default: printf("Matching error %d\n", rc); break;
118     }
119     return 1;
120     }
121    
122     /* Match succeded */
123    
124 nigel 63 printf("\nMatch succeeded at offset %d\n", ovector[0]);
125 nigel 53
126 nigel 63
127     /*************************************************************************
128     * We have found the first match within the subject string. If the output *
129     * vector wasn't big enough, set its size to the maximum. Then output any *
130     * substrings that were captured. *
131     *************************************************************************/
132    
133 nigel 53 /* The output vector wasn't big enough */
134    
135     if (rc == 0)
136     {
137     rc = OVECCOUNT/3;
138     printf("ovector only has room for %d captured substrings\n", rc - 1);
139     }
140    
141 nigel 63 /* Show substrings stored in the output vector by number. Obviously, in a real
142     application you might want to do things other than print them. */
143 nigel 53
144     for (i = 0; i < rc; i++)
145     {
146 nigel 63 char *substring_start = subject + ovector[2*i];
147 nigel 53 int substring_length = ovector[2*i+1] - ovector[2*i];
148     printf("%2d: %.*s\n", i, substring_length, substring_start);
149     }
150    
151 nigel 63
152     /*************************************************************************
153     * That concludes the basic part of this demonstration program. We have *
154     * compiled a pattern, and performed a single match. The code that follows*
155     * first shows how to access named substrings, and then how to code for *
156     * repeated matches on the same subject. *
157     *************************************************************************/
158    
159     /* See if there are any named substrings, and if so, show them by name. First
160     we have to extract the count of named parentheses from the pattern. */
161    
162     (void)pcre_fullinfo(
163     re, /* the compiled pattern */
164     NULL, /* no extra data - we didn't study the pattern */
165     PCRE_INFO_NAMECOUNT, /* number of named substrings */
166     &namecount); /* where to put the answer */
167    
168     if (namecount <= 0) printf("No named substrings\n"); else
169     {
170     unsigned char *tabptr;
171     printf("Named substrings\n");
172    
173     /* Before we can access the substrings, we must extract the table for
174     translating names to numbers, and the size of each entry in the table. */
175    
176     (void)pcre_fullinfo(
177     re, /* the compiled pattern */
178     NULL, /* no extra data - we didn't study the pattern */
179     PCRE_INFO_NAMETABLE, /* address of the table */
180     &name_table); /* where to put the answer */
181    
182     (void)pcre_fullinfo(
183     re, /* the compiled pattern */
184     NULL, /* no extra data - we didn't study the pattern */
185     PCRE_INFO_NAMEENTRYSIZE, /* size of each entry in the table */
186     &name_entry_size); /* where to put the answer */
187    
188     /* Now we can scan the table and, for each entry, print the number, the name,
189     and the substring itself. */
190    
191     tabptr = name_table;
192     for (i = 0; i < namecount; i++)
193     {
194     int n = (tabptr[0] << 8) | tabptr[1];
195     printf("(%d) %*s: %.*s\n", n, name_entry_size - 3, tabptr + 2,
196     ovector[2*n+1] - ovector[2*n], subject + ovector[2*n]);
197     tabptr += name_entry_size;
198     }
199     }
200    
201    
202     /*************************************************************************
203     * If the "-g" option was given on the command line, we want to continue *
204     * to search for additional matches in the subject string, in a similar *
205     * way to the /g option in Perl. This turns out to be trickier than you *
206     * might think because of the possibility of matching an empty string. *
207     * What happens is as follows: *
208     * *
209     * If the previous match was NOT for an empty string, we can just start *
210     * the next match at the end of the previous one. *
211     * *
212     * If the previous match WAS for an empty string, we can't do that, as it *
213     * would lead to an infinite loop. Instead, a special call of pcre_exec() *
214     * is made with the PCRE_NOTEMPTY and PCRE_ANCHORED flags set. The first *
215     * of these tells PCRE that an empty string is not a valid match; other *
216     * possibilities must be tried. The second flag restricts PCRE to one *
217     * match attempt at the initial string position. If this match succeeds, *
218     * an alternative to the empty string match has been found, and we can *
219     * proceed round the loop. *
220     *************************************************************************/
221    
222     if (!find_all) return 0; /* Finish unless -g was given */
223    
224     /* Loop for second and subsequent matches */
225    
226     for (;;)
227     {
228     int options = 0; /* Normally no options */
229     int start_offset = ovector[1]; /* Start at end of previous match */
230    
231     /* If the previous match was for an empty string, we are finished if we are
232     at the end of the subject. Otherwise, arrange to run another match at the
233     same point to see if a non-empty match can be found. */
234    
235     if (ovector[0] == ovector[1])
236     {
237     if (ovector[0] == subject_length) break;
238     options = PCRE_NOTEMPTY | PCRE_ANCHORED;
239     }
240    
241     /* Run the next matching operation */
242    
243     rc = pcre_exec(
244     re, /* the compiled pattern */
245     NULL, /* no extra data - we didn't study the pattern */
246     subject, /* the subject string */
247     subject_length, /* the length of the subject */
248     start_offset, /* starting offset in the subject */
249     options, /* options */
250     ovector, /* output vector for substring information */
251     OVECCOUNT); /* number of elements in the output vector */
252    
253     /* This time, a result of NOMATCH isn't an error. If the value in "options"
254     is zero, it just means we have found all possible matches, so the loop ends.
255     Otherwise, it means we have failed to find a non-empty-string match at a
256     point where there was a previous empty-string match. In this case, we do what
257     Perl does: advance the matching position by one, and continue. We do this by
258     setting the "end of previous match" offset, because that is picked up at the
259     top of the loop as the point at which to start again. */
260    
261     if (rc == PCRE_ERROR_NOMATCH)
262     {
263     if (options == 0) break;
264     ovector[1] = start_offset + 1;
265     continue; /* Go round the loop again */
266     }
267    
268     /* Other matching errors are not recoverable. */
269    
270     if (rc < 0)
271     {
272     printf("Matching error %d\n", rc);
273     return 1;
274     }
275    
276     /* Match succeded */
277    
278     printf("\nMatch succeeded again at offset %d\n", ovector[0]);
279    
280     /* The match succeeded, but the output vector wasn't big enough. */
281    
282     if (rc == 0)
283     {
284     rc = OVECCOUNT/3;
285     printf("ovector only has room for %d captured substrings\n", rc - 1);
286     }
287    
288     /* As before, show substrings stored in the output vector by number, and then
289     also any named substrings. */
290    
291     for (i = 0; i < rc; i++)
292     {
293     char *substring_start = subject + ovector[2*i];
294     int substring_length = ovector[2*i+1] - ovector[2*i];
295     printf("%2d: %.*s\n", i, substring_length, substring_start);
296     }
297    
298     if (namecount <= 0) printf("No named substrings\n"); else
299     {
300     unsigned char *tabptr = name_table;
301     printf("Named substrings\n");
302     for (i = 0; i < namecount; i++)
303     {
304     int n = (tabptr[0] << 8) | tabptr[1];
305     printf("(%d) %*s: %.*s\n", n, name_entry_size - 3, tabptr + 2,
306     ovector[2*n+1] - ovector[2*n], subject + ovector[2*n]);
307     tabptr += name_entry_size;
308     }
309     }
310     } /* End of loop to find second and subsequent matches */
311    
312     printf("\n");
313 nigel 53 return 0;
314     }
315    
316 nigel 63 /* End of pcredemo.c */

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12