/[pcre]/code/trunk/pcredemo.c
ViewVC logotype

Contents of /code/trunk/pcredemo.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 91 - (hide annotations) (download)
Sat Feb 24 21:41:34 2007 UTC (7 years, 5 months ago) by nigel
File MIME type: text/plain
File size: 11732 byte(s)
Load pcre-6.7 into code/trunk.

/*************************************************
*           PCRE DEMONSTRATION PROGRAM           *
*************************************************/

/* This is a demonstration program to illustrate the most straightforward ways
of calling the PCRE regular expression library from a C program. See the
pcresample documentation for a short discussion.

Compile thuswise:
  gcc -Wall pcredemo.c -I/usr/local/include -L/usr/local/lib \
    -R/usr/local/lib -lpcre

Replace "/usr/local/include" and "/usr/local/lib" with wherever the include and
library files for PCRE are installed on your system. Only some operating
systems (e.g. Solaris) use the -R option.
*/
17    
18 nigel 63
19     #include <stdio.h>
20     #include <string.h>
21     #include <pcre.h>
22    
23 nigel 53 #define OVECCOUNT 30 /* should be a multiple of 3 */
24    
25 nigel 63
26 nigel 53 int main(int argc, char **argv)
27     {
28     pcre *re;
29     const char *error;
30 nigel 63 char *pattern;
31     char *subject;
32     unsigned char *name_table;
33 nigel 53 int erroffset;
34 nigel 63 int find_all;
35     int namecount;
36     int name_entry_size;
37 nigel 53 int ovector[OVECCOUNT];
38 nigel 63 int subject_length;
39 nigel 53 int rc, i;
40    
41 nigel 63
42 nigel 75 /**************************************************************************
43     * First, sort out the command line. There is only one possible option at *
44     * the moment, "-g" to request repeated matching to find all occurrences, *
45     * like Perl's /g option. We set the variable find_all to a non-zero value *
46     * if the -g option is present. Apart from that, there must be exactly two *
47     * arguments. *
48     **************************************************************************/
49 nigel 63
50     find_all = 0;
51     for (i = 1; i < argc; i++)
52 nigel 53 {
53 nigel 63 if (strcmp(argv[i], "-g") == 0) find_all = 1;
54     else break;
55     }
56    
57     /* After the options, we require exactly two arguments, which are the pattern,
58     and the subject string. */
59    
60     if (argc - i != 2)
61     {
62 nigel 53 printf("Two arguments required: a regex and a subject string\n");
63     return 1;
64     }
65    
66 nigel 63 pattern = argv[i];
67     subject = argv[i+1];
68     subject_length = (int)strlen(subject);
69 nigel 53
70 nigel 63
71     /*************************************************************************
72     * Now we are going to compile the regular expression pattern, and handle *
73     * and errors that are detected. *
74     *************************************************************************/
75    
76 nigel 53 re = pcre_compile(
77 nigel 63 pattern, /* the pattern */
78 nigel 53 0, /* default options */
79     &error, /* for error message */
80     &erroffset, /* for error offset */
81     NULL); /* use default character tables */
82    
83     /* Compilation failed: print the error message and exit */
84    
85     if (re == NULL)
86     {
87     printf("PCRE compilation failed at offset %d: %s\n", erroffset, error);
88     return 1;
89     }
90    
91    
92 nigel 63 /*************************************************************************
93     * If the compilation succeeded, we call PCRE again, in order to do a *
94 nigel 75 * pattern match against the subject string. This does just ONE match. If *
95 nigel 63 * further matching is needed, it will be done below. *
96     *************************************************************************/
97    
98 nigel 53 rc = pcre_exec(
99     re, /* the compiled pattern */
100     NULL, /* no extra data - we didn't study the pattern */
101 nigel 63 subject, /* the subject string */
102     subject_length, /* the length of the subject */
103 nigel 53 0, /* start at offset 0 in the subject */
104     0, /* default options */
105     ovector, /* output vector for substring information */
106     OVECCOUNT); /* number of elements in the output vector */
107    
108     /* Matching failed: handle error cases */
109    
110     if (rc < 0)
111     {
112     switch(rc)
113     {
114     case PCRE_ERROR_NOMATCH: printf("No match\n"); break;
115     /*
116     Handle other special cases if you like
117     */
118     default: printf("Matching error %d\n", rc); break;
119     }
120 nigel 91 pcre_free(re); /* Release memory used for the compiled pattern */
121 nigel 53 return 1;
122     }
123    
124     /* Match succeded */
125    
126 nigel 63 printf("\nMatch succeeded at offset %d\n", ovector[0]);
127 nigel 53
128 nigel 63
129     /*************************************************************************
130     * We have found the first match within the subject string. If the output *
131     * vector wasn't big enough, set its size to the maximum. Then output any *
132     * substrings that were captured. *
133     *************************************************************************/
134    
135 nigel 53 /* The output vector wasn't big enough */
136    
137     if (rc == 0)
138     {
139     rc = OVECCOUNT/3;
140     printf("ovector only has room for %d captured substrings\n", rc - 1);
141     }
142    
143 nigel 63 /* Show substrings stored in the output vector by number. Obviously, in a real
144     application you might want to do things other than print them. */
145 nigel 53
146     for (i = 0; i < rc; i++)
147     {
148 nigel 63 char *substring_start = subject + ovector[2*i];
149 nigel 53 int substring_length = ovector[2*i+1] - ovector[2*i];
150     printf("%2d: %.*s\n", i, substring_length, substring_start);
151     }
152    
153 nigel 63
154 nigel 75 /**************************************************************************
155     * That concludes the basic part of this demonstration program. We have *
156     * compiled a pattern, and performed a single match. The code that follows *
157     * first shows how to access named substrings, and then how to code for *
158     * repeated matches on the same subject. *
159     **************************************************************************/
160 nigel 63
161     /* See if there are any named substrings, and if so, show them by name. First
162     we have to extract the count of named parentheses from the pattern. */
163    
164     (void)pcre_fullinfo(
165     re, /* the compiled pattern */
166     NULL, /* no extra data - we didn't study the pattern */
167     PCRE_INFO_NAMECOUNT, /* number of named substrings */
168     &namecount); /* where to put the answer */
169    
170     if (namecount <= 0) printf("No named substrings\n"); else
171     {
172     unsigned char *tabptr;
173     printf("Named substrings\n");
174    
175     /* Before we can access the substrings, we must extract the table for
176     translating names to numbers, and the size of each entry in the table. */
177    
178     (void)pcre_fullinfo(
179     re, /* the compiled pattern */
180     NULL, /* no extra data - we didn't study the pattern */
181     PCRE_INFO_NAMETABLE, /* address of the table */
182     &name_table); /* where to put the answer */
183    
184     (void)pcre_fullinfo(
185     re, /* the compiled pattern */
186     NULL, /* no extra data - we didn't study the pattern */
187     PCRE_INFO_NAMEENTRYSIZE, /* size of each entry in the table */
188     &name_entry_size); /* where to put the answer */
189    
190     /* Now we can scan the table and, for each entry, print the number, the name,
191     and the substring itself. */
192    
193     tabptr = name_table;
194     for (i = 0; i < namecount; i++)
195     {
196     int n = (tabptr[0] << 8) | tabptr[1];
197     printf("(%d) %*s: %.*s\n", n, name_entry_size - 3, tabptr + 2,
198     ovector[2*n+1] - ovector[2*n], subject + ovector[2*n]);
199     tabptr += name_entry_size;
200     }
201     }
202    
203    
204     /*************************************************************************
205     * If the "-g" option was given on the command line, we want to continue *
206     * to search for additional matches in the subject string, in a similar *
207     * way to the /g option in Perl. This turns out to be trickier than you *
208     * might think because of the possibility of matching an empty string. *
209     * What happens is as follows: *
210     * *
211     * If the previous match was NOT for an empty string, we can just start *
212     * the next match at the end of the previous one. *
213     * *
214     * If the previous match WAS for an empty string, we can't do that, as it *
215     * would lead to an infinite loop. Instead, a special call of pcre_exec() *
216     * is made with the PCRE_NOTEMPTY and PCRE_ANCHORED flags set. The first *
217     * of these tells PCRE that an empty string is not a valid match; other *
218     * possibilities must be tried. The second flag restricts PCRE to one *
219     * match attempt at the initial string position. If this match succeeds, *
220     * an alternative to the empty string match has been found, and we can *
221     * proceed round the loop. *
222     *************************************************************************/
223    
224 nigel 75 if (!find_all)
225     {
226 nigel 91 pcre_free(re); /* Release the memory used for the compiled pattern */
227     return 0; /* Finish unless -g was given */
228 nigel 75 }
229 nigel 63
230     /* Loop for second and subsequent matches */
231    
232     for (;;)
233     {
234     int options = 0; /* Normally no options */
235     int start_offset = ovector[1]; /* Start at end of previous match */
236    
237     /* If the previous match was for an empty string, we are finished if we are
238     at the end of the subject. Otherwise, arrange to run another match at the
239     same point to see if a non-empty match can be found. */
240    
241     if (ovector[0] == ovector[1])
242     {
243     if (ovector[0] == subject_length) break;
244     options = PCRE_NOTEMPTY | PCRE_ANCHORED;
245     }
246    
247     /* Run the next matching operation */
248    
249     rc = pcre_exec(
250     re, /* the compiled pattern */
251     NULL, /* no extra data - we didn't study the pattern */
252     subject, /* the subject string */
253     subject_length, /* the length of the subject */
254     start_offset, /* starting offset in the subject */
255     options, /* options */
256     ovector, /* output vector for substring information */
257     OVECCOUNT); /* number of elements in the output vector */
258    
259     /* This time, a result of NOMATCH isn't an error. If the value in "options"
260     is zero, it just means we have found all possible matches, so the loop ends.
261     Otherwise, it means we have failed to find a non-empty-string match at a
262     point where there was a previous empty-string match. In this case, we do what
263     Perl does: advance the matching position by one, and continue. We do this by
264     setting the "end of previous match" offset, because that is picked up at the
265     top of the loop as the point at which to start again. */
266    
267     if (rc == PCRE_ERROR_NOMATCH)
268     {
269     if (options == 0) break;
270     ovector[1] = start_offset + 1;
271     continue; /* Go round the loop again */
272     }
273    
274     /* Other matching errors are not recoverable. */
275    
276     if (rc < 0)
277     {
278     printf("Matching error %d\n", rc);
279 nigel 91 pcre_free(re); /* Release memory used for the compiled pattern */
280 nigel 63 return 1;
281     }
282    
283     /* Match succeded */
284    
285     printf("\nMatch succeeded again at offset %d\n", ovector[0]);
286    
287     /* The match succeeded, but the output vector wasn't big enough. */
288    
289     if (rc == 0)
290     {
291     rc = OVECCOUNT/3;
292     printf("ovector only has room for %d captured substrings\n", rc - 1);
293     }
294    
295     /* As before, show substrings stored in the output vector by number, and then
296     also any named substrings. */
297    
298     for (i = 0; i < rc; i++)
299     {
300     char *substring_start = subject + ovector[2*i];
301     int substring_length = ovector[2*i+1] - ovector[2*i];
302     printf("%2d: %.*s\n", i, substring_length, substring_start);
303     }
304    
305     if (namecount <= 0) printf("No named substrings\n"); else
306     {
307     unsigned char *tabptr = name_table;
308     printf("Named substrings\n");
309     for (i = 0; i < namecount; i++)
310     {
311     int n = (tabptr[0] << 8) | tabptr[1];
312     printf("(%d) %*s: %.*s\n", n, name_entry_size - 3, tabptr + 2,
313     ovector[2*n+1] - ovector[2*n], subject + ovector[2*n]);
314     tabptr += name_entry_size;
315     }
316     }
317     } /* End of loop to find second and subsequent matches */
318    
319     printf("\n");
320 nigel 91 pcre_free(re); /* Release memory used for the compiled pattern */
321 nigel 53 return 0;
322     }
323    
/* End of pcredemo.c */

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12