/[pcre]/code/tags/pcre-8.02/pcredemo.c
ViewVC logotype

Contents of /code/tags/pcre-8.02/pcredemo.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 97 - (hide annotations) (download)
Mon Mar 5 12:36:47 2007 UTC (7 years, 4 months ago) by ph10
Original Path: code/trunk/pcredemo.c
File MIME type: text/plain
File size: 11783 byte(s)
Applied Bob and Daniel's patches to convert the build system to automake. Added 
the maintain directory, containing files that are used for maintenance, but are 
not distributed. This is an intermediate step.

1 nigel 63 /*************************************************
2     * PCRE DEMONSTRATION PROGRAM *
3     *************************************************/
4 nigel 53
5 nigel 63 /* This is a demonstration program to illustrate the most straightforward ways
6     of calling the PCRE regular expression library from a C program. See the
7     pcresample documentation for a short discussion.
8    
9     Compile thuswise:
10 nigel 75 gcc -Wall pcredemo.c -I/usr/local/include -L/usr/local/lib \
11     -R/usr/local/lib -lpcre
12 nigel 63
13 nigel 75 Replace "/usr/local/include" and "/usr/local/lib" with wherever the include and
14 nigel 63 library files for PCRE are installed on your system. Only some operating
15     systems (e.g. Solaris) use the -R option.
16 nigel 53 */
17    
18 nigel 63
19 ph10 97 #ifdef HAVE_CONFIG_H
20     # include <config.h>
21     #endif
22    
23 nigel 63 #include <stdio.h>
24     #include <string.h>
25     #include <pcre.h>
26    
27 nigel 53 #define OVECCOUNT 30 /* should be a multiple of 3 */
28    
29 nigel 63
30 nigel 53 int main(int argc, char **argv)
31     {
32     pcre *re;
33     const char *error;
34 nigel 63 char *pattern;
35     char *subject;
36     unsigned char *name_table;
37 nigel 53 int erroffset;
38 nigel 63 int find_all;
39     int namecount;
40     int name_entry_size;
41 nigel 53 int ovector[OVECCOUNT];
42 nigel 63 int subject_length;
43 nigel 53 int rc, i;
44    
45 nigel 63
46 nigel 75 /**************************************************************************
47     * First, sort out the command line. There is only one possible option at *
48     * the moment, "-g" to request repeated matching to find all occurrences, *
49     * like Perl's /g option. We set the variable find_all to a non-zero value *
50     * if the -g option is present. Apart from that, there must be exactly two *
51     * arguments. *
52     **************************************************************************/
53 nigel 63
54     find_all = 0;
55     for (i = 1; i < argc; i++)
56 nigel 53 {
57 nigel 63 if (strcmp(argv[i], "-g") == 0) find_all = 1;
58     else break;
59     }
60    
61     /* After the options, we require exactly two arguments, which are the pattern,
62     and the subject string. */
63    
64     if (argc - i != 2)
65     {
66 nigel 53 printf("Two arguments required: a regex and a subject string\n");
67     return 1;
68     }
69    
70 nigel 63 pattern = argv[i];
71     subject = argv[i+1];
72     subject_length = (int)strlen(subject);
73 nigel 53
74 nigel 63
75     /*************************************************************************
76     * Now we are going to compile the regular expression pattern, and handle *
77     * and errors that are detected. *
78     *************************************************************************/
79    
80 nigel 53 re = pcre_compile(
81 nigel 63 pattern, /* the pattern */
82 nigel 53 0, /* default options */
83     &error, /* for error message */
84     &erroffset, /* for error offset */
85     NULL); /* use default character tables */
86    
87     /* Compilation failed: print the error message and exit */
88    
89     if (re == NULL)
90     {
91     printf("PCRE compilation failed at offset %d: %s\n", erroffset, error);
92     return 1;
93     }
94    
95    
96 nigel 63 /*************************************************************************
97     * If the compilation succeeded, we call PCRE again, in order to do a *
98 nigel 75 * pattern match against the subject string. This does just ONE match. If *
99 nigel 63 * further matching is needed, it will be done below. *
100     *************************************************************************/
101    
102 nigel 53 rc = pcre_exec(
103     re, /* the compiled pattern */
104     NULL, /* no extra data - we didn't study the pattern */
105 nigel 63 subject, /* the subject string */
106     subject_length, /* the length of the subject */
107 nigel 53 0, /* start at offset 0 in the subject */
108     0, /* default options */
109     ovector, /* output vector for substring information */
110     OVECCOUNT); /* number of elements in the output vector */
111    
112     /* Matching failed: handle error cases */
113    
114     if (rc < 0)
115     {
116     switch(rc)
117     {
118     case PCRE_ERROR_NOMATCH: printf("No match\n"); break;
119     /*
120     Handle other special cases if you like
121     */
122     default: printf("Matching error %d\n", rc); break;
123     }
124 nigel 91 pcre_free(re); /* Release memory used for the compiled pattern */
125 nigel 53 return 1;
126     }
127    
128     /* Match succeded */
129    
130 nigel 63 printf("\nMatch succeeded at offset %d\n", ovector[0]);
131 nigel 53
132 nigel 63
133     /*************************************************************************
134     * We have found the first match within the subject string. If the output *
135     * vector wasn't big enough, set its size to the maximum. Then output any *
136     * substrings that were captured. *
137     *************************************************************************/
138    
139 nigel 53 /* The output vector wasn't big enough */
140    
141     if (rc == 0)
142     {
143     rc = OVECCOUNT/3;
144     printf("ovector only has room for %d captured substrings\n", rc - 1);
145     }
146    
147 nigel 63 /* Show substrings stored in the output vector by number. Obviously, in a real
148     application you might want to do things other than print them. */
149 nigel 53
150     for (i = 0; i < rc; i++)
151     {
152 nigel 63 char *substring_start = subject + ovector[2*i];
153 nigel 53 int substring_length = ovector[2*i+1] - ovector[2*i];
154     printf("%2d: %.*s\n", i, substring_length, substring_start);
155     }
156    
157 nigel 63
158 nigel 75 /**************************************************************************
159     * That concludes the basic part of this demonstration program. We have *
160     * compiled a pattern, and performed a single match. The code that follows *
161     * first shows how to access named substrings, and then how to code for *
162     * repeated matches on the same subject. *
163     **************************************************************************/
164 nigel 63
165     /* See if there are any named substrings, and if so, show them by name. First
166     we have to extract the count of named parentheses from the pattern. */
167    
168     (void)pcre_fullinfo(
169     re, /* the compiled pattern */
170     NULL, /* no extra data - we didn't study the pattern */
171     PCRE_INFO_NAMECOUNT, /* number of named substrings */
172     &namecount); /* where to put the answer */
173    
174     if (namecount <= 0) printf("No named substrings\n"); else
175     {
176     unsigned char *tabptr;
177     printf("Named substrings\n");
178    
179     /* Before we can access the substrings, we must extract the table for
180     translating names to numbers, and the size of each entry in the table. */
181    
182     (void)pcre_fullinfo(
183     re, /* the compiled pattern */
184     NULL, /* no extra data - we didn't study the pattern */
185     PCRE_INFO_NAMETABLE, /* address of the table */
186     &name_table); /* where to put the answer */
187    
188     (void)pcre_fullinfo(
189     re, /* the compiled pattern */
190     NULL, /* no extra data - we didn't study the pattern */
191     PCRE_INFO_NAMEENTRYSIZE, /* size of each entry in the table */
192     &name_entry_size); /* where to put the answer */
193    
194     /* Now we can scan the table and, for each entry, print the number, the name,
195     and the substring itself. */
196    
197     tabptr = name_table;
198     for (i = 0; i < namecount; i++)
199     {
200     int n = (tabptr[0] << 8) | tabptr[1];
201     printf("(%d) %*s: %.*s\n", n, name_entry_size - 3, tabptr + 2,
202     ovector[2*n+1] - ovector[2*n], subject + ovector[2*n]);
203     tabptr += name_entry_size;
204     }
205     }
206    
207    
208     /*************************************************************************
209     * If the "-g" option was given on the command line, we want to continue *
210     * to search for additional matches in the subject string, in a similar *
211     * way to the /g option in Perl. This turns out to be trickier than you *
212     * might think because of the possibility of matching an empty string. *
213     * What happens is as follows: *
214     * *
215     * If the previous match was NOT for an empty string, we can just start *
216     * the next match at the end of the previous one. *
217     * *
218     * If the previous match WAS for an empty string, we can't do that, as it *
219     * would lead to an infinite loop. Instead, a special call of pcre_exec() *
220     * is made with the PCRE_NOTEMPTY and PCRE_ANCHORED flags set. The first *
221     * of these tells PCRE that an empty string is not a valid match; other *
222     * possibilities must be tried. The second flag restricts PCRE to one *
223     * match attempt at the initial string position. If this match succeeds, *
224     * an alternative to the empty string match has been found, and we can *
225     * proceed round the loop. *
226     *************************************************************************/
227    
228 nigel 75 if (!find_all)
229     {
230 nigel 91 pcre_free(re); /* Release the memory used for the compiled pattern */
231     return 0; /* Finish unless -g was given */
232 nigel 75 }
233 nigel 63
234     /* Loop for second and subsequent matches */
235    
236     for (;;)
237     {
238     int options = 0; /* Normally no options */
239     int start_offset = ovector[1]; /* Start at end of previous match */
240    
241     /* If the previous match was for an empty string, we are finished if we are
242     at the end of the subject. Otherwise, arrange to run another match at the
243     same point to see if a non-empty match can be found. */
244    
245     if (ovector[0] == ovector[1])
246     {
247     if (ovector[0] == subject_length) break;
248     options = PCRE_NOTEMPTY | PCRE_ANCHORED;
249     }
250    
251     /* Run the next matching operation */
252    
253     rc = pcre_exec(
254     re, /* the compiled pattern */
255     NULL, /* no extra data - we didn't study the pattern */
256     subject, /* the subject string */
257     subject_length, /* the length of the subject */
258     start_offset, /* starting offset in the subject */
259     options, /* options */
260     ovector, /* output vector for substring information */
261     OVECCOUNT); /* number of elements in the output vector */
262    
263     /* This time, a result of NOMATCH isn't an error. If the value in "options"
264     is zero, it just means we have found all possible matches, so the loop ends.
265     Otherwise, it means we have failed to find a non-empty-string match at a
266     point where there was a previous empty-string match. In this case, we do what
267     Perl does: advance the matching position by one, and continue. We do this by
268     setting the "end of previous match" offset, because that is picked up at the
269     top of the loop as the point at which to start again. */
270    
271     if (rc == PCRE_ERROR_NOMATCH)
272     {
273     if (options == 0) break;
274     ovector[1] = start_offset + 1;
275     continue; /* Go round the loop again */
276     }
277    
278     /* Other matching errors are not recoverable. */
279    
280     if (rc < 0)
281     {
282     printf("Matching error %d\n", rc);
283 nigel 91 pcre_free(re); /* Release memory used for the compiled pattern */
284 nigel 63 return 1;
285     }
286    
287     /* Match succeded */
288    
289     printf("\nMatch succeeded again at offset %d\n", ovector[0]);
290    
291     /* The match succeeded, but the output vector wasn't big enough. */
292    
293     if (rc == 0)
294     {
295     rc = OVECCOUNT/3;
296     printf("ovector only has room for %d captured substrings\n", rc - 1);
297     }
298    
299     /* As before, show substrings stored in the output vector by number, and then
300     also any named substrings. */
301    
302     for (i = 0; i < rc; i++)
303     {
304     char *substring_start = subject + ovector[2*i];
305     int substring_length = ovector[2*i+1] - ovector[2*i];
306     printf("%2d: %.*s\n", i, substring_length, substring_start);
307     }
308    
309     if (namecount <= 0) printf("No named substrings\n"); else
310     {
311     unsigned char *tabptr = name_table;
312     printf("Named substrings\n");
313     for (i = 0; i < namecount; i++)
314     {
315     int n = (tabptr[0] << 8) | tabptr[1];
316     printf("(%d) %*s: %.*s\n", n, name_entry_size - 3, tabptr + 2,
317     ovector[2*n+1] - ovector[2*n], subject + ovector[2*n]);
318     tabptr += name_entry_size;
319     }
320     }
321     } /* End of loop to find second and subsequent matches */
322    
323     printf("\n");
324 nigel 91 pcre_free(re); /* Release memory used for the compiled pattern */
325 nigel 53 return 0;
326     }
327    
328 nigel 63 /* End of pcredemo.c */

Properties

Name Value
svn:eol-style native
svn:keywords "Author Date Id Revision Url"

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12