/[pcre]/code/trunk/pcredemo.c
ViewVC logotype

Contents of /code/trunk/pcredemo.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 199 - (hide annotations) (download)
Tue Jul 31 14:39:09 2007 UTC (7 years, 4 months ago) by ph10
File MIME type: text/plain
File size: 11813 byte(s)
Daniel's patch for config.h and Windows DLL declarations (not fully working).

1 nigel 63 /*************************************************
2     * PCRE DEMONSTRATION PROGRAM *
3     *************************************************/
4 nigel 53
5 nigel 63 /* This is a demonstration program to illustrate the most straightforward ways
6     of calling the PCRE regular expression library from a C program. See the
7     pcresample documentation for a short discussion.
8    
9     Compile thuswise:
10 nigel 75 gcc -Wall pcredemo.c -I/usr/local/include -L/usr/local/lib \
11     -R/usr/local/lib -lpcre
12 nigel 63
13 nigel 75 Replace "/usr/local/include" and "/usr/local/lib" with wherever the include and
14 ph10 199 library files for PCRE are installed on your system. You don't need -I and -L
15     if PCRE is installed in the standard system libraries. Only some operating
16 nigel 63 systems (e.g. Solaris) use the -R option.
17 nigel 53 */
18    
19 nigel 63
20     #include <stdio.h>
21     #include <string.h>
22     #include <pcre.h>
23    
24 nigel 53 #define OVECCOUNT 30 /* should be a multiple of 3 */
25    
26 nigel 63
27 nigel 53 int main(int argc, char **argv)
28     {
29     pcre *re;
30     const char *error;
31 nigel 63 char *pattern;
32     char *subject;
33     unsigned char *name_table;
34 nigel 53 int erroffset;
35 nigel 63 int find_all;
36     int namecount;
37     int name_entry_size;
38 nigel 53 int ovector[OVECCOUNT];
39 nigel 63 int subject_length;
40 nigel 53 int rc, i;
41    
42 nigel 63
43 nigel 75 /**************************************************************************
44     * First, sort out the command line. There is only one possible option at *
45     * the moment, "-g" to request repeated matching to find all occurrences, *
46     * like Perl's /g option. We set the variable find_all to a non-zero value *
47     * if the -g option is present. Apart from that, there must be exactly two *
48     * arguments. *
49     **************************************************************************/
50 nigel 63
51     find_all = 0;
52     for (i = 1; i < argc; i++)
53 nigel 53 {
54 nigel 63 if (strcmp(argv[i], "-g") == 0) find_all = 1;
55     else break;
56     }
57    
58     /* After the options, we require exactly two arguments, which are the pattern,
59     and the subject string. */
60    
61     if (argc - i != 2)
62     {
63 nigel 53 printf("Two arguments required: a regex and a subject string\n");
64     return 1;
65     }
66    
67 nigel 63 pattern = argv[i];
68     subject = argv[i+1];
69     subject_length = (int)strlen(subject);
70 nigel 53
71 nigel 63
72     /*************************************************************************
73     * Now we are going to compile the regular expression pattern, and handle *
74     * and errors that are detected. *
75     *************************************************************************/
76    
77 nigel 53 re = pcre_compile(
78 nigel 63 pattern, /* the pattern */
79 nigel 53 0, /* default options */
80     &error, /* for error message */
81     &erroffset, /* for error offset */
82     NULL); /* use default character tables */
83    
84     /* Compilation failed: print the error message and exit */
85    
86     if (re == NULL)
87     {
88     printf("PCRE compilation failed at offset %d: %s\n", erroffset, error);
89     return 1;
90     }
91    
92    
93 nigel 63 /*************************************************************************
94     * If the compilation succeeded, we call PCRE again, in order to do a *
95 nigel 75 * pattern match against the subject string. This does just ONE match. If *
96 nigel 63 * further matching is needed, it will be done below. *
97     *************************************************************************/
98    
99 nigel 53 rc = pcre_exec(
100     re, /* the compiled pattern */
101     NULL, /* no extra data - we didn't study the pattern */
102 nigel 63 subject, /* the subject string */
103     subject_length, /* the length of the subject */
104 nigel 53 0, /* start at offset 0 in the subject */
105     0, /* default options */
106     ovector, /* output vector for substring information */
107     OVECCOUNT); /* number of elements in the output vector */
108    
109     /* Matching failed: handle error cases */
110    
111     if (rc < 0)
112     {
113     switch(rc)
114     {
115     case PCRE_ERROR_NOMATCH: printf("No match\n"); break;
116     /*
117     Handle other special cases if you like
118     */
119     default: printf("Matching error %d\n", rc); break;
120     }
121 nigel 91 pcre_free(re); /* Release memory used for the compiled pattern */
122 nigel 53 return 1;
123     }
124    
125     /* Match succeded */
126    
127 nigel 63 printf("\nMatch succeeded at offset %d\n", ovector[0]);
128 nigel 53
129 nigel 63
130     /*************************************************************************
131     * We have found the first match within the subject string. If the output *
132     * vector wasn't big enough, set its size to the maximum. Then output any *
133     * substrings that were captured. *
134     *************************************************************************/
135    
136 nigel 53 /* The output vector wasn't big enough */
137    
138     if (rc == 0)
139     {
140     rc = OVECCOUNT/3;
141     printf("ovector only has room for %d captured substrings\n", rc - 1);
142     }
143    
144 nigel 63 /* Show substrings stored in the output vector by number. Obviously, in a real
145     application you might want to do things other than print them. */
146 nigel 53
147     for (i = 0; i < rc; i++)
148     {
149 nigel 63 char *substring_start = subject + ovector[2*i];
150 nigel 53 int substring_length = ovector[2*i+1] - ovector[2*i];
151     printf("%2d: %.*s\n", i, substring_length, substring_start);
152     }
153    
154 nigel 63
155 nigel 75 /**************************************************************************
156     * That concludes the basic part of this demonstration program. We have *
157     * compiled a pattern, and performed a single match. The code that follows *
158     * first shows how to access named substrings, and then how to code for *
159     * repeated matches on the same subject. *
160     **************************************************************************/
161 nigel 63
162     /* See if there are any named substrings, and if so, show them by name. First
163     we have to extract the count of named parentheses from the pattern. */
164    
165     (void)pcre_fullinfo(
166     re, /* the compiled pattern */
167     NULL, /* no extra data - we didn't study the pattern */
168     PCRE_INFO_NAMECOUNT, /* number of named substrings */
169     &namecount); /* where to put the answer */
170    
171     if (namecount <= 0) printf("No named substrings\n"); else
172     {
173     unsigned char *tabptr;
174     printf("Named substrings\n");
175    
176     /* Before we can access the substrings, we must extract the table for
177     translating names to numbers, and the size of each entry in the table. */
178    
179     (void)pcre_fullinfo(
180     re, /* the compiled pattern */
181     NULL, /* no extra data - we didn't study the pattern */
182     PCRE_INFO_NAMETABLE, /* address of the table */
183     &name_table); /* where to put the answer */
184    
185     (void)pcre_fullinfo(
186     re, /* the compiled pattern */
187     NULL, /* no extra data - we didn't study the pattern */
188     PCRE_INFO_NAMEENTRYSIZE, /* size of each entry in the table */
189     &name_entry_size); /* where to put the answer */
190    
191     /* Now we can scan the table and, for each entry, print the number, the name,
192     and the substring itself. */
193    
194     tabptr = name_table;
195     for (i = 0; i < namecount; i++)
196     {
197     int n = (tabptr[0] << 8) | tabptr[1];
198     printf("(%d) %*s: %.*s\n", n, name_entry_size - 3, tabptr + 2,
199     ovector[2*n+1] - ovector[2*n], subject + ovector[2*n]);
200     tabptr += name_entry_size;
201     }
202     }
203    
204    
205     /*************************************************************************
206     * If the "-g" option was given on the command line, we want to continue *
207     * to search for additional matches in the subject string, in a similar *
208     * way to the /g option in Perl. This turns out to be trickier than you *
209     * might think because of the possibility of matching an empty string. *
210     * What happens is as follows: *
211     * *
212     * If the previous match was NOT for an empty string, we can just start *
213     * the next match at the end of the previous one. *
214     * *
215     * If the previous match WAS for an empty string, we can't do that, as it *
216     * would lead to an infinite loop. Instead, a special call of pcre_exec() *
217     * is made with the PCRE_NOTEMPTY and PCRE_ANCHORED flags set. The first *
218     * of these tells PCRE that an empty string is not a valid match; other *
219     * possibilities must be tried. The second flag restricts PCRE to one *
220     * match attempt at the initial string position. If this match succeeds, *
221     * an alternative to the empty string match has been found, and we can *
222     * proceed round the loop. *
223     *************************************************************************/
224    
225 nigel 75 if (!find_all)
226     {
227 nigel 91 pcre_free(re); /* Release the memory used for the compiled pattern */
228     return 0; /* Finish unless -g was given */
229 nigel 75 }
230 nigel 63
231     /* Loop for second and subsequent matches */
232    
233     for (;;)
234     {
235     int options = 0; /* Normally no options */
236     int start_offset = ovector[1]; /* Start at end of previous match */
237    
238     /* If the previous match was for an empty string, we are finished if we are
239     at the end of the subject. Otherwise, arrange to run another match at the
240     same point to see if a non-empty match can be found. */
241    
242     if (ovector[0] == ovector[1])
243     {
244     if (ovector[0] == subject_length) break;
245     options = PCRE_NOTEMPTY | PCRE_ANCHORED;
246     }
247    
248     /* Run the next matching operation */
249    
250     rc = pcre_exec(
251     re, /* the compiled pattern */
252     NULL, /* no extra data - we didn't study the pattern */
253     subject, /* the subject string */
254     subject_length, /* the length of the subject */
255     start_offset, /* starting offset in the subject */
256     options, /* options */
257     ovector, /* output vector for substring information */
258     OVECCOUNT); /* number of elements in the output vector */
259    
260     /* This time, a result of NOMATCH isn't an error. If the value in "options"
261     is zero, it just means we have found all possible matches, so the loop ends.
262     Otherwise, it means we have failed to find a non-empty-string match at a
263     point where there was a previous empty-string match. In this case, we do what
264     Perl does: advance the matching position by one, and continue. We do this by
265     setting the "end of previous match" offset, because that is picked up at the
266     top of the loop as the point at which to start again. */
267    
268     if (rc == PCRE_ERROR_NOMATCH)
269     {
270     if (options == 0) break;
271     ovector[1] = start_offset + 1;
272     continue; /* Go round the loop again */
273     }
274    
275     /* Other matching errors are not recoverable. */
276    
277     if (rc < 0)
278     {
279     printf("Matching error %d\n", rc);
280 nigel 91 pcre_free(re); /* Release memory used for the compiled pattern */
281 nigel 63 return 1;
282     }
283    
284     /* Match succeded */
285    
286     printf("\nMatch succeeded again at offset %d\n", ovector[0]);
287    
288     /* The match succeeded, but the output vector wasn't big enough. */
289    
290     if (rc == 0)
291     {
292     rc = OVECCOUNT/3;
293     printf("ovector only has room for %d captured substrings\n", rc - 1);
294     }
295    
296     /* As before, show substrings stored in the output vector by number, and then
297     also any named substrings. */
298    
299     for (i = 0; i < rc; i++)
300     {
301     char *substring_start = subject + ovector[2*i];
302     int substring_length = ovector[2*i+1] - ovector[2*i];
303     printf("%2d: %.*s\n", i, substring_length, substring_start);
304     }
305    
306     if (namecount <= 0) printf("No named substrings\n"); else
307     {
308     unsigned char *tabptr = name_table;
309     printf("Named substrings\n");
310     for (i = 0; i < namecount; i++)
311     {
312     int n = (tabptr[0] << 8) | tabptr[1];
313     printf("(%d) %*s: %.*s\n", n, name_entry_size - 3, tabptr + 2,
314     ovector[2*n+1] - ovector[2*n], subject + ovector[2*n]);
315     tabptr += name_entry_size;
316     }
317     }
318     } /* End of loop to find second and subsequent matches */
319    
320     printf("\n");
321 nigel 91 pcre_free(re); /* Release memory used for the compiled pattern */
322 nigel 53 return 0;
323     }
324    
325 nigel 63 /* End of pcredemo.c */

Properties

Name Value
svn:eol-style native
svn:keywords "Author Date Id Revision Url"

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12