/[pcre]/code/trunk/pcredemo.c
ViewVC logotype

Contents of /code/trunk/pcredemo.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 487 - (hide annotations) (download)
Wed Jan 6 10:26:55 2010 UTC (4 years, 11 months ago) by ph10
File MIME type: text/plain
File size: 12655 byte(s)
Tidying updates for 8.01-RC1 release.

1 nigel 63 /*************************************************
2     * PCRE DEMONSTRATION PROGRAM *
3     *************************************************/
4 nigel 53
5 nigel 63 /* This is a demonstration program to illustrate the most straightforward ways
6     of calling the PCRE regular expression library from a C program. See the
7 ph10 319 pcresample documentation for a short discussion ("man pcresample" if you have
8 ph10 315 the PCRE man pages installed).
9 nigel 63
10 ph10 487 In Unix-like environments, if PCRE is installed in your standard system
11 ph10 477 libraries, you should be able to compile this program using this command:
12 ph10 315
13 ph10 477 gcc -Wall pcredemo.c -lpcre -o pcredemo
14 nigel 63
15 ph10 477 If PCRE is not installed in a standard place, it is likely to be installed with
16     support for the pkg-config mechanism. If you have pkg-config, you can compile
17     this program using this command:
18    
19     gcc -Wall pcredemo.c `pkg-config --cflags --libs libpcre` -o pcredemo
20    
21     If you do not have pkg-config, you may have to use this:
22    
23     gcc -Wall pcredemo.c -I/usr/local/include -L/usr/local/lib \
24     -R/usr/local/lib -lpcre -o pcredemo
25    
26 nigel 75 Replace "/usr/local/include" and "/usr/local/lib" with wherever the include and
27 ph10 477 library files for PCRE are installed on your system. Only some operating
28 nigel 63 systems (e.g. Solaris) use the -R option.
29 nigel 53
30 ph10 315 Building under Windows:
31 nigel 63
32 ph10 315 If you want to statically link this program against a non-dll .a file, you must
33     define PCRE_STATIC before including pcre.h, otherwise the pcre_malloc() and
34     pcre_free() exported functions will be declared __declspec(dllimport), with
35     unwanted results. So in this environment, uncomment the following line. */
36    
37     /* #define PCRE_STATIC */
38    
39 nigel 63 #include <stdio.h>
40     #include <string.h>
41     #include <pcre.h>
42    
43 nigel 53 #define OVECCOUNT 30 /* should be a multiple of 3 */
44    
45 nigel 63
46 nigel 53 int main(int argc, char **argv)
47     {
48     pcre *re;
49     const char *error;
50 nigel 63 char *pattern;
51     char *subject;
52     unsigned char *name_table;
53 nigel 53 int erroffset;
54 nigel 63 int find_all;
55     int namecount;
56     int name_entry_size;
57 nigel 53 int ovector[OVECCOUNT];
58 nigel 63 int subject_length;
59 nigel 53 int rc, i;
60    
61 nigel 63
62 nigel 75 /**************************************************************************
63     * First, sort out the command line. There is only one possible option at *
64     * the moment, "-g" to request repeated matching to find all occurrences, *
65     * like Perl's /g option. We set the variable find_all to a non-zero value *
66     * if the -g option is present. Apart from that, there must be exactly two *
67     * arguments. *
68     **************************************************************************/
69 nigel 63
70     find_all = 0;
71     for (i = 1; i < argc; i++)
72 nigel 53 {
73 nigel 63 if (strcmp(argv[i], "-g") == 0) find_all = 1;
74     else break;
75     }
76    
77     /* After the options, we require exactly two arguments, which are the pattern,
78     and the subject string. */
79    
80     if (argc - i != 2)
81     {
82 nigel 53 printf("Two arguments required: a regex and a subject string\n");
83     return 1;
84     }
85    
86 nigel 63 pattern = argv[i];
87     subject = argv[i+1];
88     subject_length = (int)strlen(subject);
89 nigel 53
90 nigel 63
91     /*************************************************************************
92     * Now we are going to compile the regular expression pattern, and handle *
93     * and errors that are detected. *
94     *************************************************************************/
95    
96 nigel 53 re = pcre_compile(
97 nigel 63 pattern, /* the pattern */
98 nigel 53 0, /* default options */
99     &error, /* for error message */
100     &erroffset, /* for error offset */
101     NULL); /* use default character tables */
102    
103     /* Compilation failed: print the error message and exit */
104    
105     if (re == NULL)
106     {
107     printf("PCRE compilation failed at offset %d: %s\n", erroffset, error);
108     return 1;
109     }
110    
111    
112 nigel 63 /*************************************************************************
113     * If the compilation succeeded, we call PCRE again, in order to do a *
114 nigel 75 * pattern match against the subject string. This does just ONE match. If *
115 nigel 63 * further matching is needed, it will be done below. *
116     *************************************************************************/
117    
118 nigel 53 rc = pcre_exec(
119     re, /* the compiled pattern */
120     NULL, /* no extra data - we didn't study the pattern */
121 nigel 63 subject, /* the subject string */
122     subject_length, /* the length of the subject */
123 nigel 53 0, /* start at offset 0 in the subject */
124     0, /* default options */
125     ovector, /* output vector for substring information */
126     OVECCOUNT); /* number of elements in the output vector */
127    
128     /* Matching failed: handle error cases */
129    
130     if (rc < 0)
131     {
132     switch(rc)
133     {
134     case PCRE_ERROR_NOMATCH: printf("No match\n"); break;
135     /*
136     Handle other special cases if you like
137     */
138     default: printf("Matching error %d\n", rc); break;
139     }
140 nigel 91 pcre_free(re); /* Release memory used for the compiled pattern */
141 nigel 53 return 1;
142     }
143    
144     /* Match succeded */
145    
146 nigel 63 printf("\nMatch succeeded at offset %d\n", ovector[0]);
147 nigel 53
148 nigel 63
149     /*************************************************************************
150     * We have found the first match within the subject string. If the output *
151 ph10 315 * vector wasn't big enough, say so. Then output any substrings that were *
152     * captured. *
153 nigel 63 *************************************************************************/
154    
155 nigel 53 /* The output vector wasn't big enough */
156    
157     if (rc == 0)
158     {
159     rc = OVECCOUNT/3;
160     printf("ovector only has room for %d captured substrings\n", rc - 1);
161     }
162    
163 nigel 63 /* Show substrings stored in the output vector by number. Obviously, in a real
164     application you might want to do things other than print them. */
165 nigel 53
166     for (i = 0; i < rc; i++)
167     {
168 nigel 63 char *substring_start = subject + ovector[2*i];
169 nigel 53 int substring_length = ovector[2*i+1] - ovector[2*i];
170     printf("%2d: %.*s\n", i, substring_length, substring_start);
171     }
172    
173 nigel 63
174 nigel 75 /**************************************************************************
175     * That concludes the basic part of this demonstration program. We have *
176     * compiled a pattern, and performed a single match. The code that follows *
177 ph10 315 * shows first how to access named substrings, and then how to code for *
178 nigel 75 * repeated matches on the same subject. *
179     **************************************************************************/
180 nigel 63
181     /* See if there are any named substrings, and if so, show them by name. First
182     we have to extract the count of named parentheses from the pattern. */
183    
184     (void)pcre_fullinfo(
185     re, /* the compiled pattern */
186     NULL, /* no extra data - we didn't study the pattern */
187     PCRE_INFO_NAMECOUNT, /* number of named substrings */
188     &namecount); /* where to put the answer */
189    
190     if (namecount <= 0) printf("No named substrings\n"); else
191     {
192     unsigned char *tabptr;
193     printf("Named substrings\n");
194    
195     /* Before we can access the substrings, we must extract the table for
196     translating names to numbers, and the size of each entry in the table. */
197    
198     (void)pcre_fullinfo(
199     re, /* the compiled pattern */
200     NULL, /* no extra data - we didn't study the pattern */
201     PCRE_INFO_NAMETABLE, /* address of the table */
202     &name_table); /* where to put the answer */
203    
204     (void)pcre_fullinfo(
205     re, /* the compiled pattern */
206     NULL, /* no extra data - we didn't study the pattern */
207     PCRE_INFO_NAMEENTRYSIZE, /* size of each entry in the table */
208     &name_entry_size); /* where to put the answer */
209    
210     /* Now we can scan the table and, for each entry, print the number, the name,
211     and the substring itself. */
212    
213     tabptr = name_table;
214     for (i = 0; i < namecount; i++)
215     {
216     int n = (tabptr[0] << 8) | tabptr[1];
217     printf("(%d) %*s: %.*s\n", n, name_entry_size - 3, tabptr + 2,
218     ovector[2*n+1] - ovector[2*n], subject + ovector[2*n]);
219     tabptr += name_entry_size;
220     }
221     }
222    
223    
224     /*************************************************************************
225     * If the "-g" option was given on the command line, we want to continue *
226     * to search for additional matches in the subject string, in a similar *
227     * way to the /g option in Perl. This turns out to be trickier than you *
228     * might think because of the possibility of matching an empty string. *
229     * What happens is as follows: *
230     * *
231     * If the previous match was NOT for an empty string, we can just start *
232     * the next match at the end of the previous one. *
233     * *
234     * If the previous match WAS for an empty string, we can't do that, as it *
235     * would lead to an infinite loop. Instead, a special call of pcre_exec() *
236 ph10 442 * is made with the PCRE_NOTEMPTY_ATSTART and PCRE_ANCHORED flags set. *
237     * The first of these tells PCRE that an empty string at the start of the *
238     * subject is not a valid match; other possibilities must be tried. The *
239     * second flag restricts PCRE to one match attempt at the initial string *
240     * position. If this match succeeds, an alternative to the empty string *
241     * match has been found, and we can proceed round the loop. *
242 nigel 63 *************************************************************************/
243    
244 nigel 75 if (!find_all)
245     {
246 nigel 91 pcre_free(re); /* Release the memory used for the compiled pattern */
247     return 0; /* Finish unless -g was given */
248 nigel 75 }
249 nigel 63
250     /* Loop for second and subsequent matches */
251    
252     for (;;)
253     {
254     int options = 0; /* Normally no options */
255     int start_offset = ovector[1]; /* Start at end of previous match */
256    
257     /* If the previous match was for an empty string, we are finished if we are
258     at the end of the subject. Otherwise, arrange to run another match at the
259     same point to see if a non-empty match can be found. */
260    
261     if (ovector[0] == ovector[1])
262     {
263     if (ovector[0] == subject_length) break;
264 ph10 442 options = PCRE_NOTEMPTY_ATSTART | PCRE_ANCHORED;
265 nigel 63 }
266    
267     /* Run the next matching operation */
268    
269     rc = pcre_exec(
270     re, /* the compiled pattern */
271     NULL, /* no extra data - we didn't study the pattern */
272     subject, /* the subject string */
273     subject_length, /* the length of the subject */
274     start_offset, /* starting offset in the subject */
275     options, /* options */
276     ovector, /* output vector for substring information */
277     OVECCOUNT); /* number of elements in the output vector */
278    
279     /* This time, a result of NOMATCH isn't an error. If the value in "options"
280     is zero, it just means we have found all possible matches, so the loop ends.
281     Otherwise, it means we have failed to find a non-empty-string match at a
282     point where there was a previous empty-string match. In this case, we do what
283     Perl does: advance the matching position by one, and continue. We do this by
284     setting the "end of previous match" offset, because that is picked up at the
285     top of the loop as the point at which to start again. */
286    
287     if (rc == PCRE_ERROR_NOMATCH)
288     {
289     if (options == 0) break;
290     ovector[1] = start_offset + 1;
291     continue; /* Go round the loop again */
292     }
293    
294     /* Other matching errors are not recoverable. */
295    
296     if (rc < 0)
297     {
298     printf("Matching error %d\n", rc);
299 nigel 91 pcre_free(re); /* Release memory used for the compiled pattern */
300 nigel 63 return 1;
301     }
302    
303     /* Match succeded */
304    
305     printf("\nMatch succeeded again at offset %d\n", ovector[0]);
306    
307     /* The match succeeded, but the output vector wasn't big enough. */
308    
309     if (rc == 0)
310     {
311     rc = OVECCOUNT/3;
312     printf("ovector only has room for %d captured substrings\n", rc - 1);
313     }
314    
315     /* As before, show substrings stored in the output vector by number, and then
316     also any named substrings. */
317    
318     for (i = 0; i < rc; i++)
319     {
320     char *substring_start = subject + ovector[2*i];
321     int substring_length = ovector[2*i+1] - ovector[2*i];
322     printf("%2d: %.*s\n", i, substring_length, substring_start);
323     }
324    
325     if (namecount <= 0) printf("No named substrings\n"); else
326     {
327     unsigned char *tabptr = name_table;
328     printf("Named substrings\n");
329     for (i = 0; i < namecount; i++)
330     {
331     int n = (tabptr[0] << 8) | tabptr[1];
332     printf("(%d) %*s: %.*s\n", n, name_entry_size - 3, tabptr + 2,
333     ovector[2*n+1] - ovector[2*n], subject + ovector[2*n]);
334     tabptr += name_entry_size;
335     }
336     }
337     } /* End of loop to find second and subsequent matches */
338    
339     printf("\n");
340 nigel 91 pcre_free(re); /* Release memory used for the compiled pattern */
341 nigel 53 return 0;
342     }
343    
344 nigel 63 /* End of pcredemo.c */

Properties

Name Value
svn:eol-style native
svn:keywords "Author Date Id Revision Url"

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12