/[pcre]/code/trunk/doc/pcredemo.3
ViewVC logotype

Contents of /code/trunk/doc/pcredemo.3

Parent Directory Parent Directory | Revision Log Revision Log


Revision 453 - (hide annotations) (download)
Fri Sep 18 19:12:35 2009 UTC (3 years, 8 months ago) by ph10
File size: 12437 byte(s)
Add more explanation about recursive subpatterns, and make it possible to 
process the documentation without building a whole release.

1 ph10 429 .\" Start example.
2     .de EX
3     . nr mE \\n(.f
4     . nf
5     . nh
6     . ft CW
7     ..
8     .
9     .
10     .\" End example.
11     .de EE
12     . ft \\n(mE
13     . fi
14     . hy \\n(HY
15     ..
16     .
17     .EX
18     /*************************************************
19     * PCRE DEMONSTRATION PROGRAM *
20     *************************************************/
21    
22     /* This is a demonstration program to illustrate the most straightforward ways
23     of calling the PCRE regular expression library from a C program. See the
24     pcresample documentation for a short discussion ("man pcresample" if you have
25     the PCRE man pages installed).
26    
27     In Unix-like environments, compile this program thuswise:
28    
29     gcc -Wall pcredemo.c -I/usr/local/include -L/usr/local/lib \e
30     -R/usr/local/lib -lpcre
31    
32     Replace "/usr/local/include" and "/usr/local/lib" with wherever the include and
33     library files for PCRE are installed on your system. You don't need -I and -L
34     if PCRE is installed in the standard system libraries. Only some operating
35     systems (e.g. Solaris) use the -R option.
36    
37     Building under Windows:
38    
39     If you want to statically link this program against a non-dll .a file, you must
40     define PCRE_STATIC before including pcre.h, otherwise the pcre_malloc() and
41     pcre_free() exported functions will be declared __declspec(dllimport), with
42     unwanted results. So in this environment, uncomment the following line. */
43    
44     /* #define PCRE_STATIC */
45    
46     #include <stdio.h>
47     #include <string.h>
48     #include <pcre.h>
49    
50     #define OVECCOUNT 30 /* should be a multiple of 3 */
51    
52    
53     int main(int argc, char **argv)
54     {
55     pcre *re;
56     const char *error;
57     char *pattern;
58     char *subject;
59     unsigned char *name_table;
60     int erroffset;
61     int find_all;
62     int namecount;
63     int name_entry_size;
64     int ovector[OVECCOUNT];
65     int subject_length;
66     int rc, i;
67    
68    
69     /**************************************************************************
70     * First, sort out the command line. There is only one possible option at *
71     * the moment, "-g" to request repeated matching to find all occurrences, *
72     * like Perl's /g option. We set the variable find_all to a non-zero value *
73     * if the -g option is present. Apart from that, there must be exactly two *
74     * arguments. *
75     **************************************************************************/
76    
77     find_all = 0;
78     for (i = 1; i < argc; i++)
79     {
80     if (strcmp(argv[i], "-g") == 0) find_all = 1;
81     else break;
82     }
83    
84     /* After the options, we require exactly two arguments, which are the pattern,
85     and the subject string. */
86    
87     if (argc - i != 2)
88     {
89     printf("Two arguments required: a regex and a subject string\en");
90     return 1;
91     }
92    
93     pattern = argv[i];
94     subject = argv[i+1];
95     subject_length = (int)strlen(subject);
96    
97    
98     /*************************************************************************
99     * Now we are going to compile the regular expression pattern, and handle *
100     * any errors that are detected. *
101     *************************************************************************/
102    
103     re = pcre_compile(
104     pattern, /* the pattern */
105     0, /* default options */
106     &error, /* for error message */
107     &erroffset, /* for error offset */
108     NULL); /* use default character tables */
109    
110     /* Compilation failed: print the error message and exit */
111    
112     if (re == NULL)
113     {
114     printf("PCRE compilation failed at offset %d: %s\en", erroffset, error);
115     return 1;
116     }
117    
118    
119     /*************************************************************************
120     * If the compilation succeeded, we call PCRE again, in order to do a *
121     * pattern match against the subject string. This does just ONE match. If *
122     * further matching is needed, it will be done below. *
123     *************************************************************************/
124    
125     rc = pcre_exec(
126     re, /* the compiled pattern */
127     NULL, /* no extra data - we didn't study the pattern */
128     subject, /* the subject string */
129     subject_length, /* the length of the subject */
130     0, /* start at offset 0 in the subject */
131     0, /* default options */
132     ovector, /* output vector for substring information */
133     OVECCOUNT); /* number of elements in the output vector */
134    
135     /* Matching failed: handle error cases */
136    
137     if (rc < 0)
138     {
139     switch(rc)
140     {
141     case PCRE_ERROR_NOMATCH: printf("No match\en"); break;
142     /*
143     Handle other special cases if you like
144     */
145     default: printf("Matching error %d\en", rc); break;
146     }
147     pcre_free(re); /* Release memory used for the compiled pattern */
148     return 1;
149     }
150    
151     /* Match succeeded */
152    
153     printf("\enMatch succeeded at offset %d\en", ovector[0]);
154    
155    
156     /*************************************************************************
157     * We have found the first match within the subject string. If the output *
158     * vector wasn't big enough, say so. Then output any substrings that were *
159     * captured. *
160     *************************************************************************/
161    
162     /* The output vector wasn't big enough */
163    
164     if (rc == 0)
165     {
166     rc = OVECCOUNT/3;
167     printf("ovector only has room for %d captured substrings\en", rc - 1);
168     }
169    
170     /* Show substrings stored in the output vector by number. Obviously, in a real
171     application you might want to do things other than print them. */
172    
173     for (i = 0; i < rc; i++)
174     {
175     char *substring_start = subject + ovector[2*i];
176     int substring_length = ovector[2*i+1] - ovector[2*i];
177     printf("%2d: %.*s\en", i, substring_length, substring_start);
178     }
179    
180    
181     /**************************************************************************
182     * That concludes the basic part of this demonstration program. We have *
183     * compiled a pattern, and performed a single match. The code that follows *
184     * shows first how to access named substrings, and then how to code for *
185     * repeated matches on the same subject. *
186     **************************************************************************/
187    
188     /* See if there are any named substrings, and if so, show them by name. First
189     we have to extract the count of named parentheses from the pattern. */
190    
191     (void)pcre_fullinfo(
192     re, /* the compiled pattern */
193     NULL, /* no extra data - we didn't study the pattern */
194     PCRE_INFO_NAMECOUNT, /* number of named substrings */
195     &namecount); /* where to put the answer */
196    
197     if (namecount <= 0) printf("No named substrings\en"); else
198     {
199     unsigned char *tabptr;
200     printf("Named substrings\en");
201    
202     /* Before we can access the substrings, we must extract the table for
203     translating names to numbers, and the size of each entry in the table. */
204    
205     (void)pcre_fullinfo(
206     re, /* the compiled pattern */
207     NULL, /* no extra data - we didn't study the pattern */
208     PCRE_INFO_NAMETABLE, /* address of the table */
209     &name_table); /* where to put the answer */
210    
211     (void)pcre_fullinfo(
212     re, /* the compiled pattern */
213     NULL, /* no extra data - we didn't study the pattern */
214     PCRE_INFO_NAMEENTRYSIZE, /* size of each entry in the table */
215     &name_entry_size); /* where to put the answer */
216    
217     /* Now we can scan the table and, for each entry, print the number, the name,
218     and the substring itself. */
219    
220     tabptr = name_table;
221     for (i = 0; i < namecount; i++)
222     {
223     int n = (tabptr[0] << 8) | tabptr[1];
224     printf("(%d) %*s: %.*s\en", n, name_entry_size - 3, tabptr + 2,
225     ovector[2*n+1] - ovector[2*n], subject + ovector[2*n]);
226     tabptr += name_entry_size;
227     }
228     }
229    
230    
231     /*************************************************************************
232     * If the "-g" option was given on the command line, we want to continue *
233     * to search for additional matches in the subject string, in a similar *
234     * way to the /g option in Perl. This turns out to be trickier than you *
235     * might think because of the possibility of matching an empty string. *
236     * What happens is as follows: *
237     * *
238     * If the previous match was NOT for an empty string, we can just start *
239     * the next match at the end of the previous one. *
240     * *
241     * If the previous match WAS for an empty string, we can't do that, as it *
242     * would lead to an infinite loop. Instead, a special call of pcre_exec() *
243 ph10 453 * is made with the PCRE_NOTEMPTY_ATSTART and PCRE_ANCHORED flags set. *
244     * The first of these tells PCRE that an empty string at the start of the *
245     * subject is not a valid match; other possibilities must be tried. The *
246     * second flag restricts PCRE to one match attempt at the initial string *
247     * position. If this match succeeds, an alternative to the empty string *
248     * match has been found, and we can proceed round the loop. *
249 ph10 429 *************************************************************************/
250    
251     if (!find_all)
252     {
253     pcre_free(re); /* Release the memory used for the compiled pattern */
254     return 0; /* Finish unless -g was given */
255     }
256    
257     /* Loop for second and subsequent matches */
258    
259     for (;;)
260     {
261     int options = 0; /* Normally no options */
262     int start_offset = ovector[1]; /* Start at end of previous match */
263    
264     /* If the previous match was for an empty string, we are finished if we are
265     at the end of the subject. Otherwise, arrange to run another match at the
266     same point to see if a non-empty match can be found. */
267    
268     if (ovector[0] == ovector[1])
269     {
270     if (ovector[0] == subject_length) break;
271 ph10 453 options = PCRE_NOTEMPTY_ATSTART | PCRE_ANCHORED;
272 ph10 429 }
273    
274     /* Run the next matching operation */
275    
276     rc = pcre_exec(
277     re, /* the compiled pattern */
278     NULL, /* no extra data - we didn't study the pattern */
279     subject, /* the subject string */
280     subject_length, /* the length of the subject */
281     start_offset, /* starting offset in the subject */
282     options, /* options */
283     ovector, /* output vector for substring information */
284     OVECCOUNT); /* number of elements in the output vector */
285    
286     /* This time, a result of NOMATCH isn't an error. If the value in "options"
287     is zero, it just means we have found all possible matches, so the loop ends.
288     Otherwise, it means we have failed to find a non-empty-string match at a
289     point where there was a previous empty-string match. In this case, we do what
290     Perl does: advance the matching position by one, and continue. We do this by
291     setting the "end of previous match" offset, because that is picked up at the
292     top of the loop as the point at which to start again. */
293    
294     if (rc == PCRE_ERROR_NOMATCH)
295     {
296     if (options == 0) break;
297     ovector[1] = start_offset + 1;
298     continue; /* Go round the loop again */
299     }
300    
301     /* Other matching errors are not recoverable. */
302    
303     if (rc < 0)
304     {
305     printf("Matching error %d\en", rc);
306     pcre_free(re); /* Release memory used for the compiled pattern */
307     return 1;
308     }
309    
310     /* Match succeeded */
311    
312     printf("\enMatch succeeded again at offset %d\en", ovector[0]);
313    
314     /* The match succeeded, but the output vector wasn't big enough. */
315    
316     if (rc == 0)
317     {
318     rc = OVECCOUNT/3;
319     printf("ovector only has room for %d captured substrings\en", rc - 1);
320     }
321    
322     /* As before, show substrings stored in the output vector by number, and then
323     also any named substrings. */
324    
325     for (i = 0; i < rc; i++)
326     {
327     char *substring_start = subject + ovector[2*i];
328     int substring_length = ovector[2*i+1] - ovector[2*i];
329     printf("%2d: %.*s\en", i, substring_length, substring_start);
330     }
331    
332     if (namecount <= 0) printf("No named substrings\en"); else
333     {
334     unsigned char *tabptr = name_table;
335     printf("Named substrings\en");
336     for (i = 0; i < namecount; i++)
337     {
338     int n = (tabptr[0] << 8) | tabptr[1];
339     printf("(%d) %*s: %.*s\en", n, name_entry_size - 3, tabptr + 2,
340     ovector[2*n+1] - ovector[2*n], subject + ovector[2*n]);
341     tabptr += name_entry_size;
342     }
343     }
344     } /* End of loop to find second and subsequent matches */
345    
346     printf("\en");
347     pcre_free(re); /* Release memory used for the compiled pattern */
348     return 0;
349     }
350    
351     /* End of pcredemo.c */
352     .EE

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12