/[pcre]/code/tags/pcre-4.3/pcredemo.c
ViewVC logotype

Diff of /code/tags/pcre-4.3/pcredemo.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 53 by nigel, Sat Feb 24 21:39:42 2007 UTC revision 63 by nigel, Sat Feb 24 21:40:03 2007 UTC
# Line 1  Line 1 
1  #include <stdio.h>  /*************************************************
2  #include <string.h>  *           PCRE DEMONSTRATION PROGRAM           *
3  #include <pcre.h>  *************************************************/
4    
5    /* This is a demonstration program to illustrate the most straightforward ways
6    of calling the PCRE regular expression library from a C program. See the
7    pcresample documentation for a short discussion.
8    
9  /* Compile thuswise:  Compile thuswise:
10    gcc -Wall pcredemo.c -I/opt/local/include -L/opt/local/lib \    gcc -Wall pcredemo.c -I/opt/local/include -L/opt/local/lib \
11      -R/opt/local/lib -lpcre      -R/opt/local/lib -lpcre
12    
13    Replace "/opt/local/include" and "/opt/local/lib" with wherever the include and
14    library files for PCRE are installed on your system. Only some operating
15    systems (e.g. Solaris) use the -R option.
16  */  */
17    
18    
19    #include <stdio.h>
20    #include <string.h>
21    #include <pcre.h>
22    
23  #define OVECCOUNT 30    /* should be a multiple of 3 */  #define OVECCOUNT 30    /* should be a multiple of 3 */
24    
25    
26  int main(int argc, char **argv)  int main(int argc, char **argv)
27  {  {
28  pcre *re;  pcre *re;
29  const char *error;  const char *error;
30    char *pattern;
31    char *subject;
32    unsigned char *name_table;
33  int erroffset;  int erroffset;
34    int find_all;
35    int namecount;
36    int name_entry_size;
37  int ovector[OVECCOUNT];  int ovector[OVECCOUNT];
38    int subject_length;
39  int rc, i;  int rc, i;
40    
41  if (argc != 3)  
42    /*************************************************************************
43    * First, sort out the command line. There is only one possible option at *
44    * the moment, "-g" to request repeated matching to find all occurrences, *
45    * like Perl's /g option. We set the variable find_all non-zero if it is  *
46    * present. Apart from that, there must be exactly two arguments.         *
47    *************************************************************************/
48    
49    find_all = 0;
50    for (i = 1; i < argc; i++)
51      {
52      if (strcmp(argv[i], "-g") == 0) find_all = 1;
53        else break;
54      }
55    
56    /* After the options, we require exactly two arguments, which are the pattern,
57    and the subject string. */
58    
59    if (argc - i != 2)
60    {    {
61    printf("Two arguments required: a regex and a subject string\n");    printf("Two arguments required: a regex and a subject string\n");
62    return 1;    return 1;
63    }    }
64    
65  /* Compile the regular expression in the first argument */  pattern = argv[i];
66    subject = argv[i+1];
67    subject_length = (int)strlen(subject);
68    
69    
70    /*************************************************************************
71    * Now we are going to compile the regular expression pattern, and handle *
72    * any errors that are detected.                                          *
73    *************************************************************************/
74    
75  re = pcre_compile(  re = pcre_compile(
76    argv[1],              /* the pattern */    pattern,              /* the pattern */
77    0,                    /* default options */    0,                    /* default options */
78    &error,               /* for error message */    &error,               /* for error message */
79    &erroffset,           /* for error offset */    &erroffset,           /* for error offset */
# Line 40  if (re == NULL) Line 87  if (re == NULL)
87    return 1;    return 1;
88    }    }
89    
90  /* Compilation succeeded: match the subject in the second argument */  
91    /*************************************************************************
92    * If the compilation succeeded, we call PCRE again, in order to do a     *
93    * pattern match against the subject string. This just does ONE match. If *
94    * further matching is needed, it will be done below.                     *
95    *************************************************************************/
96    
97  rc = pcre_exec(  rc = pcre_exec(
98    re,                   /* the compiled pattern */    re,                   /* the compiled pattern */
99    NULL,                 /* no extra data - we didn't study the pattern */    NULL,                 /* no extra data - we didn't study the pattern */
100    argv[2],              /* the subject string */    subject,              /* the subject string */
101    (int)strlen(argv[2]), /* the length of the subject */    subject_length,       /* the length of the subject */
102    0,                    /* start at offset 0 in the subject */    0,                    /* start at offset 0 in the subject */
103    0,                    /* default options */    0,                    /* default options */
104    ovector,              /* output vector for substring information */    ovector,              /* output vector for substring information */
# Line 69  if (rc < 0) Line 121  if (rc < 0)
121    
122  /* Match succeeded */  /* Match succeeded */
123    
124  printf("Match succeeded\n");  printf("\nMatch succeeded at offset %d\n", ovector[0]);
125    
126    
127    /*************************************************************************
128    * We have found the first match within the subject string. If the output *
129    * vector wasn't big enough, set its size to the maximum. Then output any *
130    * substrings that were captured.                                         *
131    *************************************************************************/
132    
133  /* The output vector wasn't big enough */  /* The output vector wasn't big enough */
134    
# Line 79  if (rc == 0) Line 138  if (rc == 0)
138    printf("ovector only has room for %d captured substrings\n", rc - 1);    printf("ovector only has room for %d captured substrings\n", rc - 1);
139    }    }
140    
141  /* Show substrings stored in the output vector */  /* Show substrings stored in the output vector by number. Obviously, in a real
142    application you might want to do things other than print them. */
143    
144  for (i = 0; i < rc; i++)  for (i = 0; i < rc; i++)
145    {    {
146    char *substring_start = argv[2] + ovector[2*i];    char *substring_start = subject + ovector[2*i];
147    int substring_length = ovector[2*i+1] - ovector[2*i];    int substring_length = ovector[2*i+1] - ovector[2*i];
148    printf("%2d: %.*s\n", i, substring_length, substring_start);    printf("%2d: %.*s\n", i, substring_length, substring_start);
149    }    }
150    
151    
152    /*************************************************************************
153    * That concludes the basic part of this demonstration program. We have   *
154    * compiled a pattern, and performed a single match. The code that follows*
155    * first shows how to access named substrings, and then how to code for   *
156    * repeated matches on the same subject.                                  *
157    *************************************************************************/
158    
159    /* See if there are any named substrings, and if so, show them by name. First
160    we have to extract the count of named parentheses from the pattern. */
161    
162    (void)pcre_fullinfo(
163      re,                   /* the compiled pattern */
164      NULL,                 /* no extra data - we didn't study the pattern */
165      PCRE_INFO_NAMECOUNT,  /* number of named substrings */
166      &namecount);          /* where to put the answer */
167    
168    if (namecount <= 0) printf("No named substrings\n"); else
169      {
170      unsigned char *tabptr;
171      printf("Named substrings\n");
172    
173      /* Before we can access the substrings, we must extract the table for
174      translating names to numbers, and the size of each entry in the table. */
175    
176      (void)pcre_fullinfo(
177        re,                       /* the compiled pattern */
178        NULL,                     /* no extra data - we didn't study the pattern */
179        PCRE_INFO_NAMETABLE,      /* address of the table */
180        &name_table);             /* where to put the answer */
181    
182      (void)pcre_fullinfo(
183        re,                       /* the compiled pattern */
184        NULL,                     /* no extra data - we didn't study the pattern */
185        PCRE_INFO_NAMEENTRYSIZE,  /* size of each entry in the table */
186        &name_entry_size);        /* where to put the answer */
187    
188      /* Now we can scan the table and, for each entry, print the number, the name,
189      and the substring itself. */
190    
191      tabptr = name_table;
192      for (i = 0; i < namecount; i++)
193        {
194        int n = (tabptr[0] << 8) | tabptr[1];
195        printf("(%d) %*s: %.*s\n", n, name_entry_size - 3, tabptr + 2,
196          ovector[2*n+1] - ovector[2*n], subject + ovector[2*n]);
197        tabptr += name_entry_size;
198        }
199      }
200    
201    
202    /*************************************************************************
203    * If the "-g" option was given on the command line, we want to continue  *
204    * to search for additional matches in the subject string, in a similar   *
205    * way to the /g option in Perl. This turns out to be trickier than you   *
206    * might think because of the possibility of matching an empty string.    *
207    * What happens is as follows:                                            *
208    *                                                                        *
209    * If the previous match was NOT for an empty string, we can just start   *
210    * the next match at the end of the previous one.                         *
211    *                                                                        *
212    * If the previous match WAS for an empty string, we can't do that, as it *
213    * would lead to an infinite loop. Instead, a special call of pcre_exec() *
214    * is made with the PCRE_NOTEMPTY and PCRE_ANCHORED flags set. The first  *
215    * of these tells PCRE that an empty string is not a valid match; other   *
216    * possibilities must be tried. The second flag restricts PCRE to one     *
217    * match attempt at the initial string position. If this match succeeds,  *
218    * an alternative to the empty string match has been found, and we can    *
219    * proceed round the loop.                                                *
220    *************************************************************************/
221    
222    if (!find_all) return 0;   /* Finish unless -g was given */
223    
224    /* Loop for second and subsequent matches */
225    
226    for (;;)
227      {
228      int options = 0;                 /* Normally no options */
229      int start_offset = ovector[1];   /* Start at end of previous match */
230    
231      /* If the previous match was for an empty string, we are finished if we are
232      at the end of the subject. Otherwise, arrange to run another match at the
233      same point to see if a non-empty match can be found. */
234    
235      if (ovector[0] == ovector[1])
236        {
237        if (ovector[0] == subject_length) break;
238        options = PCRE_NOTEMPTY | PCRE_ANCHORED;
239        }
240    
241      /* Run the next matching operation */
242    
243      rc = pcre_exec(
244        re,                   /* the compiled pattern */
245        NULL,                 /* no extra data - we didn't study the pattern */
246        subject,              /* the subject string */
247        subject_length,       /* the length of the subject */
248        start_offset,         /* starting offset in the subject */
249        options,              /* options */
250        ovector,              /* output vector for substring information */
251        OVECCOUNT);           /* number of elements in the output vector */
252    
253      /* This time, a result of NOMATCH isn't an error. If the value in "options"
254      is zero, it just means we have found all possible matches, so the loop ends.
255      Otherwise, it means we have failed to find a non-empty-string match at a
256      point where there was a previous empty-string match. In this case, we do what
257      Perl does: advance the matching position by one, and continue. We do this by
258      setting the "end of previous match" offset, because that is picked up at the
259      top of the loop as the point at which to start again. */
260    
261      if (rc == PCRE_ERROR_NOMATCH)
262        {
263        if (options == 0) break;
264        ovector[1] = start_offset + 1;
265        continue;    /* Go round the loop again */
266        }
267    
268      /* Other matching errors are not recoverable. */
269    
270      if (rc < 0)
271        {
272        printf("Matching error %d\n", rc);
273        return 1;
274        }
275    
276      /* Match succeeded */
277    
278      printf("\nMatch succeeded again at offset %d\n", ovector[0]);
279    
280      /* The match succeeded, but the output vector wasn't big enough. */
281    
282      if (rc == 0)
283        {
284        rc = OVECCOUNT/3;
285        printf("ovector only has room for %d captured substrings\n", rc - 1);
286        }
287    
288      /* As before, show substrings stored in the output vector by number, and then
289      also any named substrings. */
290    
291      for (i = 0; i < rc; i++)
292        {
293        char *substring_start = subject + ovector[2*i];
294        int substring_length = ovector[2*i+1] - ovector[2*i];
295        printf("%2d: %.*s\n", i, substring_length, substring_start);
296        }
297    
298      if (namecount <= 0) printf("No named substrings\n"); else
299        {
300        unsigned char *tabptr = name_table;
301        printf("Named substrings\n");
302        for (i = 0; i < namecount; i++)
303          {
304          int n = (tabptr[0] << 8) | tabptr[1];
305          printf("(%d) %*s: %.*s\n", n, name_entry_size - 3, tabptr + 2,
306            ovector[2*n+1] - ovector[2*n], subject + ovector[2*n]);
307          tabptr += name_entry_size;
308          }
309        }
310      }      /* End of loop to find second and subsequent matches */
311    
312    printf("\n");
313  return 0;  return 0;
314  }  }
315    
316    /* End of pcredemo.c */

Legend:
Removed from v.53  
changed lines
  Added in v.63

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12