/[pcre]/code/trunk/pcre_get.c
ViewVC logotype

Contents of /code/trunk/pcre_get.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 117 - (hide annotations) (download)
Fri Mar 9 15:59:06 2007 UTC (7 years, 9 months ago) by ph10
File MIME type: text/plain
File size: 15178 byte(s)
Update copyright years to 2007.

1 nigel 77 /*************************************************
2     * Perl-Compatible Regular Expressions *
3     *************************************************/
4    
5     /* PCRE is a library of functions to support regular expressions whose syntax
6     and semantics are as close as possible to those of the Perl 5 language.
7    
8     Written by Philip Hazel
9 ph10 117 Copyright (c) 1997-2007 University of Cambridge
10 nigel 77
11     -----------------------------------------------------------------------------
12     Redistribution and use in source and binary forms, with or without
13     modification, are permitted provided that the following conditions are met:
14    
15     * Redistributions of source code must retain the above copyright notice,
16     this list of conditions and the following disclaimer.
17    
18     * Redistributions in binary form must reproduce the above copyright
19     notice, this list of conditions and the following disclaimer in the
20     documentation and/or other materials provided with the distribution.
21    
22     * Neither the name of the University of Cambridge nor the names of its
23     contributors may be used to endorse or promote products derived from
24     this software without specific prior written permission.
25    
26     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
27     AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28     IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29     ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
30     LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31     CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32     SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33     INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34     CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35     ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36     POSSIBILITY OF SUCH DAMAGE.
37     -----------------------------------------------------------------------------
38     */
39    
40    
41     /* This module contains some convenience functions for extracting substrings
42     from the subject string after a regex match has succeeded. The original idea
43     for these functions came from Scott Wimer. */
44    
45    
46     #include "pcre_internal.h"
47    
48    
49     /*************************************************
50     * Find number for named string *
51     *************************************************/
52    
53 nigel 91 /* This function is used by the get_first_set() function below, as well
54     as being generally available. It assumes that names are unique.
55 nigel 77
56     Arguments:
57     code the compiled regex
58     stringname the name whose number is required
59    
60     Returns: the number of the named parentheses, or a negative number
61     (PCRE_ERROR_NOSUBSTRING) if not found
62     */
63    
64     int
65     pcre_get_stringnumber(const pcre *code, const char *stringname)
66     {
67     int rc;
68     int entrysize;
69     int top, bot;
70     uschar *nametable;
71    
72     if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMECOUNT, &top)) != 0)
73     return rc;
74     if (top <= 0) return PCRE_ERROR_NOSUBSTRING;
75    
76     if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMEENTRYSIZE, &entrysize)) != 0)
77     return rc;
78     if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMETABLE, &nametable)) != 0)
79     return rc;
80    
81     bot = 0;
82     while (top > bot)
83     {
84     int mid = (top + bot) / 2;
85     uschar *entry = nametable + entrysize*mid;
86     int c = strcmp(stringname, (char *)(entry + 2));
87     if (c == 0) return (entry[0] << 8) + entry[1];
88     if (c > 0) bot = mid + 1; else top = mid;
89     }
90    
91     return PCRE_ERROR_NOSUBSTRING;
92     }
93    
94    
95    
96     /*************************************************
97 nigel 91 * Find (multiple) entries for named string *
98     *************************************************/
99    
100     /* This is used by the get_first_set() function below, as well as being
101     generally available. It is used when duplicated names are permitted.
102    
103     Arguments:
104     code the compiled regex
105     stringname the name whose entries required
106     firstptr where to put the pointer to the first entry
107     lastptr where to put the pointer to the last entry
108    
109     Returns: the length of each entry, or a negative number
110     (PCRE_ERROR_NOSUBSTRING) if not found
111     */
112    
113     int
114     pcre_get_stringtable_entries(const pcre *code, const char *stringname,
115     char **firstptr, char **lastptr)
116     {
117     int rc;
118     int entrysize;
119     int top, bot;
120     uschar *nametable, *lastentry;
121    
122     if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMECOUNT, &top)) != 0)
123     return rc;
124     if (top <= 0) return PCRE_ERROR_NOSUBSTRING;
125    
126     if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMEENTRYSIZE, &entrysize)) != 0)
127     return rc;
128     if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMETABLE, &nametable)) != 0)
129     return rc;
130    
131     lastentry = nametable + entrysize * (top - 1);
132     bot = 0;
133     while (top > bot)
134     {
135     int mid = (top + bot) / 2;
136     uschar *entry = nametable + entrysize*mid;
137     int c = strcmp(stringname, (char *)(entry + 2));
138     if (c == 0)
139     {
140     uschar *first = entry;
141     uschar *last = entry;
142     while (first > nametable)
143     {
144     if (strcmp(stringname, (char *)(first - entrysize + 2)) != 0) break;
145     first -= entrysize;
146     }
147     while (last < lastentry)
148     {
149     if (strcmp(stringname, (char *)(last + entrysize + 2)) != 0) break;
150     last += entrysize;
151     }
152     *firstptr = (char *)first;
153     *lastptr = (char *)last;
154     return entrysize;
155     }
156     if (c > 0) bot = mid + 1; else top = mid;
157     }
158    
159     return PCRE_ERROR_NOSUBSTRING;
160     }
161    
162    
163    
164     /*************************************************
165     * Find first set of multiple named strings *
166     *************************************************/
167    
168     /* This function allows for duplicate names in the table of named substrings.
169     It returns the number of the first one that was set in a pattern match.
170    
171     Arguments:
172     code the compiled regex
173     stringname the name of the capturing substring
174     ovector the vector of matched substrings
175    
176     Returns: the number of the first that is set,
177     or the number of the last one if none are set,
178     or a negative number on error
179     */
180    
181     static int
182     get_first_set(const pcre *code, const char *stringname, int *ovector)
183     {
184     const real_pcre *re = (const real_pcre *)code;
185     int entrysize;
186     char *first, *last;
187     uschar *entry;
188     if ((re->options & (PCRE_DUPNAMES | PCRE_JCHANGED)) == 0)
189     return pcre_get_stringnumber(code, stringname);
190     entrysize = pcre_get_stringtable_entries(code, stringname, &first, &last);
191     if (entrysize <= 0) return entrysize;
192     for (entry = (uschar *)first; entry <= (uschar *)last; entry += entrysize)
193     {
194     int n = (entry[0] << 8) + entry[1];
195     if (ovector[n*2] >= 0) return n;
196     }
197     return (first[0] << 8) + first[1];
198     }
199    
200    
201    
202    
203     /*************************************************
204 nigel 77 * Copy captured string to given buffer *
205     *************************************************/
206    
207     /* This function copies a single captured substring into a given buffer.
208     Note that we use memcpy() rather than strncpy() in case there are binary zeros
209     in the string.
210    
211     Arguments:
212     subject the subject string that was matched
213     ovector pointer to the offsets table
214     stringcount the number of substrings that were captured
215     (i.e. the yield of the pcre_exec call, unless
216     that was zero, in which case it should be 1/3
217     of the offset table size)
218     stringnumber the number of the required substring
219     buffer where to put the substring
220     size the size of the buffer
221    
222     Returns: if successful:
223     the length of the copied string, not including the zero
224     that is put on the end; can be zero
225     if not successful:
226     PCRE_ERROR_NOMEMORY (-6) buffer too small
227     PCRE_ERROR_NOSUBSTRING (-7) no such captured substring
228     */
229    
230     int
231     pcre_copy_substring(const char *subject, int *ovector, int stringcount,
232     int stringnumber, char *buffer, int size)
233     {
234     int yield;
235     if (stringnumber < 0 || stringnumber >= stringcount)
236     return PCRE_ERROR_NOSUBSTRING;
237     stringnumber *= 2;
238     yield = ovector[stringnumber+1] - ovector[stringnumber];
239     if (size < yield + 1) return PCRE_ERROR_NOMEMORY;
240     memcpy(buffer, subject + ovector[stringnumber], yield);
241     buffer[yield] = 0;
242     return yield;
243     }
244    
245    
246    
247     /*************************************************
248     * Copy named captured string to given buffer *
249     *************************************************/
250    
251     /* This function copies a single captured substring into a given buffer,
252 nigel 91 identifying it by name. If the regex permits duplicate names, the first
253     substring that is set is chosen.
254 nigel 77
255     Arguments:
256     code the compiled regex
257     subject the subject string that was matched
258     ovector pointer to the offsets table
259     stringcount the number of substrings that were captured
260     (i.e. the yield of the pcre_exec call, unless
261     that was zero, in which case it should be 1/3
262     of the offset table size)
263     stringname the name of the required substring
264     buffer where to put the substring
265     size the size of the buffer
266    
267     Returns: if successful:
268     the length of the copied string, not including the zero
269     that is put on the end; can be zero
270     if not successful:
271     PCRE_ERROR_NOMEMORY (-6) buffer too small
272     PCRE_ERROR_NOSUBSTRING (-7) no such captured substring
273     */
274    
275     int
276     pcre_copy_named_substring(const pcre *code, const char *subject, int *ovector,
277     int stringcount, const char *stringname, char *buffer, int size)
278     {
279 nigel 91 int n = get_first_set(code, stringname, ovector);
280 nigel 77 if (n <= 0) return n;
281     return pcre_copy_substring(subject, ovector, stringcount, n, buffer, size);
282     }
283    
284    
285    
286     /*************************************************
287     * Copy all captured strings to new store *
288     *************************************************/
289    
290     /* This function gets one chunk of store and builds a list of pointers and all
291     of the captured substrings in it. A NULL pointer is put on the end of the list.
292    
293     Arguments:
294     subject the subject string that was matched
295     ovector pointer to the offsets table
296     stringcount the number of substrings that were captured
297     (i.e. the yield of the pcre_exec call, unless
298     that was zero, in which case it should be 1/3
299     of the offset table size)
300     listptr set to point to the list of pointers
301    
302     Returns: if successful: 0
303     if not successful:
304     PCRE_ERROR_NOMEMORY (-6) failed to get store
305     */
306    
307     int
308     pcre_get_substring_list(const char *subject, int *ovector, int stringcount,
309     const char ***listptr)
310     {
311     int i;
312     int size = sizeof(char *);
313     int double_count = stringcount * 2;
314     char **stringlist;
315     char *p;
316    
317     for (i = 0; i < double_count; i += 2)
318     size += sizeof(char *) + ovector[i+1] - ovector[i] + 1;
319    
320     stringlist = (char **)(pcre_malloc)(size);
321     if (stringlist == NULL) return PCRE_ERROR_NOMEMORY;
322    
323     *listptr = (const char **)stringlist;
324     p = (char *)(stringlist + stringcount + 1);
325    
326     for (i = 0; i < double_count; i += 2)
327     {
328     int len = ovector[i+1] - ovector[i];
329     memcpy(p, subject + ovector[i], len);
330     *stringlist++ = p;
331     p += len;
332     *p++ = 0;
333     }
334    
335     *stringlist = NULL;
336     return 0;
337     }
338    
339    
340    
341     /*************************************************
342     * Free store obtained by get_substring_list *
343     *************************************************/
344    
345     /* This function exists for the benefit of people calling PCRE from non-C
346     programs that can call its functions, but not free() or (pcre_free)() directly.
347    
348     Argument: the result of a previous pcre_get_substring_list()
349     Returns: nothing
350     */
351    
352     void
353     pcre_free_substring_list(const char **pointer)
354     {
355     (pcre_free)((void *)pointer);
356     }
357    
358    
359    
360     /*************************************************
361     * Copy captured string to new store *
362     *************************************************/
363    
364     /* This function copies a single captured substring into a piece of new
365     store
366    
367     Arguments:
368     subject the subject string that was matched
369     ovector pointer to the offsets table
370     stringcount the number of substrings that were captured
371     (i.e. the yield of the pcre_exec call, unless
372     that was zero, in which case it should be 1/3
373     of the offset table size)
374     stringnumber the number of the required substring
375     stringptr where to put a pointer to the substring
376    
377     Returns: if successful:
378     the length of the string, not including the zero that
379     is put on the end; can be zero
380     if not successful:
381     PCRE_ERROR_NOMEMORY (-6) failed to get store
382     PCRE_ERROR_NOSUBSTRING (-7) substring not present
383     */
384    
385     int
386     pcre_get_substring(const char *subject, int *ovector, int stringcount,
387     int stringnumber, const char **stringptr)
388     {
389     int yield;
390     char *substring;
391     if (stringnumber < 0 || stringnumber >= stringcount)
392     return PCRE_ERROR_NOSUBSTRING;
393     stringnumber *= 2;
394     yield = ovector[stringnumber+1] - ovector[stringnumber];
395     substring = (char *)(pcre_malloc)(yield + 1);
396     if (substring == NULL) return PCRE_ERROR_NOMEMORY;
397     memcpy(substring, subject + ovector[stringnumber], yield);
398     substring[yield] = 0;
399     *stringptr = substring;
400     return yield;
401     }
402    
403    
404    
405     /*************************************************
406     * Copy named captured string to new store *
407     *************************************************/
408    
409     /* This function copies a single captured substring, identified by name, into
410 nigel 91 new store. If the regex permits duplicate names, the first substring that is
411     set is chosen.
412 nigel 77
413     Arguments:
414     code the compiled regex
415     subject the subject string that was matched
416     ovector pointer to the offsets table
417     stringcount the number of substrings that were captured
418     (i.e. the yield of the pcre_exec call, unless
419     that was zero, in which case it should be 1/3
420     of the offset table size)
421     stringname the name of the required substring
422     stringptr where to put the pointer
423    
424     Returns: if successful:
425     the length of the copied string, not including the zero
426     that is put on the end; can be zero
427     if not successful:
428     PCRE_ERROR_NOMEMORY (-6) couldn't get memory
429     PCRE_ERROR_NOSUBSTRING (-7) no such captured substring
430     */
431    
432     int
433     pcre_get_named_substring(const pcre *code, const char *subject, int *ovector,
434     int stringcount, const char *stringname, const char **stringptr)
435     {
436 nigel 91 int n = get_first_set(code, stringname, ovector);
437 nigel 77 if (n <= 0) return n;
438     return pcre_get_substring(subject, ovector, stringcount, n, stringptr);
439     }
440    
441    
442    
443    
444     /*************************************************
445     * Free store obtained by get_substring *
446     *************************************************/
447    
448     /* This function exists for the benefit of people calling PCRE from non-C
449     programs that can call its functions, but not free() or (pcre_free)() directly.
450    
451     Argument: the result of a previous pcre_get_substring()
452     Returns: nothing
453     */
454    
455     void
456     pcre_free_substring(const char *pointer)
457     {
458     (pcre_free)((void *)pointer);
459     }
460    
461     /* End of pcre_get.c */

Properties

Name Value
svn:eol-style native
svn:keywords "Author Date Id Revision Url"

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12