/[pcre]/code/trunk/pcre_get.c
ViewVC logotype

Contents of /code/trunk/pcre_get.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 199 - (hide annotations) (download)
Tue Jul 31 14:39:09 2007 UTC (6 years, 11 months ago) by ph10
File MIME type: text/plain
File size: 15315 byte(s)
Daniel's patch for config.h and Windows DLL declarations (not fully working).

1 nigel 77 /*************************************************
2     * Perl-Compatible Regular Expressions *
3     *************************************************/
4    
5     /* PCRE is a library of functions to support regular expressions whose syntax
6     and semantics are as close as possible to those of the Perl 5 language.
7    
8     Written by Philip Hazel
9 ph10 117 Copyright (c) 1997-2007 University of Cambridge
10 nigel 77
11     -----------------------------------------------------------------------------
12     Redistribution and use in source and binary forms, with or without
13     modification, are permitted provided that the following conditions are met:
14    
15     * Redistributions of source code must retain the above copyright notice,
16     this list of conditions and the following disclaimer.
17    
18     * Redistributions in binary form must reproduce the above copyright
19     notice, this list of conditions and the following disclaimer in the
20     documentation and/or other materials provided with the distribution.
21    
22     * Neither the name of the University of Cambridge nor the names of its
23     contributors may be used to endorse or promote products derived from
24     this software without specific prior written permission.
25    
26     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
27     AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28     IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29     ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
30     LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31     CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32     SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33     INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34     CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35     ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36     POSSIBILITY OF SUCH DAMAGE.
37     -----------------------------------------------------------------------------
38     */
39    
40    
41     /* This module contains some convenience functions for extracting substrings
42     from the subject string after a regex match has succeeded. The original idea
43     for these functions came from Scott Wimer. */
44    
45    
46 ph10 199 #ifdef HAVE_CONFIG_H
47     #include <config.h>
48     #endif
49    
50 nigel 77 #include "pcre_internal.h"
51    
52    
53     /*************************************************
54     * Find number for named string *
55     *************************************************/
56    
57 nigel 91 /* This function is used by the get_first_set() function below, as well
58     as being generally available. It assumes that names are unique.
59 nigel 77
60     Arguments:
61     code the compiled regex
62     stringname the name whose number is required
63    
64     Returns: the number of the named parentheses, or a negative number
65     (PCRE_ERROR_NOSUBSTRING) if not found
66     */
67    
68     int
69     pcre_get_stringnumber(const pcre *code, const char *stringname)
70     {
71     int rc;
72     int entrysize;
73     int top, bot;
74     uschar *nametable;
75    
76     if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMECOUNT, &top)) != 0)
77     return rc;
78     if (top <= 0) return PCRE_ERROR_NOSUBSTRING;
79    
80     if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMEENTRYSIZE, &entrysize)) != 0)
81     return rc;
82     if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMETABLE, &nametable)) != 0)
83     return rc;
84    
85     bot = 0;
86     while (top > bot)
87     {
88     int mid = (top + bot) / 2;
89     uschar *entry = nametable + entrysize*mid;
90     int c = strcmp(stringname, (char *)(entry + 2));
91     if (c == 0) return (entry[0] << 8) + entry[1];
92     if (c > 0) bot = mid + 1; else top = mid;
93     }
94    
95     return PCRE_ERROR_NOSUBSTRING;
96     }
97    
98    
99    
100     /*************************************************
101 nigel 91 * Find (multiple) entries for named string *
102     *************************************************/
103    
104     /* This is used by the get_first_set() function below, as well as being
105     generally available. It is used when duplicated names are permitted.
106    
107     Arguments:
108     code the compiled regex
109     stringname the name whose entries required
110     firstptr where to put the pointer to the first entry
111     lastptr where to put the pointer to the last entry
112    
113     Returns: the length of each entry, or a negative number
114     (PCRE_ERROR_NOSUBSTRING) if not found
115     */
116    
117     int
118     pcre_get_stringtable_entries(const pcre *code, const char *stringname,
119     char **firstptr, char **lastptr)
120     {
121     int rc;
122     int entrysize;
123     int top, bot;
124     uschar *nametable, *lastentry;
125    
126     if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMECOUNT, &top)) != 0)
127     return rc;
128     if (top <= 0) return PCRE_ERROR_NOSUBSTRING;
129    
130     if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMEENTRYSIZE, &entrysize)) != 0)
131     return rc;
132     if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMETABLE, &nametable)) != 0)
133     return rc;
134    
135     lastentry = nametable + entrysize * (top - 1);
136     bot = 0;
137     while (top > bot)
138     {
139     int mid = (top + bot) / 2;
140     uschar *entry = nametable + entrysize*mid;
141     int c = strcmp(stringname, (char *)(entry + 2));
142     if (c == 0)
143     {
144     uschar *first = entry;
145     uschar *last = entry;
146     while (first > nametable)
147     {
148     if (strcmp(stringname, (char *)(first - entrysize + 2)) != 0) break;
149     first -= entrysize;
150     }
151     while (last < lastentry)
152     {
153     if (strcmp(stringname, (char *)(last + entrysize + 2)) != 0) break;
154     last += entrysize;
155     }
156     *firstptr = (char *)first;
157     *lastptr = (char *)last;
158     return entrysize;
159     }
160     if (c > 0) bot = mid + 1; else top = mid;
161     }
162    
163     return PCRE_ERROR_NOSUBSTRING;
164     }
165    
166    
167    
168     /*************************************************
169     * Find first set of multiple named strings *
170     *************************************************/
171    
172     /* This function allows for duplicate names in the table of named substrings.
173     It returns the number of the first one that was set in a pattern match.
174    
175     Arguments:
176     code the compiled regex
177     stringname the name of the capturing substring
178     ovector the vector of matched substrings
179    
180     Returns: the number of the first that is set,
181     or the number of the last one if none are set,
182     or a negative number on error
183     */
184    
185     static int
186     get_first_set(const pcre *code, const char *stringname, int *ovector)
187     {
188     const real_pcre *re = (const real_pcre *)code;
189     int entrysize;
190     char *first, *last;
191     uschar *entry;
192     if ((re->options & (PCRE_DUPNAMES | PCRE_JCHANGED)) == 0)
193     return pcre_get_stringnumber(code, stringname);
194     entrysize = pcre_get_stringtable_entries(code, stringname, &first, &last);
195     if (entrysize <= 0) return entrysize;
196     for (entry = (uschar *)first; entry <= (uschar *)last; entry += entrysize)
197     {
198     int n = (entry[0] << 8) + entry[1];
199     if (ovector[n*2] >= 0) return n;
200     }
201     return (first[0] << 8) + first[1];
202     }
203    
204    
205    
206    
207     /*************************************************
208 nigel 77 * Copy captured string to given buffer *
209     *************************************************/
210    
211     /* This function copies a single captured substring into a given buffer.
212     Note that we use memcpy() rather than strncpy() in case there are binary zeros
213     in the string.
214    
215     Arguments:
216     subject the subject string that was matched
217     ovector pointer to the offsets table
218     stringcount the number of substrings that were captured
219     (i.e. the yield of the pcre_exec call, unless
220     that was zero, in which case it should be 1/3
221     of the offset table size)
222     stringnumber the number of the required substring
223     buffer where to put the substring
224     size the size of the buffer
225    
226     Returns: if successful:
227     the length of the copied string, not including the zero
228     that is put on the end; can be zero
229     if not successful:
230     PCRE_ERROR_NOMEMORY (-6) buffer too small
231     PCRE_ERROR_NOSUBSTRING (-7) no such captured substring
232     */
233    
234     int
235     pcre_copy_substring(const char *subject, int *ovector, int stringcount,
236     int stringnumber, char *buffer, int size)
237     {
238     int yield;
239     if (stringnumber < 0 || stringnumber >= stringcount)
240     return PCRE_ERROR_NOSUBSTRING;
241     stringnumber *= 2;
242     yield = ovector[stringnumber+1] - ovector[stringnumber];
243     if (size < yield + 1) return PCRE_ERROR_NOMEMORY;
244     memcpy(buffer, subject + ovector[stringnumber], yield);
245     buffer[yield] = 0;
246     return yield;
247     }
248    
249    
250    
251     /*************************************************
252     * Copy named captured string to given buffer *
253     *************************************************/
254    
255     /* This function copies a single captured substring into a given buffer,
256 nigel 91 identifying it by name. If the regex permits duplicate names, the first
257     substring that is set is chosen.
258 nigel 77
259     Arguments:
260     code the compiled regex
261     subject the subject string that was matched
262     ovector pointer to the offsets table
263     stringcount the number of substrings that were captured
264     (i.e. the yield of the pcre_exec call, unless
265     that was zero, in which case it should be 1/3
266     of the offset table size)
267     stringname the name of the required substring
268     buffer where to put the substring
269     size the size of the buffer
270    
271     Returns: if successful:
272     the length of the copied string, not including the zero
273     that is put on the end; can be zero
274     if not successful:
275     PCRE_ERROR_NOMEMORY (-6) buffer too small
276     PCRE_ERROR_NOSUBSTRING (-7) no such captured substring
277     */
278    
279     int
280     pcre_copy_named_substring(const pcre *code, const char *subject, int *ovector,
281     int stringcount, const char *stringname, char *buffer, int size)
282     {
283 nigel 91 int n = get_first_set(code, stringname, ovector);
284 nigel 77 if (n <= 0) return n;
285     return pcre_copy_substring(subject, ovector, stringcount, n, buffer, size);
286     }
287    
288    
289    
290     /*************************************************
291     * Copy all captured strings to new store *
292     *************************************************/
293    
294     /* This function gets one chunk of store and builds a list of pointers and all
295     of the captured substrings in it. A NULL pointer is put on the end of the list.
296    
297     Arguments:
298     subject the subject string that was matched
299     ovector pointer to the offsets table
300     stringcount the number of substrings that were captured
301     (i.e. the yield of the pcre_exec call, unless
302     that was zero, in which case it should be 1/3
303     of the offset table size)
304     listptr set to point to the list of pointers
305    
306     Returns: if successful: 0
307     if not successful:
308     PCRE_ERROR_NOMEMORY (-6) failed to get store
309     */
310    
311     int
312     pcre_get_substring_list(const char *subject, int *ovector, int stringcount,
313     const char ***listptr)
314     {
315     int i;
316     int size = sizeof(char *);
317     int double_count = stringcount * 2;
318     char **stringlist;
319     char *p;
320    
321     for (i = 0; i < double_count; i += 2)
322     size += sizeof(char *) + ovector[i+1] - ovector[i] + 1;
323    
324     stringlist = (char **)(pcre_malloc)(size);
325     if (stringlist == NULL) return PCRE_ERROR_NOMEMORY;
326    
327     *listptr = (const char **)stringlist;
328     p = (char *)(stringlist + stringcount + 1);
329    
330     for (i = 0; i < double_count; i += 2)
331     {
332     int len = ovector[i+1] - ovector[i];
333     memcpy(p, subject + ovector[i], len);
334     *stringlist++ = p;
335     p += len;
336     *p++ = 0;
337     }
338    
339     *stringlist = NULL;
340     return 0;
341     }
342    
343    
344    
345     /*************************************************
346     * Free store obtained by get_substring_list *
347     *************************************************/
348    
349     /* This function exists for the benefit of people calling PCRE from non-C
350     programs that can call its functions, but not free() or (pcre_free)() directly.
351    
352     Argument: the result of a previous pcre_get_substring_list()
353     Returns: nothing
354     */
355    
356     void
357     pcre_free_substring_list(const char **pointer)
358     {
359     (pcre_free)((void *)pointer);
360     }
361    
362    
363    
364     /*************************************************
365     * Copy captured string to new store *
366     *************************************************/
367    
368     /* This function copies a single captured substring into a piece of new
369     store
370    
371     Arguments:
372     subject the subject string that was matched
373     ovector pointer to the offsets table
374     stringcount the number of substrings that were captured
375     (i.e. the yield of the pcre_exec call, unless
376     that was zero, in which case it should be 1/3
377     of the offset table size)
378     stringnumber the number of the required substring
379     stringptr where to put a pointer to the substring
380    
381     Returns: if successful:
382     the length of the string, not including the zero that
383     is put on the end; can be zero
384     if not successful:
385     PCRE_ERROR_NOMEMORY (-6) failed to get store
386     PCRE_ERROR_NOSUBSTRING (-7) substring not present
387     */
388    
389     int
390     pcre_get_substring(const char *subject, int *ovector, int stringcount,
391     int stringnumber, const char **stringptr)
392     {
393     int yield;
394     char *substring;
395     if (stringnumber < 0 || stringnumber >= stringcount)
396     return PCRE_ERROR_NOSUBSTRING;
397     stringnumber *= 2;
398     yield = ovector[stringnumber+1] - ovector[stringnumber];
399     substring = (char *)(pcre_malloc)(yield + 1);
400     if (substring == NULL) return PCRE_ERROR_NOMEMORY;
401     memcpy(substring, subject + ovector[stringnumber], yield);
402     substring[yield] = 0;
403     *stringptr = substring;
404     return yield;
405     }
406    
407    
408    
409     /*************************************************
410     * Copy named captured string to new store *
411     *************************************************/
412    
413     /* This function copies a single captured substring, identified by name, into
414 nigel 91 new store. If the regex permits duplicate names, the first substring that is
415     set is chosen.
416 nigel 77
417     Arguments:
418     code the compiled regex
419     subject the subject string that was matched
420     ovector pointer to the offsets table
421     stringcount the number of substrings that were captured
422     (i.e. the yield of the pcre_exec call, unless
423     that was zero, in which case it should be 1/3
424     of the offset table size)
425     stringname the name of the required substring
426     stringptr where to put the pointer
427    
428     Returns: if successful:
429     the length of the copied string, not including the zero
430     that is put on the end; can be zero
431     if not successful:
432     PCRE_ERROR_NOMEMORY (-6) couldn't get memory
433     PCRE_ERROR_NOSUBSTRING (-7) no such captured substring
434     */
435    
436     int
437     pcre_get_named_substring(const pcre *code, const char *subject, int *ovector,
438     int stringcount, const char *stringname, const char **stringptr)
439     {
440 nigel 91 int n = get_first_set(code, stringname, ovector);
441 nigel 77 if (n <= 0) return n;
442     return pcre_get_substring(subject, ovector, stringcount, n, stringptr);
443     }
444    
445    
446    
447    
448     /*************************************************
449     * Free store obtained by get_substring *
450     *************************************************/
451    
452     /* This function exists for the benefit of people calling PCRE from non-C
453     programs that can call its functions, but not free() or (pcre_free)() directly.
454    
455     Argument: the result of a previous pcre_get_substring()
456     Returns: nothing
457     */
458    
459     void
460     pcre_free_substring(const char *pointer)
461     {
462     (pcre_free)((void *)pointer);
463     }
464    
465     /* End of pcre_get.c */

Properties

Name Value
svn:eol-style native
svn:keywords "Author Date Id Revision Url"

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12