/[pcre]/code/trunk/pcrecpp.cc
ViewVC logotype

Contents of /code/trunk/pcrecpp.cc

Parent Directory Parent Directory | Revision Log Revision Log


Revision 97 - (hide annotations) (download)
Mon Mar 5 12:36:47 2007 UTC (7 years, 9 months ago) by ph10
File size: 30119 byte(s)
Applied Bob and Daniel's patches to convert the build system to automake. Added 
the maintain directory, containing files that are used for maintenance, but are 
not distributed. This is an intermediate step.

1 nigel 77 // Copyright (c) 2005, Google Inc.
2     // All rights reserved.
3     //
4     // Redistribution and use in source and binary forms, with or without
5     // modification, are permitted provided that the following conditions are
6     // met:
7     //
8     // * Redistributions of source code must retain the above copyright
9     // notice, this list of conditions and the following disclaimer.
10     // * Redistributions in binary form must reproduce the above
11     // copyright notice, this list of conditions and the following disclaimer
12     // in the documentation and/or other materials provided with the
13     // distribution.
14     // * Neither the name of Google Inc. nor the names of its
15     // contributors may be used to endorse or promote products derived from
16     // this software without specific prior written permission.
17     //
18     // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19     // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20     // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21     // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22     // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23     // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24     // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25     // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26     // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27     // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28     // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29     //
30     // Author: Sanjay Ghemawat
31    
32 ph10 97 #ifdef HAVE_CONFIG_H
33     # include <config.h>
34     #endif
35    
#include <stdlib.h>
#include <stdio.h>
#include <ctype.h>
#include <limits.h>      /* for SHRT_MIN, USHRT_MAX, etc */
#include <string.h>      /* for memcpy, strchr */
#include <assert.h>
#include <errno.h>
#include <string>
#include <algorithm>
44 nigel 77 // We need this to compile the proper dll on windows/msys. This is copied
45     // from pcre_internal.h. It would probably be better just to include that.
46     #define PCRE_DEFINITION /* Win32 __declspec(export) trigger for .dll */
47     #include "pcre.h"
48     #include "pcre_stringpiece.h"
49     #include "pcrecpp.h"
50    
51    
52     namespace pcrecpp {
53    
54     // Maximum number of args we can set
55     static const int kMaxArgs = 16;
56     static const int kVecSize = (1 + kMaxArgs) * 3; // results + PCRE workspace
57    
58     // Special object that stands-in for no argument
59     Arg no_arg((void*)NULL);
60    
61     // If a regular expression has no error, its error_ field points here
62     static const string empty_string;
63    
64     // If the user doesn't ask for any options, we just use this one
65     static RE_Options default_options;
66    
67 nigel 93 void RE::Init(const string& pat, const RE_Options* options) {
68 nigel 77 pattern_ = pat;
69     if (options == NULL) {
70     options_ = default_options;
71     } else {
72     options_ = *options;
73     }
74     error_ = &empty_string;
75     re_full_ = NULL;
76     re_partial_ = NULL;
77    
78     re_partial_ = Compile(UNANCHORED);
79     if (re_partial_ != NULL) {
80     // Check for complicated patterns. The following change is
81     // conservative in that it may treat some "simple" patterns
82     // as "complex" (e.g., if the vertical bar is in a character
83     // class or is escaped). But it seems good enough.
84 nigel 93 if (strchr(pat.c_str(), '|') == NULL) {
85 nigel 77 // Simple pattern: we can use position-based checks to perform
86     // fully anchored matches
87     re_full_ = re_partial_;
88     } else {
89     // We need a special pattern for anchored matches
90     re_full_ = Compile(ANCHOR_BOTH);
91     }
92     }
93     }
94    
95 nigel 93 void RE::Cleanup() {
96 nigel 77 if (re_full_ != NULL && re_full_ != re_partial_) (*pcre_free)(re_full_);
97     if (re_partial_ != NULL) (*pcre_free)(re_partial_);
98     if (error_ != &empty_string) delete error_;
99     }
100    
101 nigel 93
102     RE::~RE() {
103     Cleanup();
104     }
105    
106    
107 nigel 77 pcre* RE::Compile(Anchor anchor) {
108     // First, convert RE_Options into pcre options
109     int pcre_options = 0;
110 nigel 81 pcre_options = options_.all_options();
111 nigel 77
112     // Special treatment for anchoring. This is needed because at
113     // runtime pcre only provides an option for anchoring at the
114     // beginning of a string (unless you use offset).
115     //
116     // There are three types of anchoring we want:
117     // UNANCHORED Compile the original pattern, and use
118     // a pcre unanchored match.
119     // ANCHOR_START Compile the original pattern, and use
120     // a pcre anchored match.
121     // ANCHOR_BOTH Tack a "\z" to the end of the original pattern
122     // and use a pcre anchored match.
123    
124     const char* compile_error;
125     int eoffset;
126     pcre* re;
127     if (anchor != ANCHOR_BOTH) {
128     re = pcre_compile(pattern_.c_str(), pcre_options,
129     &compile_error, &eoffset, NULL);
130     } else {
131     // Tack a '\z' at the end of RE. Parenthesize it first so that
132     // the '\z' applies to all top-level alternatives in the regexp.
133     string wrapped = "(?:"; // A non-counting grouping operator
134     wrapped += pattern_;
135     wrapped += ")\\z";
136     re = pcre_compile(wrapped.c_str(), pcre_options,
137     &compile_error, &eoffset, NULL);
138     }
139     if (re == NULL) {
140     if (error_ == &empty_string) error_ = new string(compile_error);
141     }
142     return re;
143     }
144    
145     /***** Matching interfaces *****/
146    
147     bool RE::FullMatch(const StringPiece& text,
148     const Arg& ptr1,
149     const Arg& ptr2,
150     const Arg& ptr3,
151     const Arg& ptr4,
152     const Arg& ptr5,
153     const Arg& ptr6,
154     const Arg& ptr7,
155     const Arg& ptr8,
156     const Arg& ptr9,
157     const Arg& ptr10,
158     const Arg& ptr11,
159     const Arg& ptr12,
160     const Arg& ptr13,
161     const Arg& ptr14,
162     const Arg& ptr15,
163     const Arg& ptr16) const {
164     const Arg* args[kMaxArgs];
165     int n = 0;
166     if (&ptr1 == &no_arg) goto done; args[n++] = &ptr1;
167     if (&ptr2 == &no_arg) goto done; args[n++] = &ptr2;
168     if (&ptr3 == &no_arg) goto done; args[n++] = &ptr3;
169     if (&ptr4 == &no_arg) goto done; args[n++] = &ptr4;
170     if (&ptr5 == &no_arg) goto done; args[n++] = &ptr5;
171     if (&ptr6 == &no_arg) goto done; args[n++] = &ptr6;
172     if (&ptr7 == &no_arg) goto done; args[n++] = &ptr7;
173     if (&ptr8 == &no_arg) goto done; args[n++] = &ptr8;
174     if (&ptr9 == &no_arg) goto done; args[n++] = &ptr9;
175     if (&ptr10 == &no_arg) goto done; args[n++] = &ptr10;
176     if (&ptr11 == &no_arg) goto done; args[n++] = &ptr11;
177     if (&ptr12 == &no_arg) goto done; args[n++] = &ptr12;
178     if (&ptr13 == &no_arg) goto done; args[n++] = &ptr13;
179     if (&ptr14 == &no_arg) goto done; args[n++] = &ptr14;
180     if (&ptr15 == &no_arg) goto done; args[n++] = &ptr15;
181     if (&ptr16 == &no_arg) goto done; args[n++] = &ptr16;
182     done:
183    
184     int consumed;
185     int vec[kVecSize];
186     return DoMatchImpl(text, ANCHOR_BOTH, &consumed, args, n, vec, kVecSize);
187     }
188    
189     bool RE::PartialMatch(const StringPiece& text,
190     const Arg& ptr1,
191     const Arg& ptr2,
192     const Arg& ptr3,
193     const Arg& ptr4,
194     const Arg& ptr5,
195     const Arg& ptr6,
196     const Arg& ptr7,
197     const Arg& ptr8,
198     const Arg& ptr9,
199     const Arg& ptr10,
200     const Arg& ptr11,
201     const Arg& ptr12,
202     const Arg& ptr13,
203     const Arg& ptr14,
204     const Arg& ptr15,
205     const Arg& ptr16) const {
206     const Arg* args[kMaxArgs];
207     int n = 0;
208     if (&ptr1 == &no_arg) goto done; args[n++] = &ptr1;
209     if (&ptr2 == &no_arg) goto done; args[n++] = &ptr2;
210     if (&ptr3 == &no_arg) goto done; args[n++] = &ptr3;
211     if (&ptr4 == &no_arg) goto done; args[n++] = &ptr4;
212     if (&ptr5 == &no_arg) goto done; args[n++] = &ptr5;
213     if (&ptr6 == &no_arg) goto done; args[n++] = &ptr6;
214     if (&ptr7 == &no_arg) goto done; args[n++] = &ptr7;
215     if (&ptr8 == &no_arg) goto done; args[n++] = &ptr8;
216     if (&ptr9 == &no_arg) goto done; args[n++] = &ptr9;
217     if (&ptr10 == &no_arg) goto done; args[n++] = &ptr10;
218     if (&ptr11 == &no_arg) goto done; args[n++] = &ptr11;
219     if (&ptr12 == &no_arg) goto done; args[n++] = &ptr12;
220     if (&ptr13 == &no_arg) goto done; args[n++] = &ptr13;
221     if (&ptr14 == &no_arg) goto done; args[n++] = &ptr14;
222     if (&ptr15 == &no_arg) goto done; args[n++] = &ptr15;
223     if (&ptr16 == &no_arg) goto done; args[n++] = &ptr16;
224     done:
225    
226     int consumed;
227     int vec[kVecSize];
228     return DoMatchImpl(text, UNANCHORED, &consumed, args, n, vec, kVecSize);
229     }
230    
231     bool RE::Consume(StringPiece* input,
232     const Arg& ptr1,
233     const Arg& ptr2,
234     const Arg& ptr3,
235     const Arg& ptr4,
236     const Arg& ptr5,
237     const Arg& ptr6,
238     const Arg& ptr7,
239     const Arg& ptr8,
240     const Arg& ptr9,
241     const Arg& ptr10,
242     const Arg& ptr11,
243     const Arg& ptr12,
244     const Arg& ptr13,
245     const Arg& ptr14,
246     const Arg& ptr15,
247     const Arg& ptr16) const {
248     const Arg* args[kMaxArgs];
249     int n = 0;
250     if (&ptr1 == &no_arg) goto done; args[n++] = &ptr1;
251     if (&ptr2 == &no_arg) goto done; args[n++] = &ptr2;
252     if (&ptr3 == &no_arg) goto done; args[n++] = &ptr3;
253     if (&ptr4 == &no_arg) goto done; args[n++] = &ptr4;
254     if (&ptr5 == &no_arg) goto done; args[n++] = &ptr5;
255     if (&ptr6 == &no_arg) goto done; args[n++] = &ptr6;
256     if (&ptr7 == &no_arg) goto done; args[n++] = &ptr7;
257     if (&ptr8 == &no_arg) goto done; args[n++] = &ptr8;
258     if (&ptr9 == &no_arg) goto done; args[n++] = &ptr9;
259     if (&ptr10 == &no_arg) goto done; args[n++] = &ptr10;
260     if (&ptr11 == &no_arg) goto done; args[n++] = &ptr11;
261     if (&ptr12 == &no_arg) goto done; args[n++] = &ptr12;
262     if (&ptr13 == &no_arg) goto done; args[n++] = &ptr13;
263     if (&ptr14 == &no_arg) goto done; args[n++] = &ptr14;
264     if (&ptr15 == &no_arg) goto done; args[n++] = &ptr15;
265     if (&ptr16 == &no_arg) goto done; args[n++] = &ptr16;
266     done:
267    
268     int consumed;
269     int vec[kVecSize];
270     if (DoMatchImpl(*input, ANCHOR_START, &consumed,
271     args, n, vec, kVecSize)) {
272     input->remove_prefix(consumed);
273     return true;
274     } else {
275     return false;
276     }
277     }
278    
279     bool RE::FindAndConsume(StringPiece* input,
280     const Arg& ptr1,
281     const Arg& ptr2,
282     const Arg& ptr3,
283     const Arg& ptr4,
284     const Arg& ptr5,
285     const Arg& ptr6,
286     const Arg& ptr7,
287     const Arg& ptr8,
288     const Arg& ptr9,
289     const Arg& ptr10,
290     const Arg& ptr11,
291     const Arg& ptr12,
292     const Arg& ptr13,
293     const Arg& ptr14,
294     const Arg& ptr15,
295     const Arg& ptr16) const {
296     const Arg* args[kMaxArgs];
297     int n = 0;
298     if (&ptr1 == &no_arg) goto done; args[n++] = &ptr1;
299     if (&ptr2 == &no_arg) goto done; args[n++] = &ptr2;
300     if (&ptr3 == &no_arg) goto done; args[n++] = &ptr3;
301     if (&ptr4 == &no_arg) goto done; args[n++] = &ptr4;
302     if (&ptr5 == &no_arg) goto done; args[n++] = &ptr5;
303     if (&ptr6 == &no_arg) goto done; args[n++] = &ptr6;
304     if (&ptr7 == &no_arg) goto done; args[n++] = &ptr7;
305     if (&ptr8 == &no_arg) goto done; args[n++] = &ptr8;
306     if (&ptr9 == &no_arg) goto done; args[n++] = &ptr9;
307     if (&ptr10 == &no_arg) goto done; args[n++] = &ptr10;
308     if (&ptr11 == &no_arg) goto done; args[n++] = &ptr11;
309     if (&ptr12 == &no_arg) goto done; args[n++] = &ptr12;
310     if (&ptr13 == &no_arg) goto done; args[n++] = &ptr13;
311     if (&ptr14 == &no_arg) goto done; args[n++] = &ptr14;
312     if (&ptr15 == &no_arg) goto done; args[n++] = &ptr15;
313     if (&ptr16 == &no_arg) goto done; args[n++] = &ptr16;
314     done:
315    
316     int consumed;
317     int vec[kVecSize];
318     if (DoMatchImpl(*input, UNANCHORED, &consumed,
319     args, n, vec, kVecSize)) {
320     input->remove_prefix(consumed);
321     return true;
322     } else {
323     return false;
324     }
325     }
326    
327     bool RE::Replace(const StringPiece& rewrite,
328     string *str) const {
329     int vec[kVecSize];
330     int matches = TryMatch(*str, 0, UNANCHORED, vec, kVecSize);
331     if (matches == 0)
332     return false;
333    
334     string s;
335     if (!Rewrite(&s, rewrite, *str, vec, matches))
336     return false;
337    
338     assert(vec[0] >= 0);
339     assert(vec[1] >= 0);
340     str->replace(vec[0], vec[1] - vec[0], s);
341     return true;
342     }
343    
344 nigel 91 // Returns PCRE_NEWLINE_CRLF, PCRE_NEWLINE_CR, or PCRE_NEWLINE_LF.
345     // Note that PCRE_NEWLINE_CRLF is defined to be P_N_CR | P_N_LF.
346     static int NewlineMode(int pcre_options) {
347     // TODO: if we can make it threadsafe, cache this var
348     int newline_mode = 0;
349     /* if (newline_mode) return newline_mode; */ // do this once it's cached
350     if (pcre_options & (PCRE_NEWLINE_CRLF|PCRE_NEWLINE_CR|PCRE_NEWLINE_LF)) {
351     newline_mode = (pcre_options &
352     (PCRE_NEWLINE_CRLF|PCRE_NEWLINE_CR|PCRE_NEWLINE_LF));
353     } else {
354     int newline;
355     pcre_config(PCRE_CONFIG_NEWLINE, &newline);
356     if (newline == 10)
357     newline_mode = PCRE_NEWLINE_LF;
358     else if (newline == 13)
359     newline_mode = PCRE_NEWLINE_CR;
360     else if (newline == 3338)
361     newline_mode = PCRE_NEWLINE_CRLF;
362     else
363     assert("" == "Unexpected return value from pcre_config(NEWLINE)");
364     }
365     return newline_mode;
366     }
367    
368 nigel 77 int RE::GlobalReplace(const StringPiece& rewrite,
369     string *str) const {
370     int count = 0;
371     int vec[kVecSize];
372     string out;
373     int start = 0;
374     int lastend = -1;
375    
376     for (; start <= static_cast<int>(str->length()); count++) {
377     int matches = TryMatch(*str, start, UNANCHORED, vec, kVecSize);
378     if (matches <= 0)
379     break;
380     int matchstart = vec[0], matchend = vec[1];
381     assert(matchstart >= start);
382     assert(matchend >= matchstart);
383     if (matchstart == matchend && matchstart == lastend) {
384     // advance one character if we matched an empty string at the same
385     // place as the last match occurred
386 nigel 91 matchend = start + 1;
387     // If the current char is CR and we're in CRLF mode, skip LF too.
388     // Note it's better to call pcre_fullinfo() than to examine
389     // all_options(), since options_ could have changed bewteen
390     // compile-time and now, but this is simpler and safe enough.
391     if (start+1 < static_cast<int>(str->length()) &&
392     (*str)[start] == '\r' && (*str)[start+1] == '\n' &&
393     NewlineMode(options_.all_options()) == PCRE_NEWLINE_CRLF) {
394     matchend++;
395     }
396     // We also need to advance more than one char if we're in utf8 mode.
397     #ifdef SUPPORT_UTF8
398     if (options_.utf8()) {
399     while (matchend < static_cast<int>(str->length()) &&
400     ((*str)[matchend] & 0xc0) == 0x80)
401     matchend++;
402     }
403     #endif
404     if (matchend <= static_cast<int>(str->length()))
405     out.append(*str, start, matchend - start);
406     start = matchend;
407 nigel 77 } else {
408     out.append(*str, start, matchstart - start);
409     Rewrite(&out, rewrite, *str, vec, matches);
410     start = matchend;
411     lastend = matchend;
412     count++;
413     }
414     }
415    
416     if (count == 0)
417     return 0;
418    
419     if (start < static_cast<int>(str->length()))
420     out.append(*str, start, str->length() - start);
421     swap(out, *str);
422     return count;
423     }
424    
425     bool RE::Extract(const StringPiece& rewrite,
426     const StringPiece& text,
427     string *out) const {
428     int vec[kVecSize];
429     int matches = TryMatch(text, 0, UNANCHORED, vec, kVecSize);
430     if (matches == 0)
431     return false;
432 nigel 81 out->erase();
433 nigel 77 return Rewrite(out, rewrite, text, vec, matches);
434     }
435    
436 nigel 93 /*static*/ string RE::QuoteMeta(const StringPiece& unquoted) {
437     string result;
438    
439     // Escape any ascii character not in [A-Za-z_0-9].
440     //
441     // Note that it's legal to escape a character even if it has no
442     // special meaning in a regular expression -- so this function does
443     // that. (This also makes it identical to the perl function of the
444     // same name; see `perldoc -f quotemeta`.)
445     for (int ii = 0; ii < unquoted.size(); ++ii) {
446     // Note that using 'isalnum' here raises the benchmark time from
447     // 32ns to 58ns:
448     if ((unquoted[ii] < 'a' || unquoted[ii] > 'z') &&
449     (unquoted[ii] < 'A' || unquoted[ii] > 'Z') &&
450     (unquoted[ii] < '0' || unquoted[ii] > '9') &&
451     unquoted[ii] != '_' &&
452     // If this is the part of a UTF8 or Latin1 character, we need
453     // to copy this byte without escaping. Experimentally this is
454     // what works correctly with the regexp library.
455     !(unquoted[ii] & 128)) {
456     result += '\\';
457     }
458     result += unquoted[ii];
459     }
460    
461     return result;
462     }
463    
464 nigel 77 /***** Actual matching and rewriting code *****/
465    
466     int RE::TryMatch(const StringPiece& text,
467     int startpos,
468     Anchor anchor,
469     int *vec,
470     int vecsize) const {
471     pcre* re = (anchor == ANCHOR_BOTH) ? re_full_ : re_partial_;
472     if (re == NULL) {
473     //fprintf(stderr, "Matching against invalid re: %s\n", error_->c_str());
474     return 0;
475     }
476    
477     pcre_extra extra = { 0 };
478     if (options_.match_limit() > 0) {
479 nigel 87 extra.flags |= PCRE_EXTRA_MATCH_LIMIT;
480 nigel 77 extra.match_limit = options_.match_limit();
481     }
482 nigel 87 if (options_.match_limit_recursion() > 0) {
483     extra.flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
484     extra.match_limit_recursion = options_.match_limit_recursion();
485     }
486 nigel 77 int rc = pcre_exec(re, // The regular expression object
487     &extra,
488 nigel 87 (text.data() == NULL) ? "" : text.data(),
489 nigel 77 text.size(),
490     startpos,
491     (anchor == UNANCHORED) ? 0 : PCRE_ANCHORED,
492     vec,
493     vecsize);
494    
495     // Handle errors
496     if (rc == PCRE_ERROR_NOMATCH) {
497     return 0;
498     } else if (rc < 0) {
499     //fprintf(stderr, "Unexpected return code: %d when matching '%s'\n",
500     // re, pattern_.c_str());
501     return 0;
502     } else if (rc == 0) {
503     // pcre_exec() returns 0 as a special case when the number of
504     // capturing subpatterns exceeds the size of the vector.
505     // When this happens, there is a match and the output vector
506     // is filled, but we miss out on the positions of the extra subpatterns.
507     rc = vecsize / 2;
508     }
509    
510     if ((anchor == ANCHOR_BOTH) && (re_full_ == re_partial_)) {
511     // We need an extra check to make sure that the match extended
512     // to the end of the input string
513     assert(vec[0] == 0); // PCRE_ANCHORED forces starting match
514     if (vec[1] != text.size()) return 0; // Did not get ending match
515     }
516    
517     return rc;
518     }
519    
520     bool RE::DoMatchImpl(const StringPiece& text,
521     Anchor anchor,
522     int* consumed,
523     const Arg* const* args,
524     int n,
525     int* vec,
526     int vecsize) const {
527     assert((1 + n) * 3 <= vecsize); // results + PCRE workspace
528     int matches = TryMatch(text, 0, anchor, vec, vecsize);
529     assert(matches >= 0); // TryMatch never returns negatives
530     if (matches == 0)
531     return false;
532    
533     *consumed = vec[1];
534    
535 nigel 87 if (n == 0 || args == NULL) {
536 nigel 77 // We are not interested in results
537     return true;
538     }
539    
540 nigel 87 if (NumberOfCapturingGroups() < n) {
541     // RE has fewer capturing groups than number of arg pointers passed in
542     return false;
543     }
544    
545 nigel 77 // If we got here, we must have matched the whole pattern.
546     // We do not need (can not do) any more checks on the value of 'matches' here
547     // -- see the comment for TryMatch.
548     for (int i = 0; i < n; i++) {
549     const int start = vec[2*(i+1)];
550     const int limit = vec[2*(i+1)+1];
551     if (!args[i]->Parse(text.data() + start, limit-start)) {
552     // TODO: Should we indicate what the error was?
553     return false;
554     }
555     }
556    
557     return true;
558     }
559    
560     bool RE::DoMatch(const StringPiece& text,
561     Anchor anchor,
562     int* consumed,
563     const Arg* const args[],
564     int n) const {
565     assert(n >= 0);
566     size_t const vecsize = (1 + n) * 3; // results + PCRE workspace
567     // (as for kVecSize)
568     int space[21]; // use stack allocation for small vecsize (common case)
569     int* vec = vecsize <= 21 ? space : new int[vecsize];
570     bool retval = DoMatchImpl(text, anchor, consumed, args, n, vec, vecsize);
571     if (vec != space) delete [] vec;
572     return retval;
573     }
574    
575     bool RE::Rewrite(string *out, const StringPiece &rewrite,
576     const StringPiece &text, int *vec, int veclen) const {
577     for (const char *s = rewrite.data(), *end = s + rewrite.size();
578     s < end; s++) {
579     int c = *s;
580     if (c == '\\') {
581     c = *++s;
582     if (isdigit(c)) {
583     int n = (c - '0');
584     if (n >= veclen) {
585     //fprintf(stderr, requested group %d in regexp %.*s\n",
586     // n, rewrite.size(), rewrite.data());
587     return false;
588     }
589     int start = vec[2 * n];
590     if (start >= 0)
591     out->append(text.data() + start, vec[2 * n + 1] - start);
592     } else if (c == '\\') {
593     out->push_back('\\');
594     } else {
595     //fprintf(stderr, "invalid rewrite pattern: %.*s\n",
596     // rewrite.size(), rewrite.data());
597     return false;
598     }
599     } else {
600     out->push_back(c);
601     }
602     }
603     return true;
604     }
605    
606     // Return the number of capturing subpatterns, or -1 if the
607     // regexp wasn't valid on construction.
608 nigel 87 int RE::NumberOfCapturingGroups() const {
609 nigel 77 if (re_partial_ == NULL) return -1;
610    
611     int result;
612     int pcre_retval = pcre_fullinfo(re_partial_, // The regular expression object
613     NULL, // We did not study the pattern
614     PCRE_INFO_CAPTURECOUNT,
615     &result);
616     assert(pcre_retval == 0);
617     return result;
618     }
619    
620     /***** Parsers for various types *****/
621    
622     bool Arg::parse_null(const char* str, int n, void* dest) {
623     // We fail if somebody asked us to store into a non-NULL void* pointer
624     return (dest == NULL);
625     }
626    
627     bool Arg::parse_string(const char* str, int n, void* dest) {
628     reinterpret_cast<string*>(dest)->assign(str, n);
629     return true;
630     }
631    
632     bool Arg::parse_stringpiece(const char* str, int n, void* dest) {
633     reinterpret_cast<StringPiece*>(dest)->set(str, n);
634     return true;
635     }
636    
637     bool Arg::parse_char(const char* str, int n, void* dest) {
638     if (n != 1) return false;
639     *(reinterpret_cast<char*>(dest)) = str[0];
640     return true;
641     }
642    
643     bool Arg::parse_uchar(const char* str, int n, void* dest) {
644     if (n != 1) return false;
645     *(reinterpret_cast<unsigned char*>(dest)) = str[0];
646     return true;
647     }
648    
// Largest number spec that we are willing to parse
static const int kMaxNumberLength = 32;

// Prepares the "n" characters at "str" for parsing by the strtoxxx()
// routines, which need their input to stop right after the number.
//
// REQUIRES "buf" must have length at least kMaxNumberLength+1
// REQUIRES "n > 0"
// REQUIRES "str[n]" must be readable: the character following the input
//          is inspected.
// Copies "str" into "buf" and null-terminates if necessary.
// Returns one of:
//      a. "str" if no termination is needed
//      b. "buf" if the string was copied and null-terminated
//      c. "" if the input was invalid and has no hope of being parsed
static const char* TerminateNumber(char* buf, const char* str, int n) {
  // The <ctype.h> classifiers are only defined for values representable
  // as unsigned char (or EOF), so plain chars are converted before each
  // call; otherwise bytes >= 0x80 would be passed as negative values.
  if ((n > 0) && isspace(static_cast<unsigned char>(*str))) {
    // We are less forgiving than the strtoxxx() routines and do not
    // allow leading spaces.
    return "";
  }

  // See if the character right after the input text may potentially
  // look like a digit (decimal or hexadecimal).
  const char next = str[n];
  const bool next_looks_like_digit =
      isdigit(static_cast<unsigned char>(next)) ||
      ((next >= 'a') && (next <= 'f')) ||
      ((next >= 'A') && (next <= 'F'));

  if (!next_looks_like_digit) {
    // We can parse right out of the supplied string, so return it.
    return str;
  }

  // The parser would run on into the following text, so hand it a
  // null-terminated private copy instead.
  if (n > kMaxNumberLength) return "";  // Input too big to be a valid number
  memcpy(buf, str, n);
  buf[n] = '\0';
  return buf;
}
680    
681     bool Arg::parse_long_radix(const char* str,
682     int n,
683     void* dest,
684     int radix) {
685     if (n == 0) return false;
686     char buf[kMaxNumberLength+1];
687     str = TerminateNumber(buf, str, n);
688     char* end;
689     errno = 0;
690     long r = strtol(str, &end, radix);
691     if (end != str + n) return false; // Leftover junk
692     if (errno) return false;
693     *(reinterpret_cast<long*>(dest)) = r;
694     return true;
695     }
696    
697     bool Arg::parse_ulong_radix(const char* str,
698     int n,
699     void* dest,
700     int radix) {
701     if (n == 0) return false;
702     char buf[kMaxNumberLength+1];
703     str = TerminateNumber(buf, str, n);
704 nigel 87 if (str[0] == '-') return false; // strtoul() on a negative number?!
705 nigel 77 char* end;
706     errno = 0;
707     unsigned long r = strtoul(str, &end, radix);
708     if (end != str + n) return false; // Leftover junk
709     if (errno) return false;
710     *(reinterpret_cast<unsigned long*>(dest)) = r;
711     return true;
712     }
713    
714     bool Arg::parse_short_radix(const char* str,
715     int n,
716     void* dest,
717     int radix) {
718     long r;
719     if (!parse_long_radix(str, n, &r, radix)) return false; // Could not parse
720     if (r < SHRT_MIN || r > SHRT_MAX) return false; // Out of range
721     *(reinterpret_cast<short*>(dest)) = r;
722     return true;
723     }
724    
725     bool Arg::parse_ushort_radix(const char* str,
726     int n,
727     void* dest,
728     int radix) {
729     unsigned long r;
730     if (!parse_ulong_radix(str, n, &r, radix)) return false; // Could not parse
731     if (r > USHRT_MAX) return false; // Out of range
732     *(reinterpret_cast<unsigned short*>(dest)) = r;
733     return true;
734     }
735    
736     bool Arg::parse_int_radix(const char* str,
737     int n,
738     void* dest,
739     int radix) {
740     long r;
741     if (!parse_long_radix(str, n, &r, radix)) return false; // Could not parse
742     if (r < INT_MIN || r > INT_MAX) return false; // Out of range
743     *(reinterpret_cast<int*>(dest)) = r;
744     return true;
745     }
746    
747     bool Arg::parse_uint_radix(const char* str,
748     int n,
749     void* dest,
750     int radix) {
751     unsigned long r;
752     if (!parse_ulong_radix(str, n, &r, radix)) return false; // Could not parse
753     if (r > UINT_MAX) return false; // Out of range
754     *(reinterpret_cast<unsigned int*>(dest)) = r;
755     return true;
756     }
757    
758     bool Arg::parse_longlong_radix(const char* str,
759     int n,
760     void* dest,
761     int radix) {
762     #ifndef HAVE_LONG_LONG
763     return false;
764     #else
765     if (n == 0) return false;
766     char buf[kMaxNumberLength+1];
767     str = TerminateNumber(buf, str, n);
768     char* end;
769     errno = 0;
770     #if defined HAVE_STRTOQ
771     long long r = strtoq(str, &end, radix);
772     #elif defined HAVE_STRTOLL
773     long long r = strtoll(str, &end, radix);
774     #else
775     #error parse_longlong_radix: cannot convert input to a long-long
776     #endif
777     if (end != str + n) return false; // Leftover junk
778     if (errno) return false;
779     *(reinterpret_cast<long long*>(dest)) = r;
780     return true;
781     #endif /* HAVE_LONG_LONG */
782     }
783    
784     bool Arg::parse_ulonglong_radix(const char* str,
785     int n,
786     void* dest,
787     int radix) {
788     #ifndef HAVE_UNSIGNED_LONG_LONG
789     return false;
790     #else
791     if (n == 0) return false;
792     char buf[kMaxNumberLength+1];
793     str = TerminateNumber(buf, str, n);
794 nigel 87 if (str[0] == '-') return false; // strtoull() on a negative number?!
795 nigel 77 char* end;
796     errno = 0;
797     #if defined HAVE_STRTOQ
798     unsigned long long r = strtouq(str, &end, radix);
799     #elif defined HAVE_STRTOLL
800     unsigned long long r = strtoull(str, &end, radix);
801     #else
802     #error parse_ulonglong_radix: cannot convert input to a long-long
803     #endif
804     if (end != str + n) return false; // Leftover junk
805     if (errno) return false;
806     *(reinterpret_cast<unsigned long long*>(dest)) = r;
807     return true;
808     #endif /* HAVE_UNSIGNED_LONG_LONG */
809     }
810    
811     bool Arg::parse_double(const char* str, int n, void* dest) {
812     if (n == 0) return false;
813     static const int kMaxLength = 200;
814     char buf[kMaxLength];
815     if (n >= kMaxLength) return false;
816     memcpy(buf, str, n);
817     buf[n] = '\0';
818     errno = 0;
819     char* end;
820     double r = strtod(buf, &end);
821     if (end != buf + n) return false; // Leftover junk
822     if (errno) return false;
823     *(reinterpret_cast<double*>(dest)) = r;
824     return true;
825     }
826    
827     bool Arg::parse_float(const char* str, int n, void* dest) {
828     double r;
829     if (!parse_double(str, n, &r)) return false;
830     *(reinterpret_cast<float*>(dest)) = static_cast<float>(r);
831     return true;
832     }
833    
834    
835     #define DEFINE_INTEGER_PARSERS(name) \
836     bool Arg::parse_##name(const char* str, int n, void* dest) { \
837     return parse_##name##_radix(str, n, dest, 10); \
838     } \
839     bool Arg::parse_##name##_hex(const char* str, int n, void* dest) { \
840     return parse_##name##_radix(str, n, dest, 16); \
841     } \
842     bool Arg::parse_##name##_octal(const char* str, int n, void* dest) { \
843     return parse_##name##_radix(str, n, dest, 8); \
844     } \
845     bool Arg::parse_##name##_cradix(const char* str, int n, void* dest) { \
846     return parse_##name##_radix(str, n, dest, 0); \
847     }
848    
849 nigel 93 DEFINE_INTEGER_PARSERS(short) /* */
850     DEFINE_INTEGER_PARSERS(ushort) /* */
851     DEFINE_INTEGER_PARSERS(int) /* Don't use semicolons after these */
852     DEFINE_INTEGER_PARSERS(uint) /* statements because they can cause */
853     DEFINE_INTEGER_PARSERS(long) /* compiler warnings if the checking */
854     DEFINE_INTEGER_PARSERS(ulong) /* level is turned up high enough. */
855     DEFINE_INTEGER_PARSERS(longlong) /* */
856     DEFINE_INTEGER_PARSERS(ulonglong) /* */
857 nigel 77
858     #undef DEFINE_INTEGER_PARSERS
859    
860     } // namespace pcrecpp

Properties

Name Value
svn:eol-style native
svn:keywords "Author Date Id Revision Url"

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12