/[pcre]/code/branches/pcre16/pcrecpp.cc
ViewVC logotype

Contents of /code/branches/pcre16/pcrecpp.cc

Parent Directory | Revision Log


Revision 200 - (hide annotations) (download)
Wed Aug 1 09:10:40 2007 UTC (7 years, 4 months ago) by ph10
Original Path: code/trunk/pcrecpp.cc
File size: 29077 byte(s)
Correct errors in previous patch; tidy for test release.

1 nigel 77 // Copyright (c) 2005, Google Inc.
2     // All rights reserved.
3     //
4     // Redistribution and use in source and binary forms, with or without
5     // modification, are permitted provided that the following conditions are
6     // met:
7     //
8     // * Redistributions of source code must retain the above copyright
9     // notice, this list of conditions and the following disclaimer.
10     // * Redistributions in binary form must reproduce the above
11     // copyright notice, this list of conditions and the following disclaimer
12     // in the documentation and/or other materials provided with the
13     // distribution.
14     // * Neither the name of Google Inc. nor the names of its
15     // contributors may be used to endorse or promote products derived from
16     // this software without specific prior written permission.
17     //
18     // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19     // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20     // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21     // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22     // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23     // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24     // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25     // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26     // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27     // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28     // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29     //
30     // Author: Sanjay Ghemawat
31    
32 ph10 97 #ifdef HAVE_CONFIG_H
33 ph10 199 #include <config.h>
34 ph10 97 #endif
35    
36 nigel 77 #include <stdlib.h>
37     #include <stdio.h>
38     #include <ctype.h>
39     #include <limits.h> /* for SHRT_MIN, USHRT_MAX, etc */
40     #include <assert.h>
41     #include <errno.h>
42     #include <string>
43 nigel 81 #include <algorithm>
44 ph10 199
45     #include "pcrecpp_internal.h"
46 ph10 137 #include <pcre.h>
47 nigel 77 #include "pcrecpp.h"
48 ph10 199 #include "pcre_stringpiece.h"
49 nigel 77
50    
51     namespace pcrecpp {
52    
53     // Maximum number of args we can set
54     static const int kMaxArgs = 16;
55     static const int kVecSize = (1 + kMaxArgs) * 3; // results + PCRE workspace
56    
57     // Special object that stands-in for no argument
58 ph10 200 PCRECPP_EXP_DEFN Arg no_arg((void*)NULL);
59 nigel 77
60     // If a regular expression has no error, its error_ field points here
61     static const string empty_string;
62    
63     // If the user doesn't ask for any options, we just use this one
64     static RE_Options default_options;
65    
66 nigel 93 void RE::Init(const string& pat, const RE_Options* options) {
67 nigel 77 pattern_ = pat;
68     if (options == NULL) {
69     options_ = default_options;
70     } else {
71     options_ = *options;
72     }
73     error_ = &empty_string;
74     re_full_ = NULL;
75     re_partial_ = NULL;
76    
77     re_partial_ = Compile(UNANCHORED);
78     if (re_partial_ != NULL) {
79 ph10 179 re_full_ = Compile(ANCHOR_BOTH);
80 nigel 77 }
81     }
82    
83 nigel 93 void RE::Cleanup() {
84 ph10 179 if (re_full_ != NULL) (*pcre_free)(re_full_);
85     if (re_partial_ != NULL) (*pcre_free)(re_partial_);
86     if (error_ != &empty_string) delete error_;
87 nigel 77 }
88    
89 nigel 93
90     RE::~RE() {
91     Cleanup();
92     }
93    
94    
95 nigel 77 pcre* RE::Compile(Anchor anchor) {
96     // First, convert RE_Options into pcre options
97     int pcre_options = 0;
98 nigel 81 pcre_options = options_.all_options();
99 nigel 77
100     // Special treatment for anchoring. This is needed because at
101     // runtime pcre only provides an option for anchoring at the
102     // beginning of a string (unless you use offset).
103     //
104     // There are three types of anchoring we want:
105     // UNANCHORED Compile the original pattern, and use
106     // a pcre unanchored match.
107     // ANCHOR_START Compile the original pattern, and use
108     // a pcre anchored match.
109     // ANCHOR_BOTH Tack a "\z" to the end of the original pattern
110     // and use a pcre anchored match.
111    
112     const char* compile_error;
113     int eoffset;
114     pcre* re;
115     if (anchor != ANCHOR_BOTH) {
116     re = pcre_compile(pattern_.c_str(), pcre_options,
117     &compile_error, &eoffset, NULL);
118     } else {
119     // Tack a '\z' at the end of RE. Parenthesize it first so that
120     // the '\z' applies to all top-level alternatives in the regexp.
121     string wrapped = "(?:"; // A non-counting grouping operator
122     wrapped += pattern_;
123     wrapped += ")\\z";
124     re = pcre_compile(wrapped.c_str(), pcre_options,
125     &compile_error, &eoffset, NULL);
126     }
127     if (re == NULL) {
128     if (error_ == &empty_string) error_ = new string(compile_error);
129     }
130     return re;
131     }
132    
133     /***** Matching interfaces *****/
134    
135     bool RE::FullMatch(const StringPiece& text,
136     const Arg& ptr1,
137     const Arg& ptr2,
138     const Arg& ptr3,
139     const Arg& ptr4,
140     const Arg& ptr5,
141     const Arg& ptr6,
142     const Arg& ptr7,
143     const Arg& ptr8,
144     const Arg& ptr9,
145     const Arg& ptr10,
146     const Arg& ptr11,
147     const Arg& ptr12,
148     const Arg& ptr13,
149     const Arg& ptr14,
150     const Arg& ptr15,
151     const Arg& ptr16) const {
152     const Arg* args[kMaxArgs];
153     int n = 0;
154     if (&ptr1 == &no_arg) goto done; args[n++] = &ptr1;
155     if (&ptr2 == &no_arg) goto done; args[n++] = &ptr2;
156     if (&ptr3 == &no_arg) goto done; args[n++] = &ptr3;
157     if (&ptr4 == &no_arg) goto done; args[n++] = &ptr4;
158     if (&ptr5 == &no_arg) goto done; args[n++] = &ptr5;
159     if (&ptr6 == &no_arg) goto done; args[n++] = &ptr6;
160     if (&ptr7 == &no_arg) goto done; args[n++] = &ptr7;
161     if (&ptr8 == &no_arg) goto done; args[n++] = &ptr8;
162     if (&ptr9 == &no_arg) goto done; args[n++] = &ptr9;
163     if (&ptr10 == &no_arg) goto done; args[n++] = &ptr10;
164     if (&ptr11 == &no_arg) goto done; args[n++] = &ptr11;
165     if (&ptr12 == &no_arg) goto done; args[n++] = &ptr12;
166     if (&ptr13 == &no_arg) goto done; args[n++] = &ptr13;
167     if (&ptr14 == &no_arg) goto done; args[n++] = &ptr14;
168     if (&ptr15 == &no_arg) goto done; args[n++] = &ptr15;
169     if (&ptr16 == &no_arg) goto done; args[n++] = &ptr16;
170     done:
171    
172     int consumed;
173     int vec[kVecSize];
174     return DoMatchImpl(text, ANCHOR_BOTH, &consumed, args, n, vec, kVecSize);
175     }
176    
177     bool RE::PartialMatch(const StringPiece& text,
178     const Arg& ptr1,
179     const Arg& ptr2,
180     const Arg& ptr3,
181     const Arg& ptr4,
182     const Arg& ptr5,
183     const Arg& ptr6,
184     const Arg& ptr7,
185     const Arg& ptr8,
186     const Arg& ptr9,
187     const Arg& ptr10,
188     const Arg& ptr11,
189     const Arg& ptr12,
190     const Arg& ptr13,
191     const Arg& ptr14,
192     const Arg& ptr15,
193     const Arg& ptr16) const {
194     const Arg* args[kMaxArgs];
195     int n = 0;
196     if (&ptr1 == &no_arg) goto done; args[n++] = &ptr1;
197     if (&ptr2 == &no_arg) goto done; args[n++] = &ptr2;
198     if (&ptr3 == &no_arg) goto done; args[n++] = &ptr3;
199     if (&ptr4 == &no_arg) goto done; args[n++] = &ptr4;
200     if (&ptr5 == &no_arg) goto done; args[n++] = &ptr5;
201     if (&ptr6 == &no_arg) goto done; args[n++] = &ptr6;
202     if (&ptr7 == &no_arg) goto done; args[n++] = &ptr7;
203     if (&ptr8 == &no_arg) goto done; args[n++] = &ptr8;
204     if (&ptr9 == &no_arg) goto done; args[n++] = &ptr9;
205     if (&ptr10 == &no_arg) goto done; args[n++] = &ptr10;
206     if (&ptr11 == &no_arg) goto done; args[n++] = &ptr11;
207     if (&ptr12 == &no_arg) goto done; args[n++] = &ptr12;
208     if (&ptr13 == &no_arg) goto done; args[n++] = &ptr13;
209     if (&ptr14 == &no_arg) goto done; args[n++] = &ptr14;
210     if (&ptr15 == &no_arg) goto done; args[n++] = &ptr15;
211     if (&ptr16 == &no_arg) goto done; args[n++] = &ptr16;
212     done:
213    
214     int consumed;
215     int vec[kVecSize];
216     return DoMatchImpl(text, UNANCHORED, &consumed, args, n, vec, kVecSize);
217     }
218    
219     bool RE::Consume(StringPiece* input,
220     const Arg& ptr1,
221     const Arg& ptr2,
222     const Arg& ptr3,
223     const Arg& ptr4,
224     const Arg& ptr5,
225     const Arg& ptr6,
226     const Arg& ptr7,
227     const Arg& ptr8,
228     const Arg& ptr9,
229     const Arg& ptr10,
230     const Arg& ptr11,
231     const Arg& ptr12,
232     const Arg& ptr13,
233     const Arg& ptr14,
234     const Arg& ptr15,
235     const Arg& ptr16) const {
236     const Arg* args[kMaxArgs];
237     int n = 0;
238     if (&ptr1 == &no_arg) goto done; args[n++] = &ptr1;
239     if (&ptr2 == &no_arg) goto done; args[n++] = &ptr2;
240     if (&ptr3 == &no_arg) goto done; args[n++] = &ptr3;
241     if (&ptr4 == &no_arg) goto done; args[n++] = &ptr4;
242     if (&ptr5 == &no_arg) goto done; args[n++] = &ptr5;
243     if (&ptr6 == &no_arg) goto done; args[n++] = &ptr6;
244     if (&ptr7 == &no_arg) goto done; args[n++] = &ptr7;
245     if (&ptr8 == &no_arg) goto done; args[n++] = &ptr8;
246     if (&ptr9 == &no_arg) goto done; args[n++] = &ptr9;
247     if (&ptr10 == &no_arg) goto done; args[n++] = &ptr10;
248     if (&ptr11 == &no_arg) goto done; args[n++] = &ptr11;
249     if (&ptr12 == &no_arg) goto done; args[n++] = &ptr12;
250     if (&ptr13 == &no_arg) goto done; args[n++] = &ptr13;
251     if (&ptr14 == &no_arg) goto done; args[n++] = &ptr14;
252     if (&ptr15 == &no_arg) goto done; args[n++] = &ptr15;
253     if (&ptr16 == &no_arg) goto done; args[n++] = &ptr16;
254     done:
255    
256     int consumed;
257     int vec[kVecSize];
258     if (DoMatchImpl(*input, ANCHOR_START, &consumed,
259     args, n, vec, kVecSize)) {
260     input->remove_prefix(consumed);
261     return true;
262     } else {
263     return false;
264     }
265     }
266    
267     bool RE::FindAndConsume(StringPiece* input,
268     const Arg& ptr1,
269     const Arg& ptr2,
270     const Arg& ptr3,
271     const Arg& ptr4,
272     const Arg& ptr5,
273     const Arg& ptr6,
274     const Arg& ptr7,
275     const Arg& ptr8,
276     const Arg& ptr9,
277     const Arg& ptr10,
278     const Arg& ptr11,
279     const Arg& ptr12,
280     const Arg& ptr13,
281     const Arg& ptr14,
282     const Arg& ptr15,
283     const Arg& ptr16) const {
284     const Arg* args[kMaxArgs];
285     int n = 0;
286     if (&ptr1 == &no_arg) goto done; args[n++] = &ptr1;
287     if (&ptr2 == &no_arg) goto done; args[n++] = &ptr2;
288     if (&ptr3 == &no_arg) goto done; args[n++] = &ptr3;
289     if (&ptr4 == &no_arg) goto done; args[n++] = &ptr4;
290     if (&ptr5 == &no_arg) goto done; args[n++] = &ptr5;
291     if (&ptr6 == &no_arg) goto done; args[n++] = &ptr6;
292     if (&ptr7 == &no_arg) goto done; args[n++] = &ptr7;
293     if (&ptr8 == &no_arg) goto done; args[n++] = &ptr8;
294     if (&ptr9 == &no_arg) goto done; args[n++] = &ptr9;
295     if (&ptr10 == &no_arg) goto done; args[n++] = &ptr10;
296     if (&ptr11 == &no_arg) goto done; args[n++] = &ptr11;
297     if (&ptr12 == &no_arg) goto done; args[n++] = &ptr12;
298     if (&ptr13 == &no_arg) goto done; args[n++] = &ptr13;
299     if (&ptr14 == &no_arg) goto done; args[n++] = &ptr14;
300     if (&ptr15 == &no_arg) goto done; args[n++] = &ptr15;
301     if (&ptr16 == &no_arg) goto done; args[n++] = &ptr16;
302     done:
303    
304     int consumed;
305     int vec[kVecSize];
306     if (DoMatchImpl(*input, UNANCHORED, &consumed,
307     args, n, vec, kVecSize)) {
308     input->remove_prefix(consumed);
309     return true;
310     } else {
311     return false;
312     }
313     }
314    
315     bool RE::Replace(const StringPiece& rewrite,
316     string *str) const {
317     int vec[kVecSize];
318     int matches = TryMatch(*str, 0, UNANCHORED, vec, kVecSize);
319     if (matches == 0)
320     return false;
321    
322     string s;
323     if (!Rewrite(&s, rewrite, *str, vec, matches))
324     return false;
325    
326     assert(vec[0] >= 0);
327     assert(vec[1] >= 0);
328     str->replace(vec[0], vec[1] - vec[0], s);
329     return true;
330     }
331    
332 nigel 91 // Returns PCRE_NEWLINE_CRLF, PCRE_NEWLINE_CR, or PCRE_NEWLINE_LF.
333     // Note that PCRE_NEWLINE_CRLF is defined to be P_N_CR | P_N_LF.
334     static int NewlineMode(int pcre_options) {
335     // TODO: if we can make it threadsafe, cache this var
336     int newline_mode = 0;
337     /* if (newline_mode) return newline_mode; */ // do this once it's cached
338     if (pcre_options & (PCRE_NEWLINE_CRLF|PCRE_NEWLINE_CR|PCRE_NEWLINE_LF)) {
339     newline_mode = (pcre_options &
340     (PCRE_NEWLINE_CRLF|PCRE_NEWLINE_CR|PCRE_NEWLINE_LF));
341     } else {
342     int newline;
343     pcre_config(PCRE_CONFIG_NEWLINE, &newline);
344     if (newline == 10)
345     newline_mode = PCRE_NEWLINE_LF;
346     else if (newline == 13)
347     newline_mode = PCRE_NEWLINE_CR;
348     else if (newline == 3338)
349     newline_mode = PCRE_NEWLINE_CRLF;
350     else
351     assert("" == "Unexpected return value from pcre_config(NEWLINE)");
352     }
353     return newline_mode;
354     }
355    
356 nigel 77 int RE::GlobalReplace(const StringPiece& rewrite,
357     string *str) const {
358     int count = 0;
359     int vec[kVecSize];
360     string out;
361     int start = 0;
362     int lastend = -1;
363    
364     for (; start <= static_cast<int>(str->length()); count++) {
365     int matches = TryMatch(*str, start, UNANCHORED, vec, kVecSize);
366     if (matches <= 0)
367     break;
368     int matchstart = vec[0], matchend = vec[1];
369     assert(matchstart >= start);
370     assert(matchend >= matchstart);
371     if (matchstart == matchend && matchstart == lastend) {
372     // advance one character if we matched an empty string at the same
373     // place as the last match occurred
374 nigel 91 matchend = start + 1;
375     // If the current char is CR and we're in CRLF mode, skip LF too.
376     // Note it's better to call pcre_fullinfo() than to examine
377     // all_options(), since options_ could have changed bewteen
378     // compile-time and now, but this is simpler and safe enough.
379     if (start+1 < static_cast<int>(str->length()) &&
380     (*str)[start] == '\r' && (*str)[start+1] == '\n' &&
381     NewlineMode(options_.all_options()) == PCRE_NEWLINE_CRLF) {
382     matchend++;
383     }
384     // We also need to advance more than one char if we're in utf8 mode.
385     #ifdef SUPPORT_UTF8
386     if (options_.utf8()) {
387     while (matchend < static_cast<int>(str->length()) &&
388     ((*str)[matchend] & 0xc0) == 0x80)
389     matchend++;
390     }
391     #endif
392     if (matchend <= static_cast<int>(str->length()))
393     out.append(*str, start, matchend - start);
394     start = matchend;
395 nigel 77 } else {
396     out.append(*str, start, matchstart - start);
397     Rewrite(&out, rewrite, *str, vec, matches);
398     start = matchend;
399     lastend = matchend;
400     count++;
401     }
402     }
403    
404     if (count == 0)
405     return 0;
406    
407     if (start < static_cast<int>(str->length()))
408     out.append(*str, start, str->length() - start);
409     swap(out, *str);
410     return count;
411     }
412    
413     bool RE::Extract(const StringPiece& rewrite,
414     const StringPiece& text,
415     string *out) const {
416     int vec[kVecSize];
417     int matches = TryMatch(text, 0, UNANCHORED, vec, kVecSize);
418     if (matches == 0)
419     return false;
420 nigel 81 out->erase();
421 nigel 77 return Rewrite(out, rewrite, text, vec, matches);
422     }
423    
424 nigel 93 /*static*/ string RE::QuoteMeta(const StringPiece& unquoted) {
425     string result;
426    
427     // Escape any ascii character not in [A-Za-z_0-9].
428     //
429     // Note that it's legal to escape a character even if it has no
430     // special meaning in a regular expression -- so this function does
431     // that. (This also makes it identical to the perl function of the
432     // same name; see `perldoc -f quotemeta`.)
433     for (int ii = 0; ii < unquoted.size(); ++ii) {
434     // Note that using 'isalnum' here raises the benchmark time from
435     // 32ns to 58ns:
436     if ((unquoted[ii] < 'a' || unquoted[ii] > 'z') &&
437     (unquoted[ii] < 'A' || unquoted[ii] > 'Z') &&
438     (unquoted[ii] < '0' || unquoted[ii] > '9') &&
439     unquoted[ii] != '_' &&
440     // If this is the part of a UTF8 or Latin1 character, we need
441     // to copy this byte without escaping. Experimentally this is
442     // what works correctly with the regexp library.
443     !(unquoted[ii] & 128)) {
444     result += '\\';
445     }
446     result += unquoted[ii];
447     }
448    
449     return result;
450     }
451    
452 nigel 77 /***** Actual matching and rewriting code *****/
453    
454     int RE::TryMatch(const StringPiece& text,
455     int startpos,
456     Anchor anchor,
457     int *vec,
458     int vecsize) const {
459     pcre* re = (anchor == ANCHOR_BOTH) ? re_full_ : re_partial_;
460     if (re == NULL) {
461     //fprintf(stderr, "Matching against invalid re: %s\n", error_->c_str());
462     return 0;
463     }
464    
465 ph10 199 pcre_extra extra = { 0, 0, 0, 0, 0, 0 };
466 nigel 77 if (options_.match_limit() > 0) {
467 nigel 87 extra.flags |= PCRE_EXTRA_MATCH_LIMIT;
468 nigel 77 extra.match_limit = options_.match_limit();
469     }
470 nigel 87 if (options_.match_limit_recursion() > 0) {
471     extra.flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
472     extra.match_limit_recursion = options_.match_limit_recursion();
473     }
474 nigel 77 int rc = pcre_exec(re, // The regular expression object
475     &extra,
476 nigel 87 (text.data() == NULL) ? "" : text.data(),
477 nigel 77 text.size(),
478     startpos,
479     (anchor == UNANCHORED) ? 0 : PCRE_ANCHORED,
480     vec,
481     vecsize);
482    
483     // Handle errors
484     if (rc == PCRE_ERROR_NOMATCH) {
485     return 0;
486     } else if (rc < 0) {
487     //fprintf(stderr, "Unexpected return code: %d when matching '%s'\n",
488     // re, pattern_.c_str());
489     return 0;
490     } else if (rc == 0) {
491     // pcre_exec() returns 0 as a special case when the number of
492     // capturing subpatterns exceeds the size of the vector.
493     // When this happens, there is a match and the output vector
494     // is filled, but we miss out on the positions of the extra subpatterns.
495     rc = vecsize / 2;
496     }
497    
498     return rc;
499     }
500    
501     bool RE::DoMatchImpl(const StringPiece& text,
502     Anchor anchor,
503     int* consumed,
504     const Arg* const* args,
505     int n,
506     int* vec,
507     int vecsize) const {
508     assert((1 + n) * 3 <= vecsize); // results + PCRE workspace
509     int matches = TryMatch(text, 0, anchor, vec, vecsize);
510     assert(matches >= 0); // TryMatch never returns negatives
511     if (matches == 0)
512     return false;
513    
514     *consumed = vec[1];
515    
516 nigel 87 if (n == 0 || args == NULL) {
517 nigel 77 // We are not interested in results
518     return true;
519     }
520    
521 nigel 87 if (NumberOfCapturingGroups() < n) {
522     // RE has fewer capturing groups than number of arg pointers passed in
523     return false;
524     }
525    
526 nigel 77 // If we got here, we must have matched the whole pattern.
527     // We do not need (can not do) any more checks on the value of 'matches' here
528     // -- see the comment for TryMatch.
529     for (int i = 0; i < n; i++) {
530     const int start = vec[2*(i+1)];
531     const int limit = vec[2*(i+1)+1];
532     if (!args[i]->Parse(text.data() + start, limit-start)) {
533     // TODO: Should we indicate what the error was?
534     return false;
535     }
536     }
537    
538     return true;
539     }
540    
541     bool RE::DoMatch(const StringPiece& text,
542     Anchor anchor,
543     int* consumed,
544     const Arg* const args[],
545     int n) const {
546     assert(n >= 0);
547     size_t const vecsize = (1 + n) * 3; // results + PCRE workspace
548     // (as for kVecSize)
549     int space[21]; // use stack allocation for small vecsize (common case)
550     int* vec = vecsize <= 21 ? space : new int[vecsize];
551     bool retval = DoMatchImpl(text, anchor, consumed, args, n, vec, vecsize);
552     if (vec != space) delete [] vec;
553     return retval;
554     }
555    
556     bool RE::Rewrite(string *out, const StringPiece &rewrite,
557     const StringPiece &text, int *vec, int veclen) const {
558     for (const char *s = rewrite.data(), *end = s + rewrite.size();
559     s < end; s++) {
560     int c = *s;
561     if (c == '\\') {
562     c = *++s;
563     if (isdigit(c)) {
564     int n = (c - '0');
565     if (n >= veclen) {
566     //fprintf(stderr, requested group %d in regexp %.*s\n",
567     // n, rewrite.size(), rewrite.data());
568     return false;
569     }
570     int start = vec[2 * n];
571     if (start >= 0)
572     out->append(text.data() + start, vec[2 * n + 1] - start);
573     } else if (c == '\\') {
574     out->push_back('\\');
575     } else {
576     //fprintf(stderr, "invalid rewrite pattern: %.*s\n",
577     // rewrite.size(), rewrite.data());
578     return false;
579     }
580     } else {
581     out->push_back(c);
582     }
583     }
584     return true;
585     }
586    
587     // Return the number of capturing subpatterns, or -1 if the
588     // regexp wasn't valid on construction.
589 nigel 87 int RE::NumberOfCapturingGroups() const {
590 nigel 77 if (re_partial_ == NULL) return -1;
591    
592     int result;
593     int pcre_retval = pcre_fullinfo(re_partial_, // The regular expression object
594     NULL, // We did not study the pattern
595     PCRE_INFO_CAPTURECOUNT,
596     &result);
597     assert(pcre_retval == 0);
598     return result;
599     }
600    
601     /***** Parsers for various types *****/
602    
603     bool Arg::parse_null(const char* str, int n, void* dest) {
604     // We fail if somebody asked us to store into a non-NULL void* pointer
605     return (dest == NULL);
606     }
607    
608     bool Arg::parse_string(const char* str, int n, void* dest) {
609     reinterpret_cast<string*>(dest)->assign(str, n);
610     return true;
611     }
612    
613     bool Arg::parse_stringpiece(const char* str, int n, void* dest) {
614     reinterpret_cast<StringPiece*>(dest)->set(str, n);
615     return true;
616     }
617    
618     bool Arg::parse_char(const char* str, int n, void* dest) {
619     if (n != 1) return false;
620     *(reinterpret_cast<char*>(dest)) = str[0];
621     return true;
622     }
623    
624     bool Arg::parse_uchar(const char* str, int n, void* dest) {
625     if (n != 1) return false;
626     *(reinterpret_cast<unsigned char*>(dest)) = str[0];
627     return true;
628     }
629    
// Largest number spec that we are willing to parse
static const int kMaxNumberLength = 32;

// Prepares the n bytes at "str" for parsing with the strtoxxx() routines,
// which need NUL-terminated input.
//
// REQUIRES "buf" must have length at least kMaxNumberLength+1
// REQUIRES "n > 0"
//
// Returns one of:
//      a. "buf", holding a NUL-terminated copy of the n bytes, whenever the
//         text fits (n <= kMaxNumberLength)
//      b. "str" itself, when the text is too long for "buf" and the
//         character following it cannot be taken for part of the number
//      c. ""  if the input was invalid and has no hope of being parsed
//
// Text that fits is always copied.  This avoids reading str[n], which lies
// beyond the captured text, and stops the strtoxxx() routines from running
// past the capture: "0" followed by "x1F" is parsed as 0 instead of being
// rejected because the conversion consumed the following characters too.
static const char* TerminateNumber(char* buf, const char* str, int n) {
  if (n <= 0) return "";  // Nothing to parse (contract violation)

  // isspace()/isdigit() require a value representable as unsigned char.
  if (isspace(static_cast<unsigned char>(*str))) {
    // We are less forgiving than the strtoxxx() routines and do not
    // allow leading spaces.
    return "";
  }

  if (n <= kMaxNumberLength) {
    memcpy(buf, str, n);
    buf[n] = '\0';
    return buf;
  }

  // Too long to copy.  Such text can still be a valid number (e.g. one with
  // many leading zeros), so it is parsed in place, but only if the character
  // right after it cannot look like a further digit.
  const unsigned char next = static_cast<unsigned char>(str[n]);
  if (isdigit(next) ||
      ((next >= 'a') && (next <= 'f')) ||
      ((next >= 'A') && (next <= 'F'))) {
    return "";  // Input too big to be a valid number
  }
  return str;
}
661    
662     bool Arg::parse_long_radix(const char* str,
663     int n,
664     void* dest,
665     int radix) {
666     if (n == 0) return false;
667     char buf[kMaxNumberLength+1];
668     str = TerminateNumber(buf, str, n);
669     char* end;
670     errno = 0;
671     long r = strtol(str, &end, radix);
672     if (end != str + n) return false; // Leftover junk
673     if (errno) return false;
674     *(reinterpret_cast<long*>(dest)) = r;
675     return true;
676     }
677    
678     bool Arg::parse_ulong_radix(const char* str,
679     int n,
680     void* dest,
681     int radix) {
682     if (n == 0) return false;
683     char buf[kMaxNumberLength+1];
684     str = TerminateNumber(buf, str, n);
685 nigel 87 if (str[0] == '-') return false; // strtoul() on a negative number?!
686 nigel 77 char* end;
687     errno = 0;
688     unsigned long r = strtoul(str, &end, radix);
689     if (end != str + n) return false; // Leftover junk
690     if (errno) return false;
691     *(reinterpret_cast<unsigned long*>(dest)) = r;
692     return true;
693     }
694    
695     bool Arg::parse_short_radix(const char* str,
696     int n,
697     void* dest,
698     int radix) {
699     long r;
700     if (!parse_long_radix(str, n, &r, radix)) return false; // Could not parse
701     if (r < SHRT_MIN || r > SHRT_MAX) return false; // Out of range
702     *(reinterpret_cast<short*>(dest)) = r;
703     return true;
704     }
705    
706     bool Arg::parse_ushort_radix(const char* str,
707     int n,
708     void* dest,
709     int radix) {
710     unsigned long r;
711     if (!parse_ulong_radix(str, n, &r, radix)) return false; // Could not parse
712     if (r > USHRT_MAX) return false; // Out of range
713     *(reinterpret_cast<unsigned short*>(dest)) = r;
714     return true;
715     }
716    
717     bool Arg::parse_int_radix(const char* str,
718     int n,
719     void* dest,
720     int radix) {
721     long r;
722     if (!parse_long_radix(str, n, &r, radix)) return false; // Could not parse
723     if (r < INT_MIN || r > INT_MAX) return false; // Out of range
724     *(reinterpret_cast<int*>(dest)) = r;
725     return true;
726     }
727    
728     bool Arg::parse_uint_radix(const char* str,
729     int n,
730     void* dest,
731     int radix) {
732     unsigned long r;
733     if (!parse_ulong_radix(str, n, &r, radix)) return false; // Could not parse
734     if (r > UINT_MAX) return false; // Out of range
735     *(reinterpret_cast<unsigned int*>(dest)) = r;
736     return true;
737     }
738    
739     bool Arg::parse_longlong_radix(const char* str,
740     int n,
741     void* dest,
742     int radix) {
743     #ifndef HAVE_LONG_LONG
744     return false;
745     #else
746     if (n == 0) return false;
747     char buf[kMaxNumberLength+1];
748     str = TerminateNumber(buf, str, n);
749     char* end;
750     errno = 0;
751     #if defined HAVE_STRTOQ
752     long long r = strtoq(str, &end, radix);
753     #elif defined HAVE_STRTOLL
754     long long r = strtoll(str, &end, radix);
755     #else
756     #error parse_longlong_radix: cannot convert input to a long-long
757     #endif
758     if (end != str + n) return false; // Leftover junk
759     if (errno) return false;
760     *(reinterpret_cast<long long*>(dest)) = r;
761     return true;
762     #endif /* HAVE_LONG_LONG */
763     }
764    
765     bool Arg::parse_ulonglong_radix(const char* str,
766     int n,
767     void* dest,
768     int radix) {
769     #ifndef HAVE_UNSIGNED_LONG_LONG
770     return false;
771     #else
772     if (n == 0) return false;
773     char buf[kMaxNumberLength+1];
774     str = TerminateNumber(buf, str, n);
775 nigel 87 if (str[0] == '-') return false; // strtoull() on a negative number?!
776 nigel 77 char* end;
777     errno = 0;
778     #if defined HAVE_STRTOQ
779     unsigned long long r = strtouq(str, &end, radix);
780     #elif defined HAVE_STRTOLL
781     unsigned long long r = strtoull(str, &end, radix);
782     #else
783     #error parse_ulonglong_radix: cannot convert input to a long-long
784     #endif
785     if (end != str + n) return false; // Leftover junk
786     if (errno) return false;
787     *(reinterpret_cast<unsigned long long*>(dest)) = r;
788     return true;
789     #endif /* HAVE_UNSIGNED_LONG_LONG */
790     }
791    
792     bool Arg::parse_double(const char* str, int n, void* dest) {
793     if (n == 0) return false;
794     static const int kMaxLength = 200;
795     char buf[kMaxLength];
796     if (n >= kMaxLength) return false;
797     memcpy(buf, str, n);
798     buf[n] = '\0';
799     errno = 0;
800     char* end;
801     double r = strtod(buf, &end);
802     if (end != buf + n) return false; // Leftover junk
803     if (errno) return false;
804     *(reinterpret_cast<double*>(dest)) = r;
805     return true;
806     }
807    
808     bool Arg::parse_float(const char* str, int n, void* dest) {
809     double r;
810     if (!parse_double(str, n, &r)) return false;
811     *(reinterpret_cast<float*>(dest)) = static_cast<float>(r);
812     return true;
813     }
814    
815    
816     #define DEFINE_INTEGER_PARSERS(name) \
817     bool Arg::parse_##name(const char* str, int n, void* dest) { \
818     return parse_##name##_radix(str, n, dest, 10); \
819     } \
820     bool Arg::parse_##name##_hex(const char* str, int n, void* dest) { \
821     return parse_##name##_radix(str, n, dest, 16); \
822     } \
823     bool Arg::parse_##name##_octal(const char* str, int n, void* dest) { \
824     return parse_##name##_radix(str, n, dest, 8); \
825     } \
826     bool Arg::parse_##name##_cradix(const char* str, int n, void* dest) { \
827     return parse_##name##_radix(str, n, dest, 0); \
828     }
829    
830 nigel 93 DEFINE_INTEGER_PARSERS(short) /* */
831     DEFINE_INTEGER_PARSERS(ushort) /* */
832     DEFINE_INTEGER_PARSERS(int) /* Don't use semicolons after these */
833     DEFINE_INTEGER_PARSERS(uint) /* statements because they can cause */
834     DEFINE_INTEGER_PARSERS(long) /* compiler warnings if the checking */
835     DEFINE_INTEGER_PARSERS(ulong) /* level is turned up high enough. */
836     DEFINE_INTEGER_PARSERS(longlong) /* */
837     DEFINE_INTEGER_PARSERS(ulonglong) /* */
838 nigel 77
839     #undef DEFINE_INTEGER_PARSERS
840    
841     } // namespace pcrecpp

Properties

Name Value
svn:eol-style native
svn:keywords "Author Date Id Revision Url"

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12