/[pcre]/code/trunk/pcrecpp.cc
ViewVC logotype

Contents of /code/trunk/pcrecpp.cc

Parent Directory Parent Directory | Revision Log Revision Log


Revision 97 - (show annotations) (download)
Mon Mar 5 12:36:47 2007 UTC (7 years, 1 month ago) by ph10
File size: 30119 byte(s)
Applied Bob and Daniel's patches to convert the build system to automake. Added 
the maintain directory, containing files that are used for maintenance, but are 
not distributed. This is an intermediate step.

1 // Copyright (c) 2005, Google Inc.
2 // All rights reserved.
3 //
4 // Redistribution and use in source and binary forms, with or without
5 // modification, are permitted provided that the following conditions are
6 // met:
7 //
8 // * Redistributions of source code must retain the above copyright
9 // notice, this list of conditions and the following disclaimer.
10 // * Redistributions in binary form must reproduce the above
11 // copyright notice, this list of conditions and the following disclaimer
12 // in the documentation and/or other materials provided with the
13 // distribution.
14 // * Neither the name of Google Inc. nor the names of its
15 // contributors may be used to endorse or promote products derived from
16 // this software without specific prior written permission.
17 //
18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 //
30 // Author: Sanjay Ghemawat
31
32 #ifdef HAVE_CONFIG_H
33 # include <config.h>
34 #endif
35
36 #include <stdlib.h>
37 #include <stdio.h>
38 #include <ctype.h>
39 #include <limits.h> /* for SHRT_MIN, USHRT_MAX, etc */
40 #include <assert.h>
41 #include <errno.h>
42 #include <string>
43 #include <algorithm>
44 // We need this to compile the proper dll on windows/msys. This is copied
45 // from pcre_internal.h. It would probably be better just to include that.
46 #define PCRE_DEFINITION /* Win32 __declspec(export) trigger for .dll */
47 #include "pcre.h"
48 #include "pcre_stringpiece.h"
49 #include "pcrecpp.h"
50
51
52 namespace pcrecpp {
53
54 // Maximum number of args we can set
55 static const int kMaxArgs = 16;
56 static const int kVecSize = (1 + kMaxArgs) * 3; // results + PCRE workspace
57
58 // Special object that stands-in for no argument
59 Arg no_arg((void*)NULL);
60
61 // If a regular expression has no error, its error_ field points here
62 static const string empty_string;
63
64 // If the user doesn't ask for any options, we just use this one
65 static RE_Options default_options;
66
67 void RE::Init(const string& pat, const RE_Options* options) {
68 pattern_ = pat;
69 if (options == NULL) {
70 options_ = default_options;
71 } else {
72 options_ = *options;
73 }
74 error_ = &empty_string;
75 re_full_ = NULL;
76 re_partial_ = NULL;
77
78 re_partial_ = Compile(UNANCHORED);
79 if (re_partial_ != NULL) {
80 // Check for complicated patterns. The following change is
81 // conservative in that it may treat some "simple" patterns
82 // as "complex" (e.g., if the vertical bar is in a character
83 // class or is escaped). But it seems good enough.
84 if (strchr(pat.c_str(), '|') == NULL) {
85 // Simple pattern: we can use position-based checks to perform
86 // fully anchored matches
87 re_full_ = re_partial_;
88 } else {
89 // We need a special pattern for anchored matches
90 re_full_ = Compile(ANCHOR_BOTH);
91 }
92 }
93 }
94
95 void RE::Cleanup() {
96 if (re_full_ != NULL && re_full_ != re_partial_) (*pcre_free)(re_full_);
97 if (re_partial_ != NULL) (*pcre_free)(re_partial_);
98 if (error_ != &empty_string) delete error_;
99 }
100
101
102 RE::~RE() {
103 Cleanup();
104 }
105
106
107 pcre* RE::Compile(Anchor anchor) {
108 // First, convert RE_Options into pcre options
109 int pcre_options = 0;
110 pcre_options = options_.all_options();
111
112 // Special treatment for anchoring. This is needed because at
113 // runtime pcre only provides an option for anchoring at the
114 // beginning of a string (unless you use offset).
115 //
116 // There are three types of anchoring we want:
117 // UNANCHORED Compile the original pattern, and use
118 // a pcre unanchored match.
119 // ANCHOR_START Compile the original pattern, and use
120 // a pcre anchored match.
121 // ANCHOR_BOTH Tack a "\z" to the end of the original pattern
122 // and use a pcre anchored match.
123
124 const char* compile_error;
125 int eoffset;
126 pcre* re;
127 if (anchor != ANCHOR_BOTH) {
128 re = pcre_compile(pattern_.c_str(), pcre_options,
129 &compile_error, &eoffset, NULL);
130 } else {
131 // Tack a '\z' at the end of RE. Parenthesize it first so that
132 // the '\z' applies to all top-level alternatives in the regexp.
133 string wrapped = "(?:"; // A non-counting grouping operator
134 wrapped += pattern_;
135 wrapped += ")\\z";
136 re = pcre_compile(wrapped.c_str(), pcre_options,
137 &compile_error, &eoffset, NULL);
138 }
139 if (re == NULL) {
140 if (error_ == &empty_string) error_ = new string(compile_error);
141 }
142 return re;
143 }
144
145 /***** Matching interfaces *****/
146
147 bool RE::FullMatch(const StringPiece& text,
148 const Arg& ptr1,
149 const Arg& ptr2,
150 const Arg& ptr3,
151 const Arg& ptr4,
152 const Arg& ptr5,
153 const Arg& ptr6,
154 const Arg& ptr7,
155 const Arg& ptr8,
156 const Arg& ptr9,
157 const Arg& ptr10,
158 const Arg& ptr11,
159 const Arg& ptr12,
160 const Arg& ptr13,
161 const Arg& ptr14,
162 const Arg& ptr15,
163 const Arg& ptr16) const {
164 const Arg* args[kMaxArgs];
165 int n = 0;
166 if (&ptr1 == &no_arg) goto done; args[n++] = &ptr1;
167 if (&ptr2 == &no_arg) goto done; args[n++] = &ptr2;
168 if (&ptr3 == &no_arg) goto done; args[n++] = &ptr3;
169 if (&ptr4 == &no_arg) goto done; args[n++] = &ptr4;
170 if (&ptr5 == &no_arg) goto done; args[n++] = &ptr5;
171 if (&ptr6 == &no_arg) goto done; args[n++] = &ptr6;
172 if (&ptr7 == &no_arg) goto done; args[n++] = &ptr7;
173 if (&ptr8 == &no_arg) goto done; args[n++] = &ptr8;
174 if (&ptr9 == &no_arg) goto done; args[n++] = &ptr9;
175 if (&ptr10 == &no_arg) goto done; args[n++] = &ptr10;
176 if (&ptr11 == &no_arg) goto done; args[n++] = &ptr11;
177 if (&ptr12 == &no_arg) goto done; args[n++] = &ptr12;
178 if (&ptr13 == &no_arg) goto done; args[n++] = &ptr13;
179 if (&ptr14 == &no_arg) goto done; args[n++] = &ptr14;
180 if (&ptr15 == &no_arg) goto done; args[n++] = &ptr15;
181 if (&ptr16 == &no_arg) goto done; args[n++] = &ptr16;
182 done:
183
184 int consumed;
185 int vec[kVecSize];
186 return DoMatchImpl(text, ANCHOR_BOTH, &consumed, args, n, vec, kVecSize);
187 }
188
189 bool RE::PartialMatch(const StringPiece& text,
190 const Arg& ptr1,
191 const Arg& ptr2,
192 const Arg& ptr3,
193 const Arg& ptr4,
194 const Arg& ptr5,
195 const Arg& ptr6,
196 const Arg& ptr7,
197 const Arg& ptr8,
198 const Arg& ptr9,
199 const Arg& ptr10,
200 const Arg& ptr11,
201 const Arg& ptr12,
202 const Arg& ptr13,
203 const Arg& ptr14,
204 const Arg& ptr15,
205 const Arg& ptr16) const {
206 const Arg* args[kMaxArgs];
207 int n = 0;
208 if (&ptr1 == &no_arg) goto done; args[n++] = &ptr1;
209 if (&ptr2 == &no_arg) goto done; args[n++] = &ptr2;
210 if (&ptr3 == &no_arg) goto done; args[n++] = &ptr3;
211 if (&ptr4 == &no_arg) goto done; args[n++] = &ptr4;
212 if (&ptr5 == &no_arg) goto done; args[n++] = &ptr5;
213 if (&ptr6 == &no_arg) goto done; args[n++] = &ptr6;
214 if (&ptr7 == &no_arg) goto done; args[n++] = &ptr7;
215 if (&ptr8 == &no_arg) goto done; args[n++] = &ptr8;
216 if (&ptr9 == &no_arg) goto done; args[n++] = &ptr9;
217 if (&ptr10 == &no_arg) goto done; args[n++] = &ptr10;
218 if (&ptr11 == &no_arg) goto done; args[n++] = &ptr11;
219 if (&ptr12 == &no_arg) goto done; args[n++] = &ptr12;
220 if (&ptr13 == &no_arg) goto done; args[n++] = &ptr13;
221 if (&ptr14 == &no_arg) goto done; args[n++] = &ptr14;
222 if (&ptr15 == &no_arg) goto done; args[n++] = &ptr15;
223 if (&ptr16 == &no_arg) goto done; args[n++] = &ptr16;
224 done:
225
226 int consumed;
227 int vec[kVecSize];
228 return DoMatchImpl(text, UNANCHORED, &consumed, args, n, vec, kVecSize);
229 }
230
231 bool RE::Consume(StringPiece* input,
232 const Arg& ptr1,
233 const Arg& ptr2,
234 const Arg& ptr3,
235 const Arg& ptr4,
236 const Arg& ptr5,
237 const Arg& ptr6,
238 const Arg& ptr7,
239 const Arg& ptr8,
240 const Arg& ptr9,
241 const Arg& ptr10,
242 const Arg& ptr11,
243 const Arg& ptr12,
244 const Arg& ptr13,
245 const Arg& ptr14,
246 const Arg& ptr15,
247 const Arg& ptr16) const {
248 const Arg* args[kMaxArgs];
249 int n = 0;
250 if (&ptr1 == &no_arg) goto done; args[n++] = &ptr1;
251 if (&ptr2 == &no_arg) goto done; args[n++] = &ptr2;
252 if (&ptr3 == &no_arg) goto done; args[n++] = &ptr3;
253 if (&ptr4 == &no_arg) goto done; args[n++] = &ptr4;
254 if (&ptr5 == &no_arg) goto done; args[n++] = &ptr5;
255 if (&ptr6 == &no_arg) goto done; args[n++] = &ptr6;
256 if (&ptr7 == &no_arg) goto done; args[n++] = &ptr7;
257 if (&ptr8 == &no_arg) goto done; args[n++] = &ptr8;
258 if (&ptr9 == &no_arg) goto done; args[n++] = &ptr9;
259 if (&ptr10 == &no_arg) goto done; args[n++] = &ptr10;
260 if (&ptr11 == &no_arg) goto done; args[n++] = &ptr11;
261 if (&ptr12 == &no_arg) goto done; args[n++] = &ptr12;
262 if (&ptr13 == &no_arg) goto done; args[n++] = &ptr13;
263 if (&ptr14 == &no_arg) goto done; args[n++] = &ptr14;
264 if (&ptr15 == &no_arg) goto done; args[n++] = &ptr15;
265 if (&ptr16 == &no_arg) goto done; args[n++] = &ptr16;
266 done:
267
268 int consumed;
269 int vec[kVecSize];
270 if (DoMatchImpl(*input, ANCHOR_START, &consumed,
271 args, n, vec, kVecSize)) {
272 input->remove_prefix(consumed);
273 return true;
274 } else {
275 return false;
276 }
277 }
278
279 bool RE::FindAndConsume(StringPiece* input,
280 const Arg& ptr1,
281 const Arg& ptr2,
282 const Arg& ptr3,
283 const Arg& ptr4,
284 const Arg& ptr5,
285 const Arg& ptr6,
286 const Arg& ptr7,
287 const Arg& ptr8,
288 const Arg& ptr9,
289 const Arg& ptr10,
290 const Arg& ptr11,
291 const Arg& ptr12,
292 const Arg& ptr13,
293 const Arg& ptr14,
294 const Arg& ptr15,
295 const Arg& ptr16) const {
296 const Arg* args[kMaxArgs];
297 int n = 0;
298 if (&ptr1 == &no_arg) goto done; args[n++] = &ptr1;
299 if (&ptr2 == &no_arg) goto done; args[n++] = &ptr2;
300 if (&ptr3 == &no_arg) goto done; args[n++] = &ptr3;
301 if (&ptr4 == &no_arg) goto done; args[n++] = &ptr4;
302 if (&ptr5 == &no_arg) goto done; args[n++] = &ptr5;
303 if (&ptr6 == &no_arg) goto done; args[n++] = &ptr6;
304 if (&ptr7 == &no_arg) goto done; args[n++] = &ptr7;
305 if (&ptr8 == &no_arg) goto done; args[n++] = &ptr8;
306 if (&ptr9 == &no_arg) goto done; args[n++] = &ptr9;
307 if (&ptr10 == &no_arg) goto done; args[n++] = &ptr10;
308 if (&ptr11 == &no_arg) goto done; args[n++] = &ptr11;
309 if (&ptr12 == &no_arg) goto done; args[n++] = &ptr12;
310 if (&ptr13 == &no_arg) goto done; args[n++] = &ptr13;
311 if (&ptr14 == &no_arg) goto done; args[n++] = &ptr14;
312 if (&ptr15 == &no_arg) goto done; args[n++] = &ptr15;
313 if (&ptr16 == &no_arg) goto done; args[n++] = &ptr16;
314 done:
315
316 int consumed;
317 int vec[kVecSize];
318 if (DoMatchImpl(*input, UNANCHORED, &consumed,
319 args, n, vec, kVecSize)) {
320 input->remove_prefix(consumed);
321 return true;
322 } else {
323 return false;
324 }
325 }
326
327 bool RE::Replace(const StringPiece& rewrite,
328 string *str) const {
329 int vec[kVecSize];
330 int matches = TryMatch(*str, 0, UNANCHORED, vec, kVecSize);
331 if (matches == 0)
332 return false;
333
334 string s;
335 if (!Rewrite(&s, rewrite, *str, vec, matches))
336 return false;
337
338 assert(vec[0] >= 0);
339 assert(vec[1] >= 0);
340 str->replace(vec[0], vec[1] - vec[0], s);
341 return true;
342 }
343
344 // Returns PCRE_NEWLINE_CRLF, PCRE_NEWLINE_CR, or PCRE_NEWLINE_LF.
345 // Note that PCRE_NEWLINE_CRLF is defined to be P_N_CR | P_N_LF.
346 static int NewlineMode(int pcre_options) {
347 // TODO: if we can make it threadsafe, cache this var
348 int newline_mode = 0;
349 /* if (newline_mode) return newline_mode; */ // do this once it's cached
350 if (pcre_options & (PCRE_NEWLINE_CRLF|PCRE_NEWLINE_CR|PCRE_NEWLINE_LF)) {
351 newline_mode = (pcre_options &
352 (PCRE_NEWLINE_CRLF|PCRE_NEWLINE_CR|PCRE_NEWLINE_LF));
353 } else {
354 int newline;
355 pcre_config(PCRE_CONFIG_NEWLINE, &newline);
356 if (newline == 10)
357 newline_mode = PCRE_NEWLINE_LF;
358 else if (newline == 13)
359 newline_mode = PCRE_NEWLINE_CR;
360 else if (newline == 3338)
361 newline_mode = PCRE_NEWLINE_CRLF;
362 else
363 assert("" == "Unexpected return value from pcre_config(NEWLINE)");
364 }
365 return newline_mode;
366 }
367
368 int RE::GlobalReplace(const StringPiece& rewrite,
369 string *str) const {
370 int count = 0;
371 int vec[kVecSize];
372 string out;
373 int start = 0;
374 int lastend = -1;
375
376 for (; start <= static_cast<int>(str->length()); count++) {
377 int matches = TryMatch(*str, start, UNANCHORED, vec, kVecSize);
378 if (matches <= 0)
379 break;
380 int matchstart = vec[0], matchend = vec[1];
381 assert(matchstart >= start);
382 assert(matchend >= matchstart);
383 if (matchstart == matchend && matchstart == lastend) {
384 // advance one character if we matched an empty string at the same
385 // place as the last match occurred
386 matchend = start + 1;
387 // If the current char is CR and we're in CRLF mode, skip LF too.
388 // Note it's better to call pcre_fullinfo() than to examine
389 // all_options(), since options_ could have changed bewteen
390 // compile-time and now, but this is simpler and safe enough.
391 if (start+1 < static_cast<int>(str->length()) &&
392 (*str)[start] == '\r' && (*str)[start+1] == '\n' &&
393 NewlineMode(options_.all_options()) == PCRE_NEWLINE_CRLF) {
394 matchend++;
395 }
396 // We also need to advance more than one char if we're in utf8 mode.
397 #ifdef SUPPORT_UTF8
398 if (options_.utf8()) {
399 while (matchend < static_cast<int>(str->length()) &&
400 ((*str)[matchend] & 0xc0) == 0x80)
401 matchend++;
402 }
403 #endif
404 if (matchend <= static_cast<int>(str->length()))
405 out.append(*str, start, matchend - start);
406 start = matchend;
407 } else {
408 out.append(*str, start, matchstart - start);
409 Rewrite(&out, rewrite, *str, vec, matches);
410 start = matchend;
411 lastend = matchend;
412 count++;
413 }
414 }
415
416 if (count == 0)
417 return 0;
418
419 if (start < static_cast<int>(str->length()))
420 out.append(*str, start, str->length() - start);
421 swap(out, *str);
422 return count;
423 }
424
425 bool RE::Extract(const StringPiece& rewrite,
426 const StringPiece& text,
427 string *out) const {
428 int vec[kVecSize];
429 int matches = TryMatch(text, 0, UNANCHORED, vec, kVecSize);
430 if (matches == 0)
431 return false;
432 out->erase();
433 return Rewrite(out, rewrite, text, vec, matches);
434 }
435
436 /*static*/ string RE::QuoteMeta(const StringPiece& unquoted) {
437 string result;
438
439 // Escape any ascii character not in [A-Za-z_0-9].
440 //
441 // Note that it's legal to escape a character even if it has no
442 // special meaning in a regular expression -- so this function does
443 // that. (This also makes it identical to the perl function of the
444 // same name; see `perldoc -f quotemeta`.)
445 for (int ii = 0; ii < unquoted.size(); ++ii) {
446 // Note that using 'isalnum' here raises the benchmark time from
447 // 32ns to 58ns:
448 if ((unquoted[ii] < 'a' || unquoted[ii] > 'z') &&
449 (unquoted[ii] < 'A' || unquoted[ii] > 'Z') &&
450 (unquoted[ii] < '0' || unquoted[ii] > '9') &&
451 unquoted[ii] != '_' &&
452 // If this is the part of a UTF8 or Latin1 character, we need
453 // to copy this byte without escaping. Experimentally this is
454 // what works correctly with the regexp library.
455 !(unquoted[ii] & 128)) {
456 result += '\\';
457 }
458 result += unquoted[ii];
459 }
460
461 return result;
462 }
463
464 /***** Actual matching and rewriting code *****/
465
466 int RE::TryMatch(const StringPiece& text,
467 int startpos,
468 Anchor anchor,
469 int *vec,
470 int vecsize) const {
471 pcre* re = (anchor == ANCHOR_BOTH) ? re_full_ : re_partial_;
472 if (re == NULL) {
473 //fprintf(stderr, "Matching against invalid re: %s\n", error_->c_str());
474 return 0;
475 }
476
477 pcre_extra extra = { 0 };
478 if (options_.match_limit() > 0) {
479 extra.flags |= PCRE_EXTRA_MATCH_LIMIT;
480 extra.match_limit = options_.match_limit();
481 }
482 if (options_.match_limit_recursion() > 0) {
483 extra.flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
484 extra.match_limit_recursion = options_.match_limit_recursion();
485 }
486 int rc = pcre_exec(re, // The regular expression object
487 &extra,
488 (text.data() == NULL) ? "" : text.data(),
489 text.size(),
490 startpos,
491 (anchor == UNANCHORED) ? 0 : PCRE_ANCHORED,
492 vec,
493 vecsize);
494
495 // Handle errors
496 if (rc == PCRE_ERROR_NOMATCH) {
497 return 0;
498 } else if (rc < 0) {
499 //fprintf(stderr, "Unexpected return code: %d when matching '%s'\n",
500 // re, pattern_.c_str());
501 return 0;
502 } else if (rc == 0) {
503 // pcre_exec() returns 0 as a special case when the number of
504 // capturing subpatterns exceeds the size of the vector.
505 // When this happens, there is a match and the output vector
506 // is filled, but we miss out on the positions of the extra subpatterns.
507 rc = vecsize / 2;
508 }
509
510 if ((anchor == ANCHOR_BOTH) && (re_full_ == re_partial_)) {
511 // We need an extra check to make sure that the match extended
512 // to the end of the input string
513 assert(vec[0] == 0); // PCRE_ANCHORED forces starting match
514 if (vec[1] != text.size()) return 0; // Did not get ending match
515 }
516
517 return rc;
518 }
519
520 bool RE::DoMatchImpl(const StringPiece& text,
521 Anchor anchor,
522 int* consumed,
523 const Arg* const* args,
524 int n,
525 int* vec,
526 int vecsize) const {
527 assert((1 + n) * 3 <= vecsize); // results + PCRE workspace
528 int matches = TryMatch(text, 0, anchor, vec, vecsize);
529 assert(matches >= 0); // TryMatch never returns negatives
530 if (matches == 0)
531 return false;
532
533 *consumed = vec[1];
534
535 if (n == 0 || args == NULL) {
536 // We are not interested in results
537 return true;
538 }
539
540 if (NumberOfCapturingGroups() < n) {
541 // RE has fewer capturing groups than number of arg pointers passed in
542 return false;
543 }
544
545 // If we got here, we must have matched the whole pattern.
546 // We do not need (can not do) any more checks on the value of 'matches' here
547 // -- see the comment for TryMatch.
548 for (int i = 0; i < n; i++) {
549 const int start = vec[2*(i+1)];
550 const int limit = vec[2*(i+1)+1];
551 if (!args[i]->Parse(text.data() + start, limit-start)) {
552 // TODO: Should we indicate what the error was?
553 return false;
554 }
555 }
556
557 return true;
558 }
559
560 bool RE::DoMatch(const StringPiece& text,
561 Anchor anchor,
562 int* consumed,
563 const Arg* const args[],
564 int n) const {
565 assert(n >= 0);
566 size_t const vecsize = (1 + n) * 3; // results + PCRE workspace
567 // (as for kVecSize)
568 int space[21]; // use stack allocation for small vecsize (common case)
569 int* vec = vecsize <= 21 ? space : new int[vecsize];
570 bool retval = DoMatchImpl(text, anchor, consumed, args, n, vec, vecsize);
571 if (vec != space) delete [] vec;
572 return retval;
573 }
574
575 bool RE::Rewrite(string *out, const StringPiece &rewrite,
576 const StringPiece &text, int *vec, int veclen) const {
577 for (const char *s = rewrite.data(), *end = s + rewrite.size();
578 s < end; s++) {
579 int c = *s;
580 if (c == '\\') {
581 c = *++s;
582 if (isdigit(c)) {
583 int n = (c - '0');
584 if (n >= veclen) {
585 //fprintf(stderr, requested group %d in regexp %.*s\n",
586 // n, rewrite.size(), rewrite.data());
587 return false;
588 }
589 int start = vec[2 * n];
590 if (start >= 0)
591 out->append(text.data() + start, vec[2 * n + 1] - start);
592 } else if (c == '\\') {
593 out->push_back('\\');
594 } else {
595 //fprintf(stderr, "invalid rewrite pattern: %.*s\n",
596 // rewrite.size(), rewrite.data());
597 return false;
598 }
599 } else {
600 out->push_back(c);
601 }
602 }
603 return true;
604 }
605
606 // Return the number of capturing subpatterns, or -1 if the
607 // regexp wasn't valid on construction.
608 int RE::NumberOfCapturingGroups() const {
609 if (re_partial_ == NULL) return -1;
610
611 int result;
612 int pcre_retval = pcre_fullinfo(re_partial_, // The regular expression object
613 NULL, // We did not study the pattern
614 PCRE_INFO_CAPTURECOUNT,
615 &result);
616 assert(pcre_retval == 0);
617 return result;
618 }
619
620 /***** Parsers for various types *****/
621
622 bool Arg::parse_null(const char* str, int n, void* dest) {
623 // We fail if somebody asked us to store into a non-NULL void* pointer
624 return (dest == NULL);
625 }
626
627 bool Arg::parse_string(const char* str, int n, void* dest) {
628 reinterpret_cast<string*>(dest)->assign(str, n);
629 return true;
630 }
631
632 bool Arg::parse_stringpiece(const char* str, int n, void* dest) {
633 reinterpret_cast<StringPiece*>(dest)->set(str, n);
634 return true;
635 }
636
637 bool Arg::parse_char(const char* str, int n, void* dest) {
638 if (n != 1) return false;
639 *(reinterpret_cast<char*>(dest)) = str[0];
640 return true;
641 }
642
643 bool Arg::parse_uchar(const char* str, int n, void* dest) {
644 if (n != 1) return false;
645 *(reinterpret_cast<unsigned char*>(dest)) = str[0];
646 return true;
647 }
648
// Largest number spec that we are willing to parse
static const int kMaxNumberLength = 32;

// Returns true when the byte right after the n-byte capture at "str"
// could be swallowed by a strtoxxx() routine parsing the capture in place:
// a decimal or hex digit, or an 'x'/'X' that would turn a capture of
// exactly "0" (optionally signed) into a "0x" hex prefix.
static bool NextCharCouldExtendNumber(const char* str, int n) {
  const unsigned char next = static_cast<unsigned char>(str[n]);
  if (isdigit(next) ||
      ((next >= 'a') && (next <= 'f')) ||
      ((next >= 'A') && (next <= 'F'))) {
    return true;
  }
  if (next == 'x' || next == 'X') {
    if (n == 1) return str[0] == '0';
    if (n == 2) return (str[0] == '-' || str[0] == '+') && str[1] == '0';
  }
  return false;
}

// REQUIRES "buf" must have length at least kMaxNumberLength+1
// REQUIRES "n > 0"
// Copies "str" into "buf" and null-terminates if necessary.
// Returns one of:
//      a. "str" if no termination is needed
//      b. "buf" if the string was copied and null-terminated
//      c. "" if the input was invalid and has no hope of being parsed
//
// Note that, like the strtoxxx() routines the result is meant for, this
// inspects str[n], the byte just past the capture.
static const char* TerminateNumber(char* buf, const char* str, int n) {
  // The <ctype.h> functions require a value representable as unsigned
  // char, so the byte is converted before being classified.
  if ((n > 0) && isspace(static_cast<unsigned char>(*str))) {
    // We are less forgiving than the strtoxxx() routines and do not
    // allow leading spaces.
    return "";
  }

  if (!NextCharCouldExtendNumber(str, n)) {
    // We can parse right out of the supplied string, so return it.
    return str;
  }

  // The text after the capture looks like more of the number; parse an
  // isolated, null-terminated copy instead.
  if (n > kMaxNumberLength) return "";  // Input too big to be a valid number
  memcpy(buf, str, n);
  buf[n] = '\0';
  return buf;
}
680
681 bool Arg::parse_long_radix(const char* str,
682 int n,
683 void* dest,
684 int radix) {
685 if (n == 0) return false;
686 char buf[kMaxNumberLength+1];
687 str = TerminateNumber(buf, str, n);
688 char* end;
689 errno = 0;
690 long r = strtol(str, &end, radix);
691 if (end != str + n) return false; // Leftover junk
692 if (errno) return false;
693 *(reinterpret_cast<long*>(dest)) = r;
694 return true;
695 }
696
697 bool Arg::parse_ulong_radix(const char* str,
698 int n,
699 void* dest,
700 int radix) {
701 if (n == 0) return false;
702 char buf[kMaxNumberLength+1];
703 str = TerminateNumber(buf, str, n);
704 if (str[0] == '-') return false; // strtoul() on a negative number?!
705 char* end;
706 errno = 0;
707 unsigned long r = strtoul(str, &end, radix);
708 if (end != str + n) return false; // Leftover junk
709 if (errno) return false;
710 *(reinterpret_cast<unsigned long*>(dest)) = r;
711 return true;
712 }
713
714 bool Arg::parse_short_radix(const char* str,
715 int n,
716 void* dest,
717 int radix) {
718 long r;
719 if (!parse_long_radix(str, n, &r, radix)) return false; // Could not parse
720 if (r < SHRT_MIN || r > SHRT_MAX) return false; // Out of range
721 *(reinterpret_cast<short*>(dest)) = r;
722 return true;
723 }
724
725 bool Arg::parse_ushort_radix(const char* str,
726 int n,
727 void* dest,
728 int radix) {
729 unsigned long r;
730 if (!parse_ulong_radix(str, n, &r, radix)) return false; // Could not parse
731 if (r > USHRT_MAX) return false; // Out of range
732 *(reinterpret_cast<unsigned short*>(dest)) = r;
733 return true;
734 }
735
736 bool Arg::parse_int_radix(const char* str,
737 int n,
738 void* dest,
739 int radix) {
740 long r;
741 if (!parse_long_radix(str, n, &r, radix)) return false; // Could not parse
742 if (r < INT_MIN || r > INT_MAX) return false; // Out of range
743 *(reinterpret_cast<int*>(dest)) = r;
744 return true;
745 }
746
747 bool Arg::parse_uint_radix(const char* str,
748 int n,
749 void* dest,
750 int radix) {
751 unsigned long r;
752 if (!parse_ulong_radix(str, n, &r, radix)) return false; // Could not parse
753 if (r > UINT_MAX) return false; // Out of range
754 *(reinterpret_cast<unsigned int*>(dest)) = r;
755 return true;
756 }
757
758 bool Arg::parse_longlong_radix(const char* str,
759 int n,
760 void* dest,
761 int radix) {
762 #ifndef HAVE_LONG_LONG
763 return false;
764 #else
765 if (n == 0) return false;
766 char buf[kMaxNumberLength+1];
767 str = TerminateNumber(buf, str, n);
768 char* end;
769 errno = 0;
770 #if defined HAVE_STRTOQ
771 long long r = strtoq(str, &end, radix);
772 #elif defined HAVE_STRTOLL
773 long long r = strtoll(str, &end, radix);
774 #else
775 #error parse_longlong_radix: cannot convert input to a long-long
776 #endif
777 if (end != str + n) return false; // Leftover junk
778 if (errno) return false;
779 *(reinterpret_cast<long long*>(dest)) = r;
780 return true;
781 #endif /* HAVE_LONG_LONG */
782 }
783
784 bool Arg::parse_ulonglong_radix(const char* str,
785 int n,
786 void* dest,
787 int radix) {
788 #ifndef HAVE_UNSIGNED_LONG_LONG
789 return false;
790 #else
791 if (n == 0) return false;
792 char buf[kMaxNumberLength+1];
793 str = TerminateNumber(buf, str, n);
794 if (str[0] == '-') return false; // strtoull() on a negative number?!
795 char* end;
796 errno = 0;
797 #if defined HAVE_STRTOQ
798 unsigned long long r = strtouq(str, &end, radix);
799 #elif defined HAVE_STRTOLL
800 unsigned long long r = strtoull(str, &end, radix);
801 #else
802 #error parse_ulonglong_radix: cannot convert input to a long-long
803 #endif
804 if (end != str + n) return false; // Leftover junk
805 if (errno) return false;
806 *(reinterpret_cast<unsigned long long*>(dest)) = r;
807 return true;
808 #endif /* HAVE_UNSIGNED_LONG_LONG */
809 }
810
811 bool Arg::parse_double(const char* str, int n, void* dest) {
812 if (n == 0) return false;
813 static const int kMaxLength = 200;
814 char buf[kMaxLength];
815 if (n >= kMaxLength) return false;
816 memcpy(buf, str, n);
817 buf[n] = '\0';
818 errno = 0;
819 char* end;
820 double r = strtod(buf, &end);
821 if (end != buf + n) return false; // Leftover junk
822 if (errno) return false;
823 *(reinterpret_cast<double*>(dest)) = r;
824 return true;
825 }
826
827 bool Arg::parse_float(const char* str, int n, void* dest) {
828 double r;
829 if (!parse_double(str, n, &r)) return false;
830 *(reinterpret_cast<float*>(dest)) = static_cast<float>(r);
831 return true;
832 }
833
834
835 #define DEFINE_INTEGER_PARSERS(name) \
836 bool Arg::parse_##name(const char* str, int n, void* dest) { \
837 return parse_##name##_radix(str, n, dest, 10); \
838 } \
839 bool Arg::parse_##name##_hex(const char* str, int n, void* dest) { \
840 return parse_##name##_radix(str, n, dest, 16); \
841 } \
842 bool Arg::parse_##name##_octal(const char* str, int n, void* dest) { \
843 return parse_##name##_radix(str, n, dest, 8); \
844 } \
845 bool Arg::parse_##name##_cradix(const char* str, int n, void* dest) { \
846 return parse_##name##_radix(str, n, dest, 0); \
847 }
848
849 DEFINE_INTEGER_PARSERS(short) /* */
850 DEFINE_INTEGER_PARSERS(ushort) /* */
851 DEFINE_INTEGER_PARSERS(int) /* Don't use semicolons after these */
852 DEFINE_INTEGER_PARSERS(uint) /* statements because they can cause */
853 DEFINE_INTEGER_PARSERS(long) /* compiler warnings if the checking */
854 DEFINE_INTEGER_PARSERS(ulong) /* level is turned up high enough. */
855 DEFINE_INTEGER_PARSERS(longlong) /* */
856 DEFINE_INTEGER_PARSERS(ulonglong) /* */
857
858 #undef DEFINE_INTEGER_PARSERS
859
860 } // namespace pcrecpp

Properties

Name Value
svn:eol-style native
svn:keywords "Author Date Id Revision Url"

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12