| 1 |
// Copyright (c) 2005, Google Inc.
|
| 2 |
// All rights reserved.
|
| 3 |
//
|
| 4 |
// Redistribution and use in source and binary forms, with or without
|
| 5 |
// modification, are permitted provided that the following conditions are
|
| 6 |
// met:
|
| 7 |
//
|
| 8 |
// * Redistributions of source code must retain the above copyright
|
| 9 |
// notice, this list of conditions and the following disclaimer.
|
| 10 |
// * Redistributions in binary form must reproduce the above
|
| 11 |
// copyright notice, this list of conditions and the following disclaimer
|
| 12 |
// in the documentation and/or other materials provided with the
|
| 13 |
// distribution.
|
| 14 |
// * Neither the name of Google Inc. nor the names of its
|
| 15 |
// contributors may be used to endorse or promote products derived from
|
| 16 |
// this software without specific prior written permission.
|
| 17 |
//
|
| 18 |
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
| 19 |
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
| 20 |
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
| 21 |
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
| 22 |
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
| 23 |
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
| 24 |
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
| 25 |
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
| 26 |
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
| 27 |
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
| 28 |
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
| 29 |
//
|
| 30 |
// Author: Sanjay Ghemawat
|
| 31 |
|
| 32 |
#include <vector>
|
| 33 |
#include <assert.h>
|
| 34 |
#include "config.h"
|
| 35 |
#include "pcre_scanner.h"
|
| 36 |
|
| 37 |
using std::vector;
|
| 38 |
|
| 39 |
namespace pcrecpp {
|
| 40 |
|
| 41 |
// Default constructor: scans an empty buffer.  No skip expression is
// installed, skipping is off, and comment saving is off.
Scanner::Scanner()
  : data_(), input_(data_),  // input_ is constructed from the (empty) data_
    skip_(NULL), should_skip_(false), skip_repeat_(false),
    save_comments_(false), comments_(NULL), comments_offset_(0) {
  // Nothing else to do; all state is set by the initializer list.
}
|
| 51 |
|
| 52 |
// Constructs a scanner over the text "in".  data_ is initialised from "in",
// and input_ is then constructed from data_ (not from "in" itself).  No skip
// expression is installed, skipping is off, and comment saving is off.
Scanner::Scanner(const string& in)
  : data_(in), input_(data_),
    skip_(NULL), should_skip_(false), skip_repeat_(false),
    save_comments_(false), comments_(NULL), comments_offset_(0) {
  // Nothing else to do; all state is set by the initializer list.
}
|
| 62 |
|
| 63 |
// Releases the objects held through comments_ and skip_.  Either pointer may
// still be NULL; deleting a null pointer is a no-op, so no checks are needed.
Scanner::~Scanner() {
  delete comments_;
  delete skip_;
}
|
| 67 |
|
| 68 |
void Scanner::SetSkipExpression(const char* re) {
|
| 69 |
delete skip_;
|
| 70 |
if (re != NULL) {
|
| 71 |
skip_ = new RE(re);
|
| 72 |
should_skip_ = true;
|
| 73 |
skip_repeat_ = true;
|
| 74 |
ConsumeSkip();
|
| 75 |
} else {
|
| 76 |
skip_ = NULL;
|
| 77 |
should_skip_ = false;
|
| 78 |
skip_repeat_ = false;
|
| 79 |
}
|
| 80 |
}
|
| 81 |
|
| 82 |
void Scanner::Skip(const char* re) {
|
| 83 |
delete skip_;
|
| 84 |
if (re != NULL) {
|
| 85 |
skip_ = new RE(re);
|
| 86 |
should_skip_ = true;
|
| 87 |
skip_repeat_ = false;
|
| 88 |
ConsumeSkip();
|
| 89 |
} else {
|
| 90 |
skip_ = NULL;
|
| 91 |
should_skip_ = false;
|
| 92 |
skip_repeat_ = false;
|
| 93 |
}
|
| 94 |
}
|
| 95 |
|
| 96 |
// Turns skipping off without discarding the installed skip expression.
// A skip expression must already be installed (checked by assert only).
void Scanner::DisableSkip() {
  assert(NULL != skip_);
  should_skip_ = false;
}
|
| 100 |
|
| 101 |
// Turns skipping back on and immediately applies the skip expression to the
// current input.  A skip expression must already be installed; this is only
// checked by assert, and ConsumeSkip() dereferences skip_ unconditionally.
void Scanner::EnableSkip() {
  assert(NULL != skip_);
  should_skip_ = true;
  ConsumeSkip();
}
|
| 106 |
|
| 107 |
int Scanner::LineNumber() const {
|
| 108 |
// TODO: Make it more efficient by keeping track of the last point
|
| 109 |
// where we computed line numbers and counting newlines since then.
|
| 110 |
// We could use std:count, but not all systems have it. :-(
|
| 111 |
int count = 1;
|
| 112 |
for (const char* p = data_.data(); p < input_.data(); ++p)
|
| 113 |
if (*p == '\n')
|
| 114 |
++count;
|
| 115 |
return count;
|
| 116 |
}
|
| 117 |
|
| 118 |
int Scanner::Offset() const {
|
| 119 |
return input_.data() - data_.c_str();
|
| 120 |
}
|
| 121 |
|
| 122 |
bool Scanner::LookingAt(const RE& re) const {
|
| 123 |
int consumed;
|
| 124 |
return re.DoMatch(input_, RE::ANCHOR_START, &consumed, 0, 0);
|
| 125 |
}
|
| 126 |
|
| 127 |
|
| 128 |
bool Scanner::Consume(const RE& re,
|
| 129 |
const Arg& arg0,
|
| 130 |
const Arg& arg1,
|
| 131 |
const Arg& arg2) {
|
| 132 |
const bool result = re.Consume(&input_, arg0, arg1, arg2);
|
| 133 |
if (result && should_skip_) ConsumeSkip();
|
| 134 |
return result;
|
| 135 |
}
|
| 136 |
|
| 137 |
// helper function to consume *skip_ and honour save_comments_
|
| 138 |
void Scanner::ConsumeSkip() {
|
| 139 |
const char* start_data = input_.data();
|
| 140 |
while (skip_->Consume(&input_)) {
|
| 141 |
if (!skip_repeat_) {
|
| 142 |
// Only one skip allowed.
|
| 143 |
break;
|
| 144 |
}
|
| 145 |
}
|
| 146 |
if (save_comments_) {
|
| 147 |
if (comments_ == NULL) {
|
| 148 |
comments_ = new vector<StringPiece>;
|
| 149 |
}
|
| 150 |
// already pointing one past end, so no need to +1
|
| 151 |
int length = input_.data() - start_data;
|
| 152 |
if (length > 0) {
|
| 153 |
comments_->push_back(StringPiece(start_data, length));
|
| 154 |
}
|
| 155 |
}
|
| 156 |
}
|
| 157 |
|
| 158 |
|
| 159 |
void Scanner::GetComments(int start, int end, vector<StringPiece> *ranges) {
|
| 160 |
// short circuit out if we've not yet initialized comments_
|
| 161 |
// (e.g., when save_comments is false)
|
| 162 |
if (!comments_) {
|
| 163 |
return;
|
| 164 |
}
|
| 165 |
// TODO: if we guarantee that comments_ will contain StringPieces
|
| 166 |
// that are ordered by their start, then we can do a binary search
|
| 167 |
// for the first StringPiece at or past start and then scan for the
|
| 168 |
// ones contained in the range, quit early (use equal_range or
|
| 169 |
// lower_bound)
|
| 170 |
for (vector<StringPiece>::const_iterator it = comments_->begin();
|
| 171 |
it != comments_->end(); ++it) {
|
| 172 |
if ((it->data() >= data_.c_str() + start &&
|
| 173 |
it->data() + it->size() <= data_.c_str() + end)) {
|
| 174 |
ranges->push_back(*it);
|
| 175 |
}
|
| 176 |
}
|
| 177 |
}
|
| 178 |
|
| 179 |
|
| 180 |
// Appends to *ranges every saved comment from index comments_offset_ to the
// end of comments_, advancing comments_offset_ by one per comment appended,
// so a later call starts after the comments already handed out.
// Does nothing if comments_ has not been allocated yet (e.g., when
// save_comments is false).
void Scanner::GetNextComments(vector<StringPiece> *ranges) {
  if (comments_ == NULL) {
    return;
  }
  typedef vector<StringPiece>::const_iterator CommentIter;
  CommentIter cursor = comments_->begin() + comments_offset_;
  while (cursor != comments_->end()) {
    ranges->push_back(*cursor);
    ++comments_offset_;
    ++cursor;
  }
}
|
| 193 |
|
| 194 |
} // namespace pcrecpp
|