pcre_scanner.cc 5.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199
  1. // Copyright (c) 2005, Google Inc.
  2. // All rights reserved.
  3. //
  4. // Redistribution and use in source and binary forms, with or without
  5. // modification, are permitted provided that the following conditions are
  6. // met:
  7. //
  8. // * Redistributions of source code must retain the above copyright
  9. // notice, this list of conditions and the following disclaimer.
  10. // * Redistributions in binary form must reproduce the above
  11. // copyright notice, this list of conditions and the following disclaimer
  12. // in the documentation and/or other materials provided with the
  13. // distribution.
  14. // * Neither the name of Google Inc. nor the names of its
  15. // contributors may be used to endorse or promote products derived from
  16. // this software without specific prior written permission.
  17. //
  18. // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  19. // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  20. // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  21. // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  22. // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  23. // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  24. // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  25. // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  26. // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  27. // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  28. // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  29. //
  30. // Author: Sanjay Ghemawat
  31. #ifdef HAVE_CONFIG_H
  32. #include "pcre_config.h"
  33. #endif
  34. #include <vector>
  35. #include <assert.h>
  36. #include "pcrecpp_internal.h"
  37. #include "pcre_scanner.h"
  38. using std::vector;
  39. namespace pcrecpp {
  40. Scanner::Scanner()
  41. : data_(),
  42. input_(data_),
  43. skip_(NULL),
  44. should_skip_(false),
  45. skip_repeat_(false),
  46. save_comments_(false),
  47. comments_(NULL),
  48. comments_offset_(0) {
  49. }
  50. Scanner::Scanner(const string& in)
  51. : data_(in),
  52. input_(data_),
  53. skip_(NULL),
  54. should_skip_(false),
  55. skip_repeat_(false),
  56. save_comments_(false),
  57. comments_(NULL),
  58. comments_offset_(0) {
  59. }
  60. Scanner::~Scanner() {
  61. delete skip_;
  62. delete comments_;
  63. }
  64. void Scanner::SetSkipExpression(const char* re) {
  65. delete skip_;
  66. if (re != NULL) {
  67. skip_ = new RE(re);
  68. should_skip_ = true;
  69. skip_repeat_ = true;
  70. ConsumeSkip();
  71. } else {
  72. skip_ = NULL;
  73. should_skip_ = false;
  74. skip_repeat_ = false;
  75. }
  76. }
  77. void Scanner::Skip(const char* re) {
  78. delete skip_;
  79. if (re != NULL) {
  80. skip_ = new RE(re);
  81. should_skip_ = true;
  82. skip_repeat_ = false;
  83. ConsumeSkip();
  84. } else {
  85. skip_ = NULL;
  86. should_skip_ = false;
  87. skip_repeat_ = false;
  88. }
  89. }
  90. void Scanner::DisableSkip() {
  91. assert(skip_ != NULL);
  92. should_skip_ = false;
  93. }
  94. void Scanner::EnableSkip() {
  95. assert(skip_ != NULL);
  96. should_skip_ = true;
  97. ConsumeSkip();
  98. }
  99. int Scanner::LineNumber() const {
  100. // TODO: Make it more efficient by keeping track of the last point
  101. // where we computed line numbers and counting newlines since then.
  102. // We could use std:count, but not all systems have it. :-(
  103. int count = 1;
  104. for (const char* p = data_.data(); p < input_.data(); ++p)
  105. if (*p == '\n')
  106. ++count;
  107. return count;
  108. }
  109. int Scanner::Offset() const {
  110. return (int)(input_.data() - data_.c_str());
  111. }
  112. bool Scanner::LookingAt(const RE& re) const {
  113. int consumed;
  114. return re.DoMatch(input_, RE::ANCHOR_START, &consumed, 0, 0);
  115. }
  116. bool Scanner::Consume(const RE& re,
  117. const Arg& arg0,
  118. const Arg& arg1,
  119. const Arg& arg2) {
  120. const bool result = re.Consume(&input_, arg0, arg1, arg2);
  121. if (result && should_skip_) ConsumeSkip();
  122. return result;
  123. }
  124. // helper function to consume *skip_ and honour save_comments_
  125. void Scanner::ConsumeSkip() {
  126. const char* start_data = input_.data();
  127. while (skip_->Consume(&input_)) {
  128. if (!skip_repeat_) {
  129. // Only one skip allowed.
  130. break;
  131. }
  132. }
  133. if (save_comments_) {
  134. if (comments_ == NULL) {
  135. comments_ = new vector<StringPiece>;
  136. }
  137. // already pointing one past end, so no need to +1
  138. int length = (int)(input_.data() - start_data);
  139. if (length > 0) {
  140. comments_->push_back(StringPiece(start_data, length));
  141. }
  142. }
  143. }
  144. void Scanner::GetComments(int start, int end, vector<StringPiece> *ranges) {
  145. // short circuit out if we've not yet initialized comments_
  146. // (e.g., when save_comments is false)
  147. if (!comments_) {
  148. return;
  149. }
  150. // TODO: if we guarantee that comments_ will contain StringPieces
  151. // that are ordered by their start, then we can do a binary search
  152. // for the first StringPiece at or past start and then scan for the
  153. // ones contained in the range, quit early (use equal_range or
  154. // lower_bound)
  155. for (vector<StringPiece>::const_iterator it = comments_->begin();
  156. it != comments_->end(); ++it) {
  157. if ((it->data() >= data_.c_str() + start &&
  158. it->data() + it->size() <= data_.c_str() + end)) {
  159. ranges->push_back(*it);
  160. }
  161. }
  162. }
  163. void Scanner::GetNextComments(vector<StringPiece> *ranges) {
  164. // short circuit out if we've not yet initialized comments_
  165. // (e.g., when save_comments is false)
  166. if (!comments_) {
  167. return;
  168. }
  169. for (vector<StringPiece>::const_iterator it =
  170. comments_->begin() + comments_offset_;
  171. it != comments_->end(); ++it) {
  172. ranges->push_back(*it);
  173. ++comments_offset_;
  174. }
  175. }
  176. } // namespace pcrecpp