Browse Source

Update contrib/libs/re2 to 2022-12-01

robot-contrib 2 years ago
parent
commit
a8bebdbb76

+ 3 - 3
contrib/libs/re2/re2/compile.cc

@@ -789,8 +789,8 @@ void Compiler::AddRuneRangeUTF8(Rune lo, Rune hi, bool foldcase) {
 // Should not be called.
 Frag Compiler::Copy(Frag arg) {
   // We're using WalkExponential; there should be no copying.
-  LOG(DFATAL) << "Compiler::Copy called!";
   failed_ = true;
+  LOG(DFATAL) << "Compiler::Copy called!";
   return NoMatch();
 }
 
@@ -916,8 +916,8 @@ Frag Compiler::PostVisit(Regexp* re, Frag, Frag, Frag* child_frags,
       CharClass* cc = re->cc();
       if (cc->empty()) {
         // This can't happen.
-        LOG(DFATAL) << "No ranges in char class";
         failed_ = true;
+        LOG(DFATAL) << "No ranges in char class";
         return NoMatch();
       }
 
@@ -974,8 +974,8 @@ Frag Compiler::PostVisit(Regexp* re, Frag, Frag, Frag* child_frags,
     case kRegexpNoWordBoundary:
       return EmptyWidth(kEmptyNonWordBoundary);
   }
-  LOG(DFATAL) << "Missing case in Compiler: " << re->op();
   failed_ = true;
+  LOG(DFATAL) << "Missing case in Compiler: " << re->op();
   return NoMatch();
 }
 

+ 1 - 1
contrib/libs/re2/re2/dfa.cc

@@ -1675,8 +1675,8 @@ bool DFA::AnalyzeSearch(SearchParams* params) {
   if (!AnalyzeSearchHelper(params, info, flags)) {
     ResetCache(params->cache_lock);
     if (!AnalyzeSearchHelper(params, info, flags)) {
-      LOG(DFATAL) << "Failed to analyze start state.";
       params->failed = true;
+      LOG(DFATAL) << "Failed to analyze start state.";
       return false;
     }
   }

+ 1 - 3
contrib/libs/re2/re2/parse.cc

@@ -1589,8 +1589,6 @@ static bool ParseEscape(StringPiece* s, Rune* rp,
     //   return true;
   }
 
-  LOG(DFATAL) << "Not reached in ParseEscape.";
-
 BadEscape:
   // Unrecognized escape sequence.
   status->set_code(kRegexpBadEscape);
@@ -2059,8 +2057,8 @@ bool Regexp::ParseState::ParsePerlFlags(StringPiece* s) {
 
   // Caller is supposed to check this.
   if (!(flags_ & PerlX) || t.size() < 2 || t[0] != '(' || t[1] != '?') {
-    LOG(DFATAL) << "Bad call to ParseState::ParsePerlFlags";
     status_->set_code(kRegexpInternalError);
+    LOG(DFATAL) << "Bad call to ParseState::ParsePerlFlags";
     return false;
   }
 

+ 25 - 26
contrib/libs/re2/re2/prefilter.cc

@@ -7,6 +7,7 @@
 #include <stddef.h>
 #include <stdint.h>
 #include <string>
+#include <utility>
 #include <vector>
 
 #include "util/util.h"
@@ -21,9 +22,6 @@ namespace re2 {
 
 static const bool ExtraDebug = false;
 
-typedef std::set<std::string>::iterator SSIter;
-typedef std::set<std::string>::const_iterator ConstSSIter;
-
 // Initializes a Prefilter, allocating subs_ as necessary.
 Prefilter::Prefilter(Op op) {
   op_ = op;
@@ -140,7 +138,7 @@ Prefilter* Prefilter::Or(Prefilter* a, Prefilter* b) {
   return AndOr(OR, a, b);
 }
 
-static void SimplifyStringSet(std::set<std::string>* ss) {
+void Prefilter::SimplifyStringSet(SSet* ss) {
   // Now make sure that the strings aren't redundant.  For example, if
   // we know "ab" is a required string, then it doesn't help at all to
   // know that "abc" is also a required string, so delete "abc". This
@@ -149,13 +147,19 @@ static void SimplifyStringSet(std::set<std::string>* ss) {
   // candidate for match, so further matching "abc" is redundant.
   // Note that we must ignore "" because find() would find it at the
   // start of everything and thus we would end up erasing everything.
-  for (SSIter i = ss->begin(); i != ss->end(); ++i) {
-    if (i->empty())
-      continue;
+  //
+  // The SSet sorts strings by length, then lexicographically. Note that
+  // shorter strings appear first and all strings must be unique. These
+  // observations let us skip string comparisons when possible.
+  SSIter i = ss->begin();
+  if (i != ss->end() && i->empty()) {
+    ++i;
+  }
+  for (; i != ss->end(); ++i) {
     SSIter j = i;
     ++j;
     while (j != ss->end()) {
-      if (j->find(*i) != std::string::npos) {
+      if (j->size() > i->size() && j->find(*i) != std::string::npos) {
         j = ss->erase(j);
         continue;
       }
@@ -164,7 +168,7 @@ static void SimplifyStringSet(std::set<std::string>* ss) {
   }
 }
 
-Prefilter* Prefilter::OrStrings(std::set<std::string>* ss) {
+Prefilter* Prefilter::OrStrings(SSet* ss) {
   Prefilter* or_prefilter = new Prefilter(NONE);
   SimplifyStringSet(ss);
   for (SSIter i = ss->begin(); i != ss->end(); ++i)
@@ -226,14 +230,14 @@ class Prefilter::Info {
   // Caller takes ownership of the Prefilter.
   Prefilter* TakeMatch();
 
-  std::set<std::string>& exact() { return exact_; }
+  SSet& exact() { return exact_; }
 
   bool is_exact() const { return is_exact_; }
 
   class Walker;
 
  private:
-  std::set<std::string> exact_;
+  SSet exact_;
 
   // When is_exact_ is true, the strings that match
   // are placed in exact_. When it is no longer an exact
@@ -286,18 +290,7 @@ std::string Prefilter::Info::ToString() {
   return "";
 }
 
-// Add the strings from src to dst.
-static void CopyIn(const std::set<std::string>& src,
-                   std::set<std::string>* dst) {
-  for (ConstSSIter i = src.begin(); i != src.end(); ++i)
-    dst->insert(*i);
-}
-
-// Add the cross-product of a and b to dst.
-// (For each string i in a and j in b, add i+j.)
-static void CrossProduct(const std::set<std::string>& a,
-                         const std::set<std::string>& b,
-                         std::set<std::string>* dst) {
+void Prefilter::CrossProduct(const SSet& a, const SSet& b, SSet* dst) {
   for (ConstSSIter i = a.begin(); i != a.end(); ++i)
     for (ConstSSIter j = b.begin(); j != b.end(); ++j)
       dst->insert(*i + *j);
@@ -343,8 +336,14 @@ Prefilter::Info* Prefilter::Info::Alt(Info* a, Info* b) {
   Info *ab = new Info();
 
   if (a->is_exact_ && b->is_exact_) {
-    CopyIn(a->exact_, &ab->exact_);
-    CopyIn(b->exact_, &ab->exact_);
+    // Avoid string copies by moving the larger exact_ set into
+    // ab directly, then merge in the smaller set.
+    if (a->exact_.size() < b->exact_.size()) {
+      using std::swap;
+      swap(a, b);
+    }
+    ab->exact_ = std::move(a->exact_);
+    ab->exact_.insert(b->exact_.begin(), b->exact_.end());
     ab->is_exact_ = true;
   } else {
     // Either a or b has is_exact_ = false. If the other
@@ -532,8 +531,8 @@ Prefilter::Info* Prefilter::Info::Walker::PostVisit(
   switch (re->op()) {
     default:
     case kRegexpRepeat:
-      LOG(DFATAL) << "Bad regexp op " << re->op();
       info = EmptyString();
+      LOG(DFATAL) << "Bad regexp op " << re->op();
       break;
 
     case kRegexpNoMatch:

+ 23 - 1
contrib/libs/re2/re2/prefilter.h

@@ -60,8 +60,21 @@ class Prefilter {
   std::string DebugString() const;
 
  private:
+  // A comparator used to store exact strings. We compare by length,
+  // then lexicographically. This ordering makes it easier to reduce the
+  // set of strings in SimplifyStringSet.
+  struct LengthThenLex {
+    bool operator()(const std::string& a, const std::string& b) const {
+       return (a.size() < b.size()) || (a.size() == b.size() && a < b);
+    }
+  };
+
   class Info;
 
+  using SSet = std::set<std::string, LengthThenLex>;
+  using SSIter = SSet::iterator;
+  using ConstSSIter = SSet::const_iterator;
+
   // Combines two prefilters together to create an AND. The passed
   // Prefilters will be part of the returned Prefilter or deleted.
   static Prefilter* And(Prefilter* a, Prefilter* b);
@@ -77,12 +90,21 @@ class Prefilter {
 
   static Prefilter* FromString(const std::string& str);
 
-  static Prefilter* OrStrings(std::set<std::string>* ss);
+  static Prefilter* OrStrings(SSet* ss);
 
   static Info* BuildInfo(Regexp* re);
 
   Prefilter* Simplify();
 
+  // Removes redundant strings from the set. A string is redundant if
+  // any of the other strings appears in it as a substring. The empty
+  // string is a special case, which is ignored.
+  static void SimplifyStringSet(SSet* ss);
+
+  // Adds the cross-product of a and b to dst.
+  // (For each string i in a and j in b, add i+j.)
+  static void CrossProduct(const SSet& a, const SSet& b, SSet* dst);
+
   // Kind of Prefilter.
   Op op_;
 

+ 1 - 1
contrib/libs/re2/re2/prog.cc

@@ -511,7 +511,7 @@ void Prog::ComputeByteMap() {
 
   builder.Build(bytemap_, &bytemap_range_);
 
-  if (0) {  // For debugging, use trivial bytemap.
+  if ((0)) {  // For debugging, use trivial bytemap.
     LOG(ERROR) << "Using trivial bytemap.";
     for (int i = 0; i < 256; i++)
       bytemap_[i] = static_cast<uint8_t>(i);

+ 77 - 47
contrib/libs/re2/re2/re2.cc

@@ -36,6 +36,13 @@
 
 namespace re2 {
 
+// Controls the maximum count permitted by GlobalReplace(); -1 is unlimited.
+static int maximum_global_replace_count = -1;
+
+void RE2::FUZZING_ONLY_set_maximum_global_replace_count(int i) {
+  maximum_global_replace_count = i;
+}
+
 // Maximum number of args we can set
 static const int kMaxArgs = 16;
 static const int kVecSize = 1+kMaxArgs;
@@ -43,11 +50,11 @@ static const int kVecSize = 1+kMaxArgs;
 const int RE2::Options::kDefaultMaxMem;  // initialized in re2.h
 
 RE2::Options::Options(RE2::CannedOptions opt)
-  : encoding_(opt == RE2::Latin1 ? EncodingLatin1 : EncodingUTF8),
+  : max_mem_(kDefaultMaxMem),
+    encoding_(opt == RE2::Latin1 ? EncodingLatin1 : EncodingUTF8),
     posix_syntax_(opt == RE2::POSIX),
     longest_match_(opt == RE2::POSIX),
     log_errors_(opt != RE2::Quiet),
-    max_mem_(kDefaultMaxMem),
     literal_(false),
     never_nl_(false),
     dot_nl_(false),
@@ -58,11 +65,30 @@ RE2::Options::Options(RE2::CannedOptions opt)
     one_line_(false) {
 }
 
-// static empty objects for use as const references.
-// To avoid global constructors, allocated in RE2::Init().
-static const std::string* empty_string;
-static const std::map<std::string, int>* empty_named_groups;
-static const std::map<int, std::string>* empty_group_names;
+// Empty objects for use as const references.
+// Statically allocating the storage and then
+// lazily constructing the objects (exactly once,
+// via std::call_once in RE2::Init()) avoids global
+// constructors and the false positives (thanks,
+// Valgrind) about memory leaks at program termination.
+struct EmptyStorage {
+  std::string empty_string;
+  std::map<std::string, int> empty_named_groups;
+  std::map<int, std::string> empty_group_names;
+};
+alignas(EmptyStorage) static char empty_storage[sizeof(EmptyStorage)];
+
+static inline std::string* empty_string() {
+  return &reinterpret_cast<EmptyStorage*>(empty_storage)->empty_string;
+}
+
+static inline std::map<std::string, int>* empty_named_groups() {
+  return &reinterpret_cast<EmptyStorage*>(empty_storage)->empty_named_groups;
+}
+
+static inline std::map<int, std::string>* empty_group_names() {
+  return &reinterpret_cast<EmptyStorage*>(empty_storage)->empty_group_names;
+}
 
 // Converts from Regexp error code to RE2 error code.
 // Maybe some day they will diverge.  In any event, this
@@ -173,23 +199,23 @@ int RE2::Options::ParseFlags() const {
 void RE2::Init(const StringPiece& pattern, const Options& options) {
   static std::once_flag empty_once;
   std::call_once(empty_once, []() {
-    empty_string = new std::string;
-    empty_named_groups = new std::map<std::string, int>;
-    empty_group_names = new std::map<int, std::string>;
+    (void) new (empty_storage) EmptyStorage;
   });
 
-  pattern_.assign(pattern.data(), pattern.size());
+  pattern_ = new std::string(pattern);
   options_.Copy(options);
   entire_regexp_ = NULL;
-  error_ = empty_string;
-  error_code_ = NoError;
-  error_arg_.clear();
-  prefix_.clear();
-  prefix_foldcase_ = false;
   suffix_regexp_ = NULL;
-  prog_ = NULL;
+  error_ = empty_string();
+  error_arg_ = empty_string();
+
   num_captures_ = -1;
+  error_code_ = NoError;
+  longest_match_ = options_.longest_match();
   is_one_pass_ = false;
+  prefix_foldcase_ = false;
+  prefix_.clear();
+  prog_ = NULL;
 
   rprog_ = NULL;
   named_groups_ = NULL;
@@ -197,25 +223,29 @@ void RE2::Init(const StringPiece& pattern, const Options& options) {
 
   RegexpStatus status;
   entire_regexp_ = Regexp::Parse(
-    pattern_,
+    *pattern_,
     static_cast<Regexp::ParseFlags>(options_.ParseFlags()),
     &status);
   if (entire_regexp_ == NULL) {
     if (options_.log_errors()) {
-      LOG(ERROR) << "Error parsing '" << trunc(pattern_) << "': "
+      LOG(ERROR) << "Error parsing '" << trunc(*pattern_) << "': "
                  << status.Text();
     }
     error_ = new std::string(status.Text());
     error_code_ = RegexpErrorToRE2(status.code());
-    error_arg_ = std::string(status.error_arg());
+    error_arg_ = new std::string(status.error_arg());
     return;
   }
 
+  bool foldcase;
   re2::Regexp* suffix;
-  if (entire_regexp_->RequiredPrefix(&prefix_, &prefix_foldcase_, &suffix))
+  if (entire_regexp_->RequiredPrefix(&prefix_, &foldcase, &suffix)) {
+    prefix_foldcase_ = foldcase;
     suffix_regexp_ = suffix;
-  else
+  }
+  else {
     suffix_regexp_ = entire_regexp_->Incref();
+  }
 
   // Two thirds of the memory goes to the forward Prog,
   // one third to the reverse prog, because the forward
@@ -223,7 +253,7 @@ void RE2::Init(const StringPiece& pattern, const Options& options) {
   prog_ = suffix_regexp_->CompileToProg(options_.max_mem()*2/3);
   if (prog_ == NULL) {
     if (options_.log_errors())
-      LOG(ERROR) << "Error compiling '" << trunc(pattern_) << "'";
+      LOG(ERROR) << "Error compiling '" << trunc(*pattern_) << "'";
     error_ = new std::string("pattern too large - compile failed");
     error_code_ = RE2::ErrorPatternTooLarge;
     return;
@@ -249,7 +279,8 @@ re2::Prog* RE2::ReverseProg() const {
         re->suffix_regexp_->CompileToReverseProg(re->options_.max_mem() / 3);
     if (re->rprog_ == NULL) {
       if (re->options_.log_errors())
-        LOG(ERROR) << "Error reverse compiling '" << trunc(re->pattern_) << "'";
+        LOG(ERROR) << "Error reverse compiling '" << trunc(*re->pattern_)
+                   << "'";
       // We no longer touch error_ and error_code_ because failing to compile
       // the reverse Prog is not a showstopper: falling back to NFA execution
       // is fine. More importantly, an RE2 object is supposed to be logically
@@ -261,18 +292,21 @@ re2::Prog* RE2::ReverseProg() const {
 }
 
 RE2::~RE2() {
+  if (group_names_ != empty_group_names())
+    delete group_names_;
+  if (named_groups_ != empty_named_groups())
+    delete named_groups_;
+  delete rprog_;
+  delete prog_;
+  if (error_arg_ != empty_string())
+    delete error_arg_;
+  if (error_ != empty_string())
+    delete error_;
   if (suffix_regexp_)
     suffix_regexp_->Decref();
   if (entire_regexp_)
     entire_regexp_->Decref();
-  delete prog_;
-  delete rprog_;
-  if (error_ != empty_string)
-    delete error_;
-  if (named_groups_ != NULL && named_groups_ != empty_named_groups)
-    delete named_groups_;
-  if (group_names_ != NULL &&  group_names_ != empty_group_names)
-    delete group_names_;
+  delete pattern_;
 }
 
 int RE2::ProgramSize() const {
@@ -352,7 +386,7 @@ const std::map<std::string, int>& RE2::NamedCapturingGroups() const {
     if (re->suffix_regexp_ != NULL)
       re->named_groups_ = re->suffix_regexp_->NamedCaptures();
     if (re->named_groups_ == NULL)
-      re->named_groups_ = empty_named_groups;
+      re->named_groups_ = empty_named_groups();
   }, this);
   return *named_groups_;
 }
@@ -363,7 +397,7 @@ const std::map<int, std::string>& RE2::CapturingGroupNames() const {
     if (re->suffix_regexp_ != NULL)
       re->group_names_ = re->suffix_regexp_->CaptureNames();
     if (re->group_names_ == NULL)
-      re->group_names_ = empty_group_names;
+      re->group_names_ = empty_group_names();
   }, this);
   return *group_names_;
 }
@@ -439,13 +473,10 @@ int RE2::GlobalReplace(std::string* str,
   const char* lastend = NULL;
   std::string out;
   int count = 0;
-#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
-  // Iterate just once when fuzzing. Otherwise, we easily get bogged down
-  // and coverage is unlikely to improve despite significant expense.
-  while (p == str->data()) {
-#else
   while (p <= ep) {
-#endif
+    if (maximum_global_replace_count != -1 &&
+        count >= maximum_global_replace_count)
+      break;
     if (!re.Match(*str, static_cast<size_t>(p - str->data()),
                   str->size(), UNANCHORED, vec, nvec))
       break;
@@ -686,9 +717,8 @@ bool RE2::Match(const StringPiece& text,
   }
 
   Prog::Anchor anchor = Prog::kUnanchored;
-  Prog::MatchKind kind = Prog::kFirstMatch;
-  if (options_.longest_match())
-    kind = Prog::kLongestMatch;
+  Prog::MatchKind kind =
+      longest_match_ ? Prog::kLongestMatch : Prog::kFirstMatch;
 
   bool can_one_pass = is_one_pass_ && ncap <= Prog::kMaxOnePassCapture;
   bool can_bit_state = prog_->CanBitState();
@@ -720,7 +750,7 @@ bool RE2::Match(const StringPiece& text,
           if (dfa_failed) {
             if (options_.log_errors())
               LOG(ERROR) << "DFA out of memory: "
-                         << "pattern length " << pattern_.size() << ", "
+                         << "pattern length " << pattern_->size() << ", "
                          << "program size " << prog->size() << ", "
                          << "list count " << prog->list_count() << ", "
                          << "bytemap range " << prog->bytemap_range();
@@ -740,7 +770,7 @@ bool RE2::Match(const StringPiece& text,
         if (dfa_failed) {
           if (options_.log_errors())
             LOG(ERROR) << "DFA out of memory: "
-                       << "pattern length " << pattern_.size() << ", "
+                       << "pattern length " << pattern_->size() << ", "
                        << "program size " << prog_->size() << ", "
                        << "list count " << prog_->list_count() << ", "
                        << "bytemap range " << prog_->bytemap_range();
@@ -766,7 +796,7 @@ bool RE2::Match(const StringPiece& text,
         if (dfa_failed) {
           if (options_.log_errors())
             LOG(ERROR) << "DFA out of memory: "
-                       << "pattern length " << pattern_.size() << ", "
+                       << "pattern length " << pattern_->size() << ", "
                        << "program size " << prog->size() << ", "
                        << "list count " << prog->list_count() << ", "
                        << "bytemap range " << prog->bytemap_range();
@@ -809,7 +839,7 @@ bool RE2::Match(const StringPiece& text,
         if (dfa_failed) {
           if (options_.log_errors())
             LOG(ERROR) << "DFA out of memory: "
-                       << "pattern length " << pattern_.size() << ", "
+                       << "pattern length " << pattern_->size() << ", "
                        << "program size " << prog_->size() << ", "
                        << "list count " << prog_->list_count() << ", "
                        << "bytemap range " << prog_->bytemap_range();

+ 48 - 34
contrib/libs/re2/re2/re2.h

@@ -66,17 +66,17 @@
 //    CHECK(RE2::FullMatch(latin1_string, RE2(latin1_pattern, RE2::Latin1)));
 //
 // -----------------------------------------------------------------------
-// MATCHING WITH SUBSTRING EXTRACTION:
+// SUBMATCH EXTRACTION:
 //
-// You can supply extra pointer arguments to extract matched substrings.
+// You can supply extra pointer arguments to extract submatches.
 // On match failure, none of the pointees will have been modified.
-// On match success, the substrings will be converted (as necessary) and
+// On match success, the submatches will be converted (as necessary) and
 // their values will be assigned to their pointees until all conversions
 // have succeeded or one conversion has failed.
 // On conversion failure, the pointees will be in an indeterminate state
 // because the caller has no way of knowing which conversion failed.
 // However, conversion cannot fail for types like string and StringPiece
-// that do not inspect the substring contents. Hence, in the common case
+// that do not inspect the submatch contents. Hence, in the common case
 // where all of the pointees are of such types, failure is always due to
 // match failure and thus none of the pointees will have been modified.
 //
@@ -100,10 +100,10 @@
 // Example: integer overflow causes failure
 //    CHECK(!RE2::FullMatch("ruby:1234567891234", "\\w+:(\\d+)", &i));
 //
-// NOTE(rsc): Asking for substrings slows successful matches quite a bit.
+// NOTE(rsc): Asking for submatches slows successful matches quite a bit.
 // This may get a little faster in the future, but right now is slower
 // than PCRE.  On the other hand, failed matches run *very* fast (faster
-// than PCRE), as do matches without substring extraction.
+// than PCRE), as do matches without submatch extraction.
 //
 // -----------------------------------------------------------------------
 // PARTIAL MATCHES
@@ -275,23 +275,35 @@ class RE2 {
   // Need to have the const char* and const std::string& forms for implicit
   // conversions when passing string literals to FullMatch and PartialMatch.
   // Otherwise the StringPiece form would be sufficient.
-#ifndef SWIG
   RE2(const char* pattern);
   RE2(const std::string& pattern);
-#endif
   RE2(const StringPiece& pattern);
   RE2(const StringPiece& pattern, const Options& options);
   // ambiguity resolution.
   RE2(const TString& pattern) : RE2(StringPiece(pattern)) {}
   ~RE2();
 
+  // Not copyable.
+  // RE2 objects are expensive. You should probably use std::shared_ptr<RE2>
+  // instead. If you really must copy, RE2(first.pattern(), first.options())
+  // effectively does so: it produces a second object that mimics the first.
+  RE2(const RE2&) = delete;
+  RE2& operator=(const RE2&) = delete;
+  // Not movable.
+  // RE2 objects are thread-safe and logically immutable. You should probably
+  // use std::unique_ptr<RE2> instead. Otherwise, consider std::deque<RE2> if
+  // direct emplacement into a container is desired. If you really must move,
+  // be prepared to submit a design document along with your feature request.
+  RE2(RE2&&) = delete;
+  RE2& operator=(RE2&&) = delete;
+
   // Returns whether RE2 was created properly.
   bool ok() const { return error_code() == NoError; }
 
   // The string specification for this RE2.  E.g.
   //   RE2 re("ab*c?d+");
   //   re.pattern();    // "ab*c?d+"
-  const std::string& pattern() const { return pattern_; }
+  const std::string& pattern() const { return *pattern_; }
 
   // If RE2 could not be created properly, returns an error string.
   // Else returns the empty string.
@@ -303,7 +315,7 @@ class RE2 {
 
   // If RE2 could not be created properly, returns the offending
   // portion of the regexp.
-  const std::string& error_arg() const { return error_arg_; }
+  const std::string& error_arg() const { return *error_arg_; }
 
   // Returns the program size, a very approximate measure of a regexp's "cost".
   // Larger numbers are more expensive than smaller numbers.
@@ -336,7 +348,6 @@ class RE2 {
   static bool FindAndConsumeN(StringPiece* input, const RE2& re,
                               const Arg* const args[], int n);
 
-#ifndef SWIG
  private:
   template <typename F, typename SP>
   static inline bool Apply(F f, SP sp, const RE2& re) {
@@ -442,7 +453,6 @@ class RE2 {
   static bool FindAndConsume(StringPiece* input, const RE2& re, A&&... a) {
     return Apply(FindAndConsumeN, input, re, Arg(std::forward<A>(a))...);
   }
-#endif
 
   // Replace the first match of "re" in "str" with "rewrite".
   // Within "rewrite", backslash-escaped digits (\1 to \9) can be
@@ -698,11 +708,11 @@ class RE2 {
     };
 
     Options() :
+      max_mem_(kDefaultMaxMem),
       encoding_(EncodingUTF8),
       posix_syntax_(false),
       longest_match_(false),
       log_errors_(true),
-      max_mem_(kDefaultMaxMem),
       literal_(false),
       never_nl_(false),
       dot_nl_(false),
@@ -715,6 +725,9 @@ class RE2 {
 
     /*implicit*/ Options(CannedOptions);
 
+    int64_t max_mem() const { return max_mem_; }
+    void set_max_mem(int64_t m) { max_mem_ = m; }
+
     Encoding encoding() const { return encoding_; }
     void set_encoding(Encoding encoding) { encoding_ = encoding; }
 
@@ -727,9 +740,6 @@ class RE2 {
     bool log_errors() const { return log_errors_; }
     void set_log_errors(bool b) { log_errors_ = b; }
 
-    int64_t max_mem() const { return max_mem_; }
-    void set_max_mem(int64_t m) { max_mem_ = m; }
-
     bool literal() const { return literal_; }
     void set_literal(bool b) { literal_ = b; }
 
@@ -761,11 +771,11 @@ class RE2 {
     int ParseFlags() const;
 
    private:
+    int64_t max_mem_;
     Encoding encoding_;
     bool posix_syntax_;
     bool longest_match_;
     bool log_errors_;
-    int64_t max_mem_;
     bool literal_;
     bool never_nl_;
     bool dot_nl_;
@@ -787,6 +797,10 @@ class RE2 {
   template <typename T>
   static Arg Octal(T* ptr);
 
+  // Controls the maximum count permitted by GlobalReplace(); -1 is unlimited.
+  // FOR FUZZING ONLY.
+  static void FUZZING_ONLY_set_maximum_global_replace_count(int i);
+
  private:
   void Init(const StringPiece& pattern, const Options& options);
 
@@ -798,18 +812,23 @@ class RE2 {
 
   re2::Prog* ReverseProg() const;
 
-  std::string pattern_;         // string regular expression
-  Options options_;             // option flags
-  re2::Regexp* entire_regexp_;  // parsed regular expression
-  const std::string* error_;    // error indicator (or points to empty string)
-  ErrorCode error_code_;        // error code
-  std::string error_arg_;       // fragment of regexp showing error
-  std::string prefix_;          // required prefix (before suffix_regexp_)
-  bool prefix_foldcase_;        // prefix_ is ASCII case-insensitive
-  re2::Regexp* suffix_regexp_;  // parsed regular expression, prefix_ removed
-  re2::Prog* prog_;             // compiled program for regexp
-  int num_captures_;            // number of capturing groups
-  bool is_one_pass_;            // can use prog_->SearchOnePass?
+  // First cache line is relatively cold fields.
+  const std::string* pattern_;    // string regular expression
+  Options options_;               // option flags
+  re2::Regexp* entire_regexp_;    // parsed regular expression
+  re2::Regexp* suffix_regexp_;    // parsed regular expression, prefix_ removed
+  const std::string* error_;      // error indicator (or points to empty string)
+  const std::string* error_arg_;  // fragment of regexp showing error (or ditto)
+
+  // Second cache line is relatively hot fields.
+  // These are ordered oddly to pack everything.
+  int num_captures_;              // number of capturing groups
+  ErrorCode error_code_ : 29;     // error code (29 bits is more than enough)
+  bool longest_match_ : 1;        // cached copy of options_.longest_match()
+  bool is_one_pass_ : 1;          // can use prog_->SearchOnePass?
+  bool prefix_foldcase_ : 1;      // prefix_ is ASCII case-insensitive
+  std::string prefix_;            // required prefix (before suffix_regexp_)
+  re2::Prog* prog_;               // compiled program for regexp
 
   // Reverse Prog for DFA execution only
   mutable re2::Prog* rprog_;
@@ -821,9 +840,6 @@ class RE2 {
   mutable std::once_flag rprog_once_;
   mutable std::once_flag named_groups_once_;
   mutable std::once_flag group_names_once_;
-
-  RE2(const RE2&) = delete;
-  RE2& operator=(const RE2&) = delete;
 };
 
 /***** Implementation details *****/
@@ -954,7 +970,6 @@ inline RE2::Arg RE2::Octal(T* ptr) {
   });
 }
 
-#ifndef SWIG
 // Silence warnings about missing initializers for members of LazyRE2.
 #if !defined(__clang__) && defined(__GNUC__) && __GNUC__ >= 6
 #pragma GCC diagnostic ignored "-Wmissing-field-initializers"
@@ -1005,7 +1020,6 @@ class LazyRE2 {
 
   void operator=(const LazyRE2&);  // disallowed
 };
-#endif
 
 namespace hooks {
 

+ 24 - 14
contrib/libs/re2/re2/regexp.cc

@@ -74,16 +74,27 @@ bool Regexp::QuickDestroy() {
   return false;
 }
 
-// Lazily allocated.
-static Mutex* ref_mutex;
-static std::map<Regexp*, int>* ref_map;
+// Similar to EmptyStorage in re2.cc.
+struct RefStorage {
+  Mutex ref_mutex;
+  std::map<Regexp*, int> ref_map;
+};
+alignas(RefStorage) static char ref_storage[sizeof(RefStorage)];
+
+static inline Mutex* ref_mutex() {
+  return &reinterpret_cast<RefStorage*>(ref_storage)->ref_mutex;
+}
+
+static inline std::map<Regexp*, int>* ref_map() {
+  return &reinterpret_cast<RefStorage*>(ref_storage)->ref_map;
+}
 
 int Regexp::Ref() {
   if (ref_ < kMaxRef)
     return ref_;
 
-  MutexLock l(ref_mutex);
-  return (*ref_map)[this];
+  MutexLock l(ref_mutex());
+  return (*ref_map())[this];
 }
 
 // Increments reference count, returns object as convenience.
@@ -91,18 +102,17 @@ Regexp* Regexp::Incref() {
   if (ref_ >= kMaxRef-1) {
     static std::once_flag ref_once;
     std::call_once(ref_once, []() {
-      ref_mutex = new Mutex;
-      ref_map = new std::map<Regexp*, int>;
+      (void) new (ref_storage) RefStorage;
     });
 
     // Store ref count in overflow map.
-    MutexLock l(ref_mutex);
+    MutexLock l(ref_mutex());
     if (ref_ == kMaxRef) {
       // already overflowed
-      (*ref_map)[this]++;
+      (*ref_map())[this]++;
     } else {
       // overflowing now
-      (*ref_map)[this] = kMaxRef;
+      (*ref_map())[this] = kMaxRef;
       ref_ = kMaxRef;
     }
     return this;
@@ -116,13 +126,13 @@ Regexp* Regexp::Incref() {
 void Regexp::Decref() {
   if (ref_ == kMaxRef) {
     // Ref count is stored in overflow map.
-    MutexLock l(ref_mutex);
-    int r = (*ref_map)[this] - 1;
+    MutexLock l(ref_mutex());
+    int r = (*ref_map())[this] - 1;
     if (r < kMaxRef) {
       ref_ = static_cast<uint16_t>(r);
-      ref_map->erase(this);
+      ref_map()->erase(this);
     } else {
-      (*ref_map)[this] = r;
+      (*ref_map())[this] = r;
     }
     return;
   }

+ 2 - 2
contrib/libs/re2/re2/set.cc

@@ -128,9 +128,9 @@ bool RE2::Set::Match(const StringPiece& text, std::vector<int>* v) const {
 bool RE2::Set::Match(const StringPiece& text, std::vector<int>* v,
                      ErrorInfo* error_info) const {
   if (!compiled_) {
-    LOG(DFATAL) << "RE2::Set::Match() called before compiling";
     if (error_info != NULL)
       error_info->kind = kNotCompiled;
+    LOG(DFATAL) << "RE2::Set::Match() called before compiling";
     return false;
   }
 #ifdef RE2_HAVE_THREAD_LOCAL
@@ -161,9 +161,9 @@ bool RE2::Set::Match(const StringPiece& text, std::vector<int>* v,
   }
   if (v != NULL) {
     if (matches->empty()) {
-      LOG(DFATAL) << "RE2::Set::Match() matched, but no matches returned?!";
       if (error_info != NULL)
         error_info->kind = kInconsistent;
+      LOG(DFATAL) << "RE2::Set::Match() matched, but no matches returned?!";
       return false;
     }
     v->assign(matches->begin(), matches->end());

Some files were not shown because too many files changed in this diff