Browse Source

Update contrib/libs/re2 to 2024-02-01

robot-contrib 1 year ago
parent
commit
2ed9104e38

+ 0 - 13
contrib/libs/re2/AUTHORS

@@ -1,13 +0,0 @@
-# This is the official list of RE2 authors for copyright purposes.
-# This file is distinct from the CONTRIBUTORS files.
-# See the latter for an explanation.
-
-# Names should be added to this file as
-#	Name or Organization <email address>
-# The email address is not required for organizations.
-
-# Please keep the list sorted.
-
-Google Inc.
-Samsung Electronics
-Stefano Rivera <stefano.rivera@gmail.com>

+ 5 - 3
contrib/libs/re2/re2/dfa.cc

@@ -1367,7 +1367,7 @@ inline bool DFA::InlinedSearchLoop(SearchParams* params) {
     lastmatch = p;
     if (ExtraDebug)
       absl::FPrintF(stderr, "match @stx! [%s]\n", DumpState(s));
-    if (params->matches != NULL && kind_ == Prog::kManyMatch) {
+    if (params->matches != NULL) {
       for (int i = s->ninst_ - 1; i >= 0; i--) {
         int id = s->inst_[i];
         if (id == MatchSep)
@@ -1484,7 +1484,7 @@ inline bool DFA::InlinedSearchLoop(SearchParams* params) {
         lastmatch = p + 1;
       if (ExtraDebug)
         absl::FPrintF(stderr, "match @%d! [%s]\n", lastmatch - bp, DumpState(s));
-      if (params->matches != NULL && kind_ == Prog::kManyMatch) {
+      if (params->matches != NULL) {
         for (int i = s->ninst_ - 1; i >= 0; i--) {
           int id = s->inst_[i];
           if (id == MatchSep)
@@ -1551,7 +1551,7 @@ inline bool DFA::InlinedSearchLoop(SearchParams* params) {
     lastmatch = p;
     if (ExtraDebug)
       absl::FPrintF(stderr, "match @etx! [%s]\n", DumpState(s));
-    if (params->matches != NULL && kind_ == Prog::kManyMatch) {
+    if (params->matches != NULL) {
       for (int i = s->ninst_ - 1; i >= 0; i--) {
         int id = s->inst_[i];
         if (id == MatchSep)
@@ -1767,6 +1767,8 @@ bool DFA::Search(absl::string_view text, absl::string_view context,
   params.anchored = anchored;
   params.want_earliest_match = want_earliest_match;
   params.run_forward = run_forward;
+  // matches should be null except when using RE2::Set.
+  DCHECK(matches == NULL || kind_ == Prog::kManyMatch);
   params.matches = matches;
 
   if (!AnalyzeSearch(&params)) {

+ 22 - 1
contrib/libs/re2/re2/parse.cc

@@ -1177,7 +1177,17 @@ void FactorAlternationImpl::Round3(Regexp** sub, int nsub,
           for (CharClass::iterator it = cc->begin(); it != cc->end(); ++it)
             ccb.AddRange(it->lo, it->hi);
         } else if (re->op() == kRegexpLiteral) {
-          ccb.AddRangeFlags(re->rune(), re->rune(), re->parse_flags());
+          if (re->parse_flags() & Regexp::FoldCase) {
+            // AddFoldedRange() can terminate prematurely if the character class
+            // already contains the rune. For example, if it contains 'a' and we
+            // want to add folded 'a', it sees 'a' and stops without adding 'A'.
+            // To avoid that, we use an empty character class and then merge it.
+            CharClassBuilder tmp;
+            tmp.AddRangeFlags(re->rune(), re->rune(), re->parse_flags());
+            ccb.AddCharClass(&tmp);
+          } else {
+            ccb.AddRangeFlags(re->rune(), re->rune(), re->parse_flags());
+          }
         } else {
           LOG(DFATAL) << "RE2: unexpected op: " << re->op() << " "
                       << re->ToString();
@@ -2060,6 +2070,17 @@ bool Regexp::ParseState::ParsePerlFlags(absl::string_view* s) {
     return false;
   }
 
+  // Check for look-around assertions. This is NOT because we support them! ;)
+  // As per https://github.com/google/re2/issues/468, we really want to report
+  // kRegexpBadPerlOp (not kRegexpBadNamedCapture) for look-behind assertions.
+  // Additionally, it would be nice to report not "(?<", but "(?<=" or "(?<!".
+  if ((t.size() > 3 && (t[2] == '=' || t[2] == '!')) ||
+      (t.size() > 4 && t[2] == '<' && (t[3] == '=' || t[3] == '!'))) {
+    status_->set_code(kRegexpBadPerlOp);
+    status_->set_error_arg(absl::string_view(t.data(), t[2] == '<' ? 4 : 3));
+    return false;
+  }
+
   // Check for named captures, first introduced in Python's regexp library.
   // As usual, there are three slightly different syntaxes:
   //

+ 2 - 3
contrib/libs/re2/re2/re2.cc

@@ -9,7 +9,6 @@
 
 #include "re2/re2.h"
 
-#include <assert.h>
 #include <ctype.h>
 #include <errno.h>
 #ifdef _MSC_VER
@@ -454,8 +453,8 @@ bool RE2::Replace(std::string* str,
   if (!re.Rewrite(&s, rewrite, vec, nvec))
     return false;
 
-  assert(vec[0].data() >= str->data());
-  assert(vec[0].data() + vec[0].size() <= str->data() + str->size());
+  DCHECK_GE(vec[0].data(), str->data());
+  DCHECK_LE(vec[0].data() + vec[0].size(), str->data() + str->size());
   str->replace(vec[0].data() - str->data(), vec[0].size(), s);
   return true;
 }

+ 33 - 0
contrib/libs/re2/re2/testing/parse_test.cc

@@ -356,6 +356,13 @@ Test prefix_tests[] = {
     "cat{lit{a}alt{emp{}cat{str{ardvark}alt{emp{}lit{s}}}"
     "cat{str{ba}alt{cat{lit{c}alt{cc{0x69 0x6b}cat{str{us}alt{emp{}str{es}}}}}"
     "str{ft}cat{str{lone}alt{emp{}lit{s}}}}}}}" },
+  // As per https://github.com/google/re2/issues/467,
+  // these should factor identically, but they didn't
+  // because AddFoldedRange() terminated prematurely.
+  { "0A|0[aA]", "cat{lit{0}cc{0x41 0x61}}" },
+  { "0a|0[aA]", "cat{lit{0}cc{0x41 0x61}}" },
+  { "0[aA]|0A", "cat{lit{0}cc{0x41 0x61}}" },
+  { "0[aA]|0a", "cat{lit{0}cc{0x41 0x61}}" },
 };
 
 // Test that prefix factoring works.
@@ -525,4 +532,30 @@ TEST(NamedCaptures, ErrorArgs) {
   EXPECT_EQ(status.error_arg(), "(?<space bar>");
 }
 
+// Test that look-around error args are correct.
+TEST(LookAround, ErrorArgs) {
+  RegexpStatus status;
+  Regexp* re;
+
+  re = Regexp::Parse("(?=foo).*", Regexp::LikePerl, &status);
+  EXPECT_TRUE(re == NULL);
+  EXPECT_EQ(status.code(), kRegexpBadPerlOp);
+  EXPECT_EQ(status.error_arg(), "(?=");
+
+  re = Regexp::Parse("(?!foo).*", Regexp::LikePerl, &status);
+  EXPECT_TRUE(re == NULL);
+  EXPECT_EQ(status.code(), kRegexpBadPerlOp);
+  EXPECT_EQ(status.error_arg(), "(?!");
+
+  re = Regexp::Parse("(?<=foo).*", Regexp::LikePerl, &status);
+  EXPECT_TRUE(re == NULL);
+  EXPECT_EQ(status.code(), kRegexpBadPerlOp);
+  EXPECT_EQ(status.error_arg(), "(?<=");
+
+  re = Regexp::Parse("(?<!foo).*", Regexp::LikePerl, &status);
+  EXPECT_TRUE(re == NULL);
+  EXPECT_EQ(status.code(), kRegexpBadPerlOp);
+  EXPECT_EQ(status.error_arg(), "(?<!");
+}
+
 }  // namespace re2

+ 2 - 2
contrib/libs/re2/ya.make

@@ -9,9 +9,9 @@ LICENSE(
 
 LICENSE_TEXTS(.yandex_meta/licenses.list.txt)
 
-VERSION(2023-11-01)
+VERSION(2024-02-01)
 
-ORIGINAL_SOURCE(https://github.com/google/re2/archive/2023-11-01.tar.gz)
+ORIGINAL_SOURCE(https://github.com/google/re2/archive/2024-02-01.tar.gz)
 
 PEERDIR(
     contrib/restricted/abseil-cpp/absl/base