Browse Source

YQL-16462: add diagnostic on duplicate capturing group names

fedor-miron 1 year ago
parent
commit
bbf2b6878a

+ 6 - 0
ydb/library/yql/udfs/common/re2/re2_udf.cpp

@@ -457,6 +457,7 @@ namespace {
                 const auto& groupNames = regexp.CapturingGroupNames();
                 int groupCount = regexp.NumberOfCapturingGroups();
                 if (groupCount >= 0) {
+                    std::unordered_set<std::string_view> groupNamesSet;
                     int unnamedCount = 0;
                     ++groupCount;
                     groups.Indexes.resize(groupCount);
@@ -465,6 +466,11 @@ namespace {
                         TString fieldName;
                         auto it = groupNames.find(i);
                         if (it != groupNames.end()) {
+                            if (!groupNamesSet.insert(it->second).second) {
+                                builder.SetError(
+                                    TStringBuilder() << "Regexp contains duplicate capturing group name: " << it->second);
+                                return;
+                            }
                             fieldName = it->second;
                         } else {
                             fieldName = "_" + ToString(unnamedCount);

+ 5 - 0
ydb/library/yql/udfs/common/re2/test/canondata/result.json

@@ -19,6 +19,11 @@
             "uri": "file://test.test_DefOptions_/results.txt"
         }
     ],
+    "test.test[MultipleCaptureGroups]": [
+        {
+            "uri": "file://test.test_MultipleCaptureGroups_/extracted"
+        }
+    ],
     "test.test[MutableLambda]": [
         {
             "uri": "file://test.test_MutableLambda_/results.txt"

+ 14 - 0
ydb/library/yql/udfs/common/re2/test/canondata/test.test_MultipleCaptureGroups_/extracted

@@ -0,0 +1,14 @@
+<tmp_path>/program.sql:<main>: Error: Type annotation
+
+    <tmp_path>/program.sql:<main>:8:1: Error: At function: RemovePrefixMembers, At function: Unordered, At function: PersistableRepr, At function: OrderedSqlProject, At function: SqlProjectItem
+    	select $regexp("abc");
+	^
+        <tmp_path>/program.sql:<main>:8:8: Error: At function: Apply
+        	select $regexp("abc");
+	       ^
+            <tmp_path>/program.sql:<main>:4:16: Error: At function: Udf, At Re2.Capture
+            	$regexp = Re2::Capture("(?P<groupname1>a)(?P<groupname2>b)(?<groupname1>c)");
+	               ^
+                <tmp_path>/program.sql:<main>:4:16: Error: Failed to find UDF function: Re2.Capture, reason: Error: Module: Re2, function: Capture, error: Regexp contains duplicate capturing group name: groupname1
+                	$regexp = Re2::Capture("(?P<groupname1>a)(?P<groupname2>b)(?<groupname1>c)");
+	               ^

+ 1 - 0
ydb/library/yql/udfs/common/re2/test/cases/MultipleCaptureGroups.cfg

@@ -0,0 +1 @@
+xfail

+ 4 - 0
ydb/library/yql/udfs/common/re2/test/cases/MultipleCaptureGroups.sql

@@ -0,0 +1,4 @@
+/* syntax version 1 */
+$regexp = Re2::Capture("(?P<groupname1>a)(?P<groupname2>b)(?<groupname1>c)");
+
+select $regexp("abc");