Browse Source

[yt provider] Fix join output sort mismatch (#8896)

Roman Udovichenko 6 months ago
parent
commit
05490267ae

+ 14 - 7
ydb/library/yql/providers/yt/provider/yql_yt_join_impl.cpp

@@ -742,15 +742,22 @@ TVector<TString> MatchSort(const THashSet<TString>& desiredKeys, const TVector<T
 TVector<TStringBuf> MatchSort(TTypeAnnotationNode::TListType& types, const TVector<TStringBuf>& desiredSort, const TVector<TEquivKeys>& sideSort) {
     TVector<TStringBuf> result;
     types.clear();
-    for (size_t i = 0, j = 0; i < desiredSort.size() && j < sideSort.size(); ++i) {
-        auto key = desiredSort[i];
-        if (sideSort[j].Keys.contains(key) ||
-            (j + 1 < sideSort.size() && sideSort[++j].Keys.contains(key)))
-        {
+    for (size_t i = 0, j = 0; i < desiredSort.size() && j < sideSort.size(); ++j) {
+        auto key = desiredSort[i++];
+        if (!sideSort[j].Keys.contains(key)) {
+            break;
+        }
+        while (true) {
             result.push_back(key);
             types.push_back(sideSort[j].Type);
-        } else {
-            break;
+            if (i >= desiredSort.size()) {
+                break;
+            }
+            key = desiredSort[i];
+            if (!sideSort[j].Keys.contains(key)) {
+                break;
+            }
+            i++;
         }
     }
 

+ 3 - 3
ydb/library/yql/tests/sql/dq_file/part11/canondata/result.json

@@ -1482,9 +1482,9 @@
     ],
     "test.test[join-mergejoin_saves_output_sort_nested--Debug]": [
         {
-            "checksum": "484689bfd724ff1a5770f22f79b44981",
-            "size": 6091,
-            "uri": "https://{canondata_backend}/1936273/640ea425b9d5a6140c315077f2a83bba387482d8/resource.tar.gz#test.test_join-mergejoin_saves_output_sort_nested--Debug_/opt.yql_patched"
+            "checksum": "b873289bbdf18c93358cda737440f57b",
+            "size": 6181,
+            "uri": "https://{canondata_backend}/1936947/ef3e5fbc5fb23bc80e348df0815b2958ed5e589d/resource.tar.gz#test.test_join-mergejoin_saves_output_sort_nested--Debug_/opt.yql_patched"
         }
     ],
     "test.test[join-mergejoin_saves_output_sort_nested--Plan]": [

+ 6 - 6
ydb/library/yql/tests/sql/dq_file/part7/canondata/result.json

@@ -1433,9 +1433,9 @@
     ],
     "test.test[join-mergejoin_saves_output_sort_nested-off-Debug]": [
         {
-            "checksum": "6eb64854f37c9ca3064f349cd4d5fac7",
-            "size": 4879,
-            "uri": "https://{canondata_backend}/1936273/b293975a7642b91c5614f8db12d1bd08a0069400/resource.tar.gz#test.test_join-mergejoin_saves_output_sort_nested-off-Debug_/opt.yql_patched"
+            "checksum": "018df7cab8227fcaf97acf77b0d70cd7",
+            "size": 4969,
+            "uri": "https://{canondata_backend}/1784117/bb10ae9ea87fb7aac538ebffcd58fdc507d9f394/resource.tar.gz#test.test_join-mergejoin_saves_output_sort_nested-off-Debug_/opt.yql_patched"
         }
     ],
     "test.test[join-mergejoin_saves_output_sort_nested-off-Plan]": [
@@ -1447,9 +1447,9 @@
     ],
     "test.test[join-mergejoin_saves_output_sort_nested-off-Results]": [
         {
-            "checksum": "55f665e618c06a74cae546d0c6316a92",
-            "size": 2751,
-            "uri": "https://{canondata_backend}/1946324/cf38a9e18bcb2d145a9ceedb60a30cd36c433437/resource.tar.gz#test.test_join-mergejoin_saves_output_sort_nested-off-Results_/results.txt"
+            "checksum": "b7fbb9917064044d562432a3c2cfc59c",
+            "size": 3499,
+            "uri": "https://{canondata_backend}/1784117/bb10ae9ea87fb7aac538ebffcd58fdc507d9f394/resource.tar.gz#test.test_join-mergejoin_saves_output_sort_nested-off-Results_/results.txt"
         }
     ],
     "test.test[join-mergejoin_small_primary--Analyze]": [

+ 3 - 3
ydb/library/yql/tests/sql/hybrid_file/part7/canondata/result.json

@@ -1317,9 +1317,9 @@
     ],
     "test.test[join-mergejoin_saves_output_sort_nested--Debug]": [
         {
-            "checksum": "029e1ad017d56921523dfee5515ba90e",
-            "size": 6295,
-            "uri": "https://{canondata_backend}/1936842/15d1b251a19a947bc78bcd914d26903ce91d665f/resource.tar.gz#test.test_join-mergejoin_saves_output_sort_nested--Debug_/opt.yql_patched"
+            "checksum": "761a9cd057e1a4aab623b3b4411d0ba0",
+            "size": 6407,
+            "uri": "https://{canondata_backend}/1931696/e1e81addd8ea3e15863a8ba2a48dd9580611eaa7/resource.tar.gz#test.test_join-mergejoin_saves_output_sort_nested--Debug_/opt.yql_patched"
         }
     ],
     "test.test[join-mergejoin_saves_output_sort_nested--Plan]": [

+ 20 - 6
ydb/library/yql/tests/sql/sql2yql/canondata/result.json

@@ -9080,9 +9080,16 @@
     ],
     "test_sql2yql.test[join-mergejoin_saves_output_sort_nested]": [
         {
-            "checksum": "f35f42ed698e1adc183f00736d1bc9ef",
-            "size": 2608,
-            "uri": "https://{canondata_backend}/1936947/659b615f15086142a8960946dabd06b519d43335/resource.tar.gz#test_sql2yql.test_join-mergejoin_saves_output_sort_nested_/sql.yql"
+            "checksum": "3ea2ef45dfabc180e2a75d194525fbb6",
+            "size": 2837,
+            "uri": "https://{canondata_backend}/1936947/7dabc5ea0642eb49e4a3155ef894e1670ac842fb/resource.tar.gz#test_sql2yql.test_join-mergejoin_saves_output_sort_nested_/sql.yql"
+        }
+    ],
+    "test_sql2yql.test[join-mergejoin_saves_output_sort_unmatched]": [
+        {
+            "checksum": "0efba072937fd539eb7198ed2a648de3",
+            "size": 2070,
+            "uri": "https://{canondata_backend}/1936947/7dabc5ea0642eb49e4a3155ef894e1670ac842fb/resource.tar.gz#test_sql2yql.test_join-mergejoin_saves_output_sort_unmatched_/sql.yql"
         }
     ],
     "test_sql2yql.test[join-mergejoin_semi_composite_to_inner]": [
@@ -28680,9 +28687,16 @@
     ],
     "test_sql_format.test[join-mergejoin_saves_output_sort_nested]": [
         {
-            "checksum": "2dd95869b8704fc3a26c230cb6fa72ec",
-            "size": 458,
-            "uri": "https://{canondata_backend}/1880306/64654158d6bfb1289c66c626a8162239289559d0/resource.tar.gz#test_sql_format.test_join-mergejoin_saves_output_sort_nested_/formatted.sql"
+            "checksum": "a3a8e3bf45de1c78b7f5898229a63eb7",
+            "size": 498,
+            "uri": "https://{canondata_backend}/1936947/7dabc5ea0642eb49e4a3155ef894e1670ac842fb/resource.tar.gz#test_sql_format.test_join-mergejoin_saves_output_sort_nested_/formatted.sql"
+        }
+    ],
+    "test_sql_format.test[join-mergejoin_saves_output_sort_unmatched]": [
+        {
+            "checksum": "d95699a4e35a5f8c26f14fe749fb7f88",
+            "size": 376,
+            "uri": "https://{canondata_backend}/1936947/7dabc5ea0642eb49e4a3155ef894e1670ac842fb/resource.tar.gz#test_sql_format.test_join-mergejoin_saves_output_sort_unmatched_/formatted.sql"
         }
     ],
     "test_sql_format.test[join-mergejoin_semi_composite_to_inner]": [

+ 2 - 2
ydb/library/yql/tests/sql/suites/join/mergejoin_saves_output_sort_nested.sql

@@ -7,6 +7,6 @@ pragma yt.JoinAllowColumnRenames="true";
 
 FROM Input1 AS a JOIN Input2 AS b ON b.k2 = a.k1 AND a.v1 = b.v2
                  JOIN Input3 AS c ON a.k1 = c.k3 AND a.v1 = c.v3
-SELECT c.k3 AS ck3, c.k3 AS ck3_extra, c.v3, a.k1 as ak1
-ORDER BY ck3, ck3_extra, c.v3 -- should be noop
+SELECT c.k3 AS ck3, c.k3 AS ck3_extra, c.k3 AS ck3_extra2, c.v3, a.k1 as ak1
+ORDER BY ck3, ck3_extra, ck3_extra2, c.v3 -- should be noop
 ;

+ 3 - 0
ydb/library/yql/tests/sql/suites/join/mergejoin_saves_output_sort_unmatched.cfg

@@ -0,0 +1,3 @@
+in Input1 kv1_sorted.txt
+in Input2 kv2_sorted.txt
+providers yt

+ 11 - 0
ydb/library/yql/tests/sql/suites/join/mergejoin_saves_output_sort_unmatched.sql

@@ -0,0 +1,11 @@
+/* ignore yt detailed plan diff */
+PRAGMA DisableSimpleColumns;
+use plato;
+pragma yt.JoinMergeTablesLimit="10";
+pragma yt.JoinMergeUnsortedFactor="0";
+pragma yt.JoinAllowColumnRenames="true";
+
+FROM Input1 AS a JOIN Input2 AS b ON b.k2 = a.k1 AND a.v1 = b.v2
+SELECT b.k2 AS bk2, b.v2 as bv2, a.k1 as ak1
+ORDER BY bv2 -- should be a separate sort
+;

+ 6 - 6
ydb/library/yql/tests/sql/yt_native_file/part11/canondata/result.json

@@ -1464,9 +1464,9 @@
     ],
     "test.test[join-mergejoin_saves_output_sort_nested--Debug]": [
         {
-            "checksum": "6c0b54b4d5c81e157aeb1cba9c35e150",
-            "size": 6205,
-            "uri": "https://{canondata_backend}/1937027/642fd2ff53bdb0fed32ca89598d70c9c5848ac20/resource.tar.gz#test.test_join-mergejoin_saves_output_sort_nested--Debug_/opt.yql"
+            "checksum": "64e70e367c6be884a9d2483db109d621",
+            "size": 6317,
+            "uri": "https://{canondata_backend}/1689644/9c7ca5e1365979deb5796cbc598033aba0c9dfe8/resource.tar.gz#test.test_join-mergejoin_saves_output_sort_nested--Debug_/opt.yql"
         }
     ],
     "test.test[join-mergejoin_saves_output_sort_nested--Plan]": [
@@ -1478,9 +1478,9 @@
     ],
     "test.test[join-mergejoin_saves_output_sort_nested--Results]": [
         {
-            "checksum": "55f665e618c06a74cae546d0c6316a92",
-            "size": 2751,
-            "uri": "https://{canondata_backend}/1689644/8bfa47b4b6b4d6f6543bfef6c07a8937dfb0470d/resource.tar.gz#test.test_join-mergejoin_saves_output_sort_nested--Results_/results.txt"
+            "checksum": "b7fbb9917064044d562432a3c2cfc59c",
+            "size": 3499,
+            "uri": "https://{canondata_backend}/1689644/9c7ca5e1365979deb5796cbc598033aba0c9dfe8/resource.tar.gz#test.test_join-mergejoin_saves_output_sort_nested--Results_/results.txt"
         }
     ],
     "test.test[join-premap_common_semi--Debug]": [

+ 21 - 0
ydb/library/yql/tests/sql/yt_native_file/part4/canondata/result.json

@@ -1120,6 +1120,27 @@
             "uri": "https://{canondata_backend}/1936997/9b8859f70925a58b024145127cca3e8e612258c0/resource.tar.gz#test.test_join-mergejoin_narrows_output_sort--Results_/results.txt"
         }
     ],
+    "test.test[join-mergejoin_saves_output_sort_unmatched--Debug]": [
+        {
+            "checksum": "ef434b7bb58bb0cf2546d5cead68bc8e",
+            "size": 4251,
+            "uri": "https://{canondata_backend}/1847551/821dcc1a241d19f560b4668c917728c6a41f6efe/resource.tar.gz#test.test_join-mergejoin_saves_output_sort_unmatched--Debug_/opt.yql"
+        }
+    ],
+    "test.test[join-mergejoin_saves_output_sort_unmatched--Plan]": [
+        {
+            "checksum": "2bd47fb7e331f35e11910c724901c0f1",
+            "size": 7373,
+            "uri": "https://{canondata_backend}/1847551/821dcc1a241d19f560b4668c917728c6a41f6efe/resource.tar.gz#test.test_join-mergejoin_saves_output_sort_unmatched--Plan_/plan.txt"
+        }
+    ],
+    "test.test[join-mergejoin_saves_output_sort_unmatched--Results]": [
+        {
+            "checksum": "69686be9388d34437592ca36885b6045",
+            "size": 3771,
+            "uri": "https://{canondata_backend}/1847551/821dcc1a241d19f560b4668c917728c6a41f6efe/resource.tar.gz#test.test_join-mergejoin_saves_output_sort_unmatched--Results_/results.txt"
+        }
+    ],
     "test.test[join-mergejoin_sorts_output_for_sort_nomatch--Debug]": [
         {
             "checksum": "edaa86217c5f484f85bfe7db135ac8bb",