Browse Source

Do not use blockreader on weak schema tables (#7566)

Александр Новожилов 7 months ago
parent
commit
ab1f7e3b85

+ 1 - 0
ydb/library/yql/providers/yt/comp_nodes/dq/dq_yt_block_reader.cpp

@@ -310,6 +310,7 @@ public:
         YQL_ENSURE(batch);
         MKQL_ADD_STAT(JobStats_, BlockCount, 1);
         std::vector<arrow::Datum> result;
+        YQL_ENSURE((size_t)batch->num_columns() == ColumnConverters_.size());
         result.resize(ColumnConverters_.size());
         size_t matchedColumns = 0;
         for (size_t i = 0; i < ColumnConverters_.size(); ++i) {

+ 4 - 0
ydb/library/yql/providers/yt/gateway/file/yql_yt_file.cpp

@@ -1142,6 +1142,10 @@ private:
             req.Table(), attrs, req.IgnoreYamrDsv(), req.IgnoreWeakSchema()
         );
 
+        if (attrs.AsMap().contains("schema_mode") && attrs["schema_mode"].AsString() == "weak") {
+            info.Attrs["schema_mode"] = attrs["schema_mode"].AsString();
+        }
+
         NYT::TNode schemaAttrs;
         if (req.ForceInferSchema() && req.InferSchemaRows() > 0) {
             info.Attrs.erase(YqlRowSpecAttribute);

+ 3 - 0
ydb/library/yql/providers/yt/gateway/native/yql_yt_native.cpp

@@ -2538,6 +2538,9 @@ private:
                 if (attrs.AsMap().contains("optimize_for") && attrs["optimize_for"].AsString() != "scan") {
                     metaInfo->Attrs["optimize_for"] = attrs["optimize_for"].AsString();
                 }
+                if (attrs.AsMap().contains("schema_mode") && attrs["schema_mode"].AsString() == "weak") {
+                    metaInfo->Attrs["schema_mode"] = attrs["schema_mode"].AsString();
+                }
                 if (attrs.AsMap().contains(SecurityTagsName)) {
                     TVector<TString> securityTags;
                     for (const auto& tag : attrs[SecurityTagsName].AsList()) {

+ 8 - 0
ydb/library/yql/providers/yt/provider/yql_yt_dq_integration.cpp

@@ -515,6 +515,14 @@ public:
         const TYtSectionList& sectionList = wrap.Input().Cast<TYtReadTable>().Input();
         for (size_t i = 0; i < sectionList.Size(); ++i) {
             auto section = sectionList.Item(i);
+            auto paths = section.Paths();
+            for (const auto& path : section.Paths()) {
+                auto meta = TYtTableBaseInfo::GetMeta(path.Table());
+                if (meta->Attrs.contains("schema_mode") && meta->Attrs["schema_mode"] == "weak") {
+                    BlockReaderAddInfo(ctx, ctx.GetPosition(node.Pos()), "can't use block reader on tables with weak schema");
+                    return false;
+                }
+            }
             if (!NYql::GetSettingAsColumnList(section.Settings().Ref(), EYtSettingType::SysColumns).empty()) {
                 BlockReaderAddInfo(ctx, ctx.GetPosition(node.Pos()), "system column");
                 return false;

+ 6 - 6
ydb/library/yql/tests/s-expressions/yt_native_file/part4/canondata/result.json

@@ -3615,9 +3615,9 @@
     ],
     "test.test[Scheme-IgnoreWeakSchemaForceInfer-Debug]": [
         {
-            "checksum": "cc19159e66bb4b15cf330e1c76f1b94e",
-            "size": 1047,
-            "uri": "https://{canondata_backend}/1809005/bb560db2ac536757475d6779b427321ab1d72d73/resource.tar.gz#test.test_Scheme-IgnoreWeakSchemaForceInfer-Debug_/opt.yql"
+            "checksum": "78697c6d31cbf9c8160cc7d96917f3dc",
+            "size": 1086,
+            "uri": "https://{canondata_backend}/1937429/2682c8a03858172f9dad8df2693362bf0f706d46/resource.tar.gz#test.test_Scheme-IgnoreWeakSchemaForceInfer-Debug_/opt.yql"
         }
     ],
     "test.test[Scheme-IgnoreWeakSchemaForceInfer-Plan]": [
@@ -3629,9 +3629,9 @@
     ],
     "test.test[Scheme-IgnoreWeakSchemaForceInfer-Results]": [
         {
-            "checksum": "9dbc94fa72f7c2539bc56046802d1e65",
-            "size": 4950,
-            "uri": "https://{canondata_backend}/212715/879d99eb75d342eeb6cf2b58f2993892131bc9f3/resource.tar.gz#test.test_Scheme-IgnoreWeakSchemaForceInfer-Results_/results.txt"
+            "checksum": "82ba27b9c664d3a5bdc1c30d35012dad",
+            "size": 5018,
+            "uri": "https://{canondata_backend}/1937429/2682c8a03858172f9dad8df2693362bf0f706d46/resource.tar.gz#test.test_Scheme-IgnoreWeakSchemaForceInfer-Results_/results.txt"
         }
     ],
     "test.test[TimeOrderRecover-ahead-Debug]": [

+ 6 - 6
ydb/library/yql/tests/s-expressions/yt_native_file/part8/canondata/result.json

@@ -4089,9 +4089,9 @@
     ],
     "test.test[Scheme-IgnoreWeakSchemaInfer-Debug]": [
         {
-            "checksum": "e63c356651c75dc70c65541367229ebc",
-            "size": 1042,
-            "uri": "https://{canondata_backend}/1942278/4c15a9cfa410297d35d23e1a39d5d898babd2365/resource.tar.gz#test.test_Scheme-IgnoreWeakSchemaInfer-Debug_/opt.yql"
+            "checksum": "5bab2767b3e0d088d51100c7aeed0d67",
+            "size": 1081,
+            "uri": "https://{canondata_backend}/212715/69d6d07c474bcd145a6dcc79b47a46750f970a4b/resource.tar.gz#test.test_Scheme-IgnoreWeakSchemaInfer-Debug_/opt.yql"
         }
     ],
     "test.test[Scheme-IgnoreWeakSchemaInfer-Plan]": [
@@ -4103,9 +4103,9 @@
     ],
     "test.test[Scheme-IgnoreWeakSchemaInfer-Results]": [
         {
-            "checksum": "9dbc94fa72f7c2539bc56046802d1e65",
-            "size": 4950,
-            "uri": "https://{canondata_backend}/1881367/dc31ea12cef2a161dc30af1f36c0f4693d88cfbf/resource.tar.gz#test.test_Scheme-IgnoreWeakSchemaInfer-Results_/results.txt"
+            "checksum": "82ba27b9c664d3a5bdc1c30d35012dad",
+            "size": 5018,
+            "uri": "https://{canondata_backend}/212715/69d6d07c474bcd145a6dcc79b47a46750f970a4b/resource.tar.gz#test.test_Scheme-IgnoreWeakSchemaInfer-Results_/results.txt"
         }
     ],
     "test.test[Scheme-IgnoreWeakSchemaOnWeak-Debug]": [],