Browse Source

Remove empty column groups
commit_hash:6acbcc5492b78c6f8716669a42353da7b605e8a0

udovichenko-r 1 month ago
parent
commit
a635344002

+ 10 - 6
yt/yql/providers/yt/provider/yql_yt_datasink.cpp

@@ -266,12 +266,16 @@ public:
             }
             if (auto columnGroup = NYql::GetSetting(*res->Child(TYtWriteTable::idx_Settings), EYtSettingType::ColumnGroups)) {
                 const TString normalized = NormalizeColumnGroupSpec(columnGroup->Tail().Content());
-                res = ctx.ChangeChild(*res, TYtWriteTable::idx_Settings,
-                    NYql::UpdateSettingValue(*res->Child(TYtWriteTable::idx_Settings),
-                        EYtSettingType::ColumnGroups,
-                        ctx.NewAtom(res->Child(TYtWriteTable::idx_Settings)->Pos(), normalized, TNodeFlags::MultilineContent),
-                        ctx)
-                    );
+                if (normalized) {
+                    res = ctx.ChangeChild(*res, TYtWriteTable::idx_Settings,
+                        NYql::UpdateSettingValue(*res->Child(TYtWriteTable::idx_Settings),
+                            EYtSettingType::ColumnGroups,
+                            ctx.NewAtom(res->Child(TYtWriteTable::idx_Settings)->Pos(), normalized, TNodeFlags::MultilineContent),
+                            ctx)
+                        );
+                } else {
+                    res = ctx.ChangeChild(*res, TYtWriteTable::idx_Settings, NYql::RemoveSetting(*res->Child(TYtWriteTable::idx_Settings), EYtSettingType::ColumnGroups, ctx));
+                }
             } else if (NYql::HasSetting(*res->Child(TYtWriteTable::idx_Table)->Child(TYtTable::idx_Settings), EYtSettingType::Anonymous)) {
                 if (const auto mode = State_->Configuration->ColumnGroupMode.Get().GetOrElse(EColumnGroupMode::Disable); mode != EColumnGroupMode::Disable) {
                     res = ctx.ChangeChild(*res, TYtWriteTable::idx_Settings,

+ 3 - 0
yt/yql/providers/yt/provider/yql_yt_op_settings.cpp

@@ -1005,6 +1005,9 @@ bool ValidateColumnGroups(const TExprNode& setting, const TStructExprType& rowTy
 TString NormalizeColumnGroupSpec(const TStringBuf spec) {
     try {
         auto columnGroups = NYT::NodeFromYsonString(spec);
+        if (columnGroups.AsMap().empty()) {
+            return {};
+        }
         for (auto& grp: columnGroups.AsMap()) {
             if (!grp.second.IsEntity()) {
                 std::stable_sort(grp.second.AsList().begin(), grp.second.AsList().end(), [](const auto& l, const auto& r) { return l.AsString() < r.AsString(); });

+ 1 - 0
yt/yql/tests/sql/suites/column_group/hint-disable.cfg

@@ -3,6 +3,7 @@ out Output1 output1.txt
 out Output2 output2.txt
 out Output3 output3.txt
 out Output4 output4.txt
+out Output5 output5.txt
 providers yt
 pragma yt.ColumnGroupMode="disable"
 pragma yt.OptimizeFor="scan"

+ 1 - 0
yt/yql/tests/sql/suites/column_group/hint-perusage.cfg

@@ -3,6 +3,7 @@ out Output1 output1.txt
 out Output2 output2.txt
 out Output3 output3.txt
 out Output4 output4.txt
+out Output5 output5.txt
 providers yt
 pragma yt.ColumnGroupMode="perusage"
 pragma yt.OptimizeFor="scan"

+ 1 - 0
yt/yql/tests/sql/suites/column_group/hint-single.cfg

@@ -3,6 +3,7 @@ out Output1 output1.txt
 out Output2 output2.txt
 out Output3 output3.txt
 out Output4 output4.txt
+out Output5 output5.txt
 providers yt
 pragma yt.ColumnGroupMode="single"
 pragma yt.OptimizeFor="scan"

+ 3 - 0
yt/yql/tests/sql/suites/column_group/hint.sql

@@ -4,6 +4,7 @@ $i1 = select * from Input where a > "a"; -- several publish consumers with same
 $i2 = select * from Input where a > "a1"; -- several publish consumers with different groups
 $i3 = select * from Input where a < "a2"; -- several consumers including publish
 $i4 = select * from Input where a != "a"; -- several publish consumers with and without groups
+$i5 = select * from Input where a != "b"; -- single publish consumer with no groups (special case)
 
 -- test column group spec normalization
 insert into Output1 with column_groups="{g1=[a;b;c];def=#}" select * from $i1;
@@ -17,4 +18,6 @@ insert into Output3 with column_groups="{g1=[a;b;c];def=#}" select * from $i4;
 
 insert into Output4 select * from $i4;
 
+insert into Output5 with column_groups="{}" select * from $i5;
+
 select a,b,c,d from $i3;