Browse Source

dq: add cbo cost functions (#7617)

yumkam 7 months ago
parent
commit
d1f17fc6e4

+ 69 - 1
ydb/library/yql/providers/dq/opt/logical_optimize.cpp

@@ -38,6 +38,74 @@ bool IsStreamLookup(const TCoEquiJoinTuple& joinTuple) {
 
 }
 
+/**
+ * DQ-specific provider context for the CBO: overrides the join applicability check and the join cost function.
+*/
+struct TDqCBOProviderContext : public NYql::TBaseProviderContext {
+    TDqCBOProviderContext(TTypeAnnotationContext& typeCtx, const TDqConfiguration::TPtr& config)
+        : NYql::TBaseProviderContext()
+        , Config(config)
+        , TypesCtx(typeCtx) {}
+
+    virtual bool IsJoinApplicable(const std::shared_ptr<NYql::IBaseOptimizerNode>& left, // Tells whether joinAlgo may be used for joinKind under Config.
+        const std::shared_ptr<NYql::IBaseOptimizerNode>& right,
+        const std::set<std::pair<NYql::NDq::TJoinColumn, NYql::NDq::TJoinColumn>>& joinConditions,
+        const TVector<TString>& leftJoinKeys, const TVector<TString>& rightJoinKeys,
+        NYql::EJoinAlgoType joinAlgo,  NYql::EJoinKind joinKind) override;
+
+    virtual double ComputeJoinCost(const NYql::TOptimizerStatistics& leftStats, const NYql::TOptimizerStatistics& rightStats, const double outputRows, const double outputByteSize, NYql::EJoinAlgoType joinAlgo) const override; // Cost estimate built from row counts; the formula depends on joinAlgo.
+
+    TDqConfiguration::TPtr Config; // DQ settings; IsJoinApplicable reads HashJoinMode from it.
+    TTypeAnnotationContext& TypesCtx; // Stored by reference (not owned), so the referenced object must outlive this context.
+};
+
+
+bool TDqCBOProviderContext::IsJoinApplicable(const std::shared_ptr<NYql::IBaseOptimizerNode>& left, // Returns true when joinAlgo is allowed for joinKind under the current Config.
+        const std::shared_ptr<NYql::IBaseOptimizerNode>& right,
+        const std::set<std::pair<NYql::NDq::TJoinColumn, NYql::NDq::TJoinColumn>>& joinConditions,
+        const TVector<TString>& leftJoinKeys, const TVector<TString>& rightJoinKeys,
+        NYql::EJoinAlgoType joinAlgo,  NYql::EJoinKind joinKind) {
+    Y_UNUSED(left); // Only joinAlgo, joinKind and Config are consulted; the other parameters are ignored.
+    Y_UNUSED(right);
+    Y_UNUSED(joinConditions);
+    Y_UNUSED(leftJoinKeys);
+    Y_UNUSED(rightJoinKeys);
+
+    switch(joinAlgo) {
+
+    case EJoinAlgoType::MapJoin:
+        if (joinKind == EJoinKind::OuterJoin || joinKind == EJoinKind::Exclusion) // MapJoin is rejected for these two join kinds.
+            return false;
+        if (auto hashJoinMode = Config->HashJoinMode.Get().GetOrElse(EHashJoinMode::Off); // An unset HashJoinMode is treated as Off.
+                hashJoinMode == EHashJoinMode::Off || hashJoinMode == EHashJoinMode::Map) // MapJoin is accepted only in Off or Map mode.
+            return true;
+        break; // Any other mode falls through to the final `return false`.
+
+    case EJoinAlgoType::GraceJoin:
+        return true; // GraceJoin is accepted unconditionally.
+
+    default:
+        break; // Every other algorithm is rejected by the final `return false`.
+    }
+    return false;
+}
+
+
+double TDqCBOProviderContext::ComputeJoinCost(const TOptimizerStatistics& leftStats, const TOptimizerStatistics& rightStats, const double outputRows, const double outputByteSize, EJoinAlgoType joinAlgo) const  { // Returns a cost estimate computed from input and output row counts.
+    Y_UNUSED(outputByteSize); // The output byte size does not enter either formula below.
+
+    switch(joinAlgo) {
+        case EJoinAlgoType::MapJoin:
+            return 1.5 * (leftStats.Nrows + 1.8 * rightStats.Nrows + outputRows); // Right-side rows are weighted 1.8 for MapJoin.
+        case EJoinAlgoType::GraceJoin:
+            return 1.5 * (leftStats.Nrows + 2.0 * rightStats.Nrows + outputRows); // Same shape as MapJoin, but right-side rows are weighted 2.0.
+        default:
+            Y_ENSURE(false, "Illegal join type encountered"); // Always-failing check; presumably throws, which would make the return below unreachable (TODO confirm).
+            return 0;
+    }
+}
+
+
 class TDqsLogicalOptProposalTransformer : public TOptimizeTransformerBase {
 public:
     TDqsLogicalOptProposalTransformer(TTypeAnnotationContext* typeCtx, const TDqConfiguration::TPtr& config)
@@ -206,7 +274,7 @@ protected:
             };
 
             std::unique_ptr<IOptimizerNew> opt;
-            TBaseProviderContext pctx;
+            TDqCBOProviderContext pctx(TypesCtx, Config);
 
             switch (TypesCtx.CostBasedOptimizer) {
             case ECostBasedOptimizerType::Native:

+ 9 - 9
ydb/library/yql/tests/sql/dq_file/part17/canondata/result.json

@@ -755,23 +755,23 @@
     "test.test[dq-blacklisted_pragmas--Results]": [],
     "test.test[dq-join_cbo_native_3_tables--Analyze]": [
         {
-            "checksum": "94e6af2e865eab35e76cc9963452ad0d",
-            "size": 13889,
-            "uri": "https://{canondata_backend}/1924537/25b076e867ef14e000e544c6dba85dd8ef6e3620/resource.tar.gz#test.test_dq-join_cbo_native_3_tables--Analyze_/plan.txt"
+            "checksum": "90555f07378f801872485e6ac96dfd73",
+            "size": 12314,
+            "uri": "https://{canondata_backend}/1942278/d3f67196e7e0096e289743f5dbfd5dc2f990f9e6/resource.tar.gz#test.test_dq-join_cbo_native_3_tables--Analyze_/plan.txt"
         }
     ],
     "test.test[dq-join_cbo_native_3_tables--Debug]": [
         {
-            "checksum": "fd20054511c7328de8f8c6c45539b48b",
-            "size": 5339,
-            "uri": "https://{canondata_backend}/1936273/7a32049e7d34640d0891b0eccadb21c671bd9ed5/resource.tar.gz#test.test_dq-join_cbo_native_3_tables--Debug_/opt.yql_patched"
+            "checksum": "91570a2f667516ba1f3f28642698441f",
+            "size": 4802,
+            "uri": "https://{canondata_backend}/1942278/d3f67196e7e0096e289743f5dbfd5dc2f990f9e6/resource.tar.gz#test.test_dq-join_cbo_native_3_tables--Debug_/opt.yql_patched"
         }
     ],
     "test.test[dq-join_cbo_native_3_tables--Plan]": [
         {
-            "checksum": "94e6af2e865eab35e76cc9963452ad0d",
-            "size": 13889,
-            "uri": "https://{canondata_backend}/1924537/25b076e867ef14e000e544c6dba85dd8ef6e3620/resource.tar.gz#test.test_dq-join_cbo_native_3_tables--Plan_/plan.txt"
+            "checksum": "90555f07378f801872485e6ac96dfd73",
+            "size": 12314,
+            "uri": "https://{canondata_backend}/1942278/d3f67196e7e0096e289743f5dbfd5dc2f990f9e6/resource.tar.gz#test.test_dq-join_cbo_native_3_tables--Plan_/plan.txt"
         }
     ],
     "test.test[dq-join_cbo_native_3_tables--Results]": [