Просмотр исходного кода

Added support for optional for predicate selectivity

Added support for optional for predicate selectivity
pavelvelikhov 1 год назад
Родитель
Commit
73482b1b1f

+ 1 - 78
ydb/core/kqp/opt/kqp_constant_folding_transformer.cpp

@@ -1,5 +1,6 @@
 #include "kqp_constant_folding_transformer.h"
 
+#include <ydb/library/yql/dq/opt/dq_opt_stat.h>
 #include <ydb/library/yql/utils/log/log.h>
 #include <ydb/library/yql/core/yql_expr_type_annotation.h>
 
@@ -9,84 +10,6 @@ using namespace NKikimr::NKqp;
 using namespace NYql::NDq;
 
 namespace {
-
-    /***
-     * We maintain a white list of callables that we consider part of constant expressions
-     * All other callables will not be evaluated
-     */
-    THashSet<TString> constantFoldingWhiteList = {
-        "Concat", "Just", "Optional","SafeCast",
-        "+", "-", "*", "/", "%"};
-
-    bool NeedCalc(NNodes::TExprBase node) { // Decides whether node needs calculation: false for the literal forms checked below, true otherwise.
-        auto type = node.Ref().GetTypeAnn();
-        if (type->IsSingleton()) { // nodes of a singleton type: no calculation needed
-            return false;
-        }
-
-        if (type->GetKind() == ETypeAnnotationKind::Optional) {
-            if (node.Maybe<TCoNothing>()) { // a Nothing node is already in final form
-                return false;
-            }
-            if (auto maybeJust = node.Maybe<TCoJust>()) {
-                return NeedCalc(maybeJust.Cast().Input()); // Just(x) needs calculation exactly when x does
-            }
-            return true; // any other optional-typed node needs calculation
-        }
-
-        if (type->GetKind() == ETypeAnnotationKind::Tuple) {
-            if (auto maybeTuple = node.Maybe<TExprList>()) {
-                return AnyOf(maybeTuple.Cast(), [](const auto& item) { return NeedCalc(item); }); // an explicit tuple needs calculation if any of its items does
-            }
-            return true;
-        }
-
-        if (type->GetKind() == ETypeAnnotationKind::List) {
-            if (node.Maybe<TCoList>()) {
-                YQL_ENSURE(node.Ref().ChildrenSize() == 1, "Should be rewritten to AsList"); // a List node with more than one child is rejected here
-                return false;
-            }
-            if (auto maybeAsList = node.Maybe<TCoAsList>()) {
-                return AnyOf(maybeAsList.Cast().Args(), [](const auto& item) { return NeedCalc(NNodes::TExprBase(item)); }); // AsList needs calculation if any argument does
-            }
-            return true;
-        }
-
-        YQL_ENSURE(type->GetKind() == ETypeAnnotationKind::Data, // every kind not handled above must be Data
-                   "Object of type " << *type << " should not be considered for calculation");
-
-        return !node.Maybe<TCoDataCtor>(); // a data constructor node needs no calculation; any other data-typed node does
-    }
-
-    /***
-     * Check if the expression is a constant expression
-     * Its type annotation needs to specify that it's a data type (or an optional of a data type), and then we check:
-     *   - If it's a literal (NeedCalc returns false), it's a constant expression
-     *   - If it's a callable in the white list and all children are constant expressions, then it's a constant expression
-     *   - A child whose type annotation is a type expression also passes the check
-     */
-    bool IsConstantExpr(const TExprNode::TPtr& input) {
-        if (!IsDataOrOptionalOfData(input->GetTypeAnn())) {
-            return false;
-        }
-
-        if (!NeedCalc(TExprBase(input))) { // nothing left to calculate: the node is already a literal
-            return true;
-        }
-
-        else if (input->IsCallable(constantFoldingWhiteList)) {
-            for (size_t i = 0; i < input->ChildrenSize(); i++) {
-                auto callableInput = input->Child(i);
-                if (callableInput->GetTypeAnn()->GetKind() != ETypeAnnotationKind::Type && !IsConstantExpr(callableInput)) { // type-expression children are skipped; every other child must itself be constant
-                    return false;
-                }
-            }
-            return true;
-        }
-
-        return false; // a callable outside the white list is never treated as constant
-    }
-
     /**
      * Traverse a lambda and create a mapping from nodes to nodes wrapped in EvaluateExpr callable
      * We check for literals specifically, since they shouldn't be evaluated

+ 1 - 0
ydb/core/kqp/ut/common/CMakeLists.darwin-arm64.txt

@@ -33,4 +33,5 @@ target_sources(kqp-ut-common PRIVATE
   ${CMAKE_SOURCE_DIR}/ydb/core/kqp/ut/common/re2_udf.cpp
   ${CMAKE_SOURCE_DIR}/ydb/core/kqp/ut/common/string_udf.cpp
   ${CMAKE_SOURCE_DIR}/ydb/core/kqp/ut/common/columnshard.cpp
+  ${CMAKE_SOURCE_DIR}/ydb/core/kqp/ut/common/datetime2_udf.cpp
 )

+ 1 - 0
ydb/core/kqp/ut/common/CMakeLists.darwin-x86_64.txt

@@ -33,4 +33,5 @@ target_sources(kqp-ut-common PRIVATE
   ${CMAKE_SOURCE_DIR}/ydb/core/kqp/ut/common/re2_udf.cpp
   ${CMAKE_SOURCE_DIR}/ydb/core/kqp/ut/common/string_udf.cpp
   ${CMAKE_SOURCE_DIR}/ydb/core/kqp/ut/common/columnshard.cpp
+  ${CMAKE_SOURCE_DIR}/ydb/core/kqp/ut/common/datetime2_udf.cpp
 )

+ 1 - 0
ydb/core/kqp/ut/common/CMakeLists.linux-aarch64.txt

@@ -34,4 +34,5 @@ target_sources(kqp-ut-common PRIVATE
   ${CMAKE_SOURCE_DIR}/ydb/core/kqp/ut/common/re2_udf.cpp
   ${CMAKE_SOURCE_DIR}/ydb/core/kqp/ut/common/string_udf.cpp
   ${CMAKE_SOURCE_DIR}/ydb/core/kqp/ut/common/columnshard.cpp
+  ${CMAKE_SOURCE_DIR}/ydb/core/kqp/ut/common/datetime2_udf.cpp
 )

+ 1 - 0
ydb/core/kqp/ut/common/CMakeLists.linux-x86_64.txt

@@ -34,4 +34,5 @@ target_sources(kqp-ut-common PRIVATE
   ${CMAKE_SOURCE_DIR}/ydb/core/kqp/ut/common/re2_udf.cpp
   ${CMAKE_SOURCE_DIR}/ydb/core/kqp/ut/common/string_udf.cpp
   ${CMAKE_SOURCE_DIR}/ydb/core/kqp/ut/common/columnshard.cpp
+  ${CMAKE_SOURCE_DIR}/ydb/core/kqp/ut/common/datetime2_udf.cpp
 )

+ 1 - 0
ydb/core/kqp/ut/common/CMakeLists.windows-x86_64.txt

@@ -33,4 +33,5 @@ target_sources(kqp-ut-common PRIVATE
   ${CMAKE_SOURCE_DIR}/ydb/core/kqp/ut/common/re2_udf.cpp
   ${CMAKE_SOURCE_DIR}/ydb/core/kqp/ut/common/string_udf.cpp
   ${CMAKE_SOURCE_DIR}/ydb/core/kqp/ut/common/columnshard.cpp
+  ${CMAKE_SOURCE_DIR}/ydb/core/kqp/ut/common/datetime2_udf.cpp
 )

+ 9 - 0
ydb/core/kqp/ut/common/datetime2_udf.cpp

@@ -0,0 +1,9 @@
+#include <ydb/library/yql/udfs/common/datetime2/datetime_udf.cpp>
+
+namespace NKikimr::NKqp {
+
+NYql::NUdf::TUniquePtr<NYql::NUdf::IUdfModule> CreateDateTime2Module() { // Factory: returns a newly allocated TDateTime2Module behind the IUdfModule interface.
+    return new ::TDateTime2Module(); // global-namespace type, presumably defined by the datetime_udf.cpp included at the top of this file; NOTE(review): the raw pointer is presumably adopted by the returned TUniquePtr — confirm
+}
+
+} // namespace NKikimr::NKqp

+ 1 - 0
ydb/core/kqp/ut/common/ya.make

@@ -7,6 +7,7 @@ SRCS(
     re2_udf.cpp
     string_udf.cpp
     columnshard.cpp
+    datetime2_udf.cpp
 )
 
 PEERDIR(

+ 29 - 2
ydb/core/kqp/ut/join/kqp_join_order_ut.cpp

@@ -19,6 +19,7 @@ static void CreateSampleTable(TSession session) {
         CREATE TABLE `/Root/R` (
             id Int32,
             payload1 String,
+            ts Date,
             PRIMARY KEY (id)
         );
     )").GetValueSync().IsSuccess());
@@ -57,8 +58,8 @@ static void CreateSampleTable(TSession session) {
 
         UNIT_ASSERT(session.ExecuteDataQuery(R"(
 
-        REPLACE INTO `/Root/R` (id, payload1) VALUES
-            (1, "blah");
+        REPLACE INTO `/Root/R` (id, payload1, ts) VALUES
+            (1, "blah", CAST("1998-12-01" AS Date) );
 
         REPLACE INTO `/Root/S` (id, payload2) VALUES
             (1, "blah");
@@ -431,6 +432,32 @@ Y_UNIT_TEST_SUITE(KqpJoinOrder) {
             Cout << result.GetPlan();
         }
     }
+
+    Y_UNIT_TEST(DatetimeConstantFold) { // Requests a plan for a query whose predicate has a constant Date minus Interval expression on the right-hand side.
+
+        auto kikimr = GetKikimrWithJoinSettings();
+        auto db = kikimr.GetTableClient();
+        auto session = db.CreateSession().GetValueSync().GetSession();
+
+        CreateSampleTable(session);
+
+        /* single-table filter: the column cast to Timestamp is compared with a constant expression (Date cast minus Interval) */
+        {
+            const TString query = Q_(R"(
+                SELECT *
+                FROM `/Root/R` as R
+                WHERE CAST(R.ts AS Timestamp) = (CAST('1998-12-01' AS Date) - Interval("P100D"))
+            )");
+
+            auto result = session.ExplainDataQuery(query).ExtractValueSync();
+
+            UNIT_ASSERT_VALUES_EQUAL(result.GetStatus(), EStatus::SUCCESS); // the only assertion in this test: the explain request must succeed
+
+            NJson::TJsonValue plan;
+            NJson::ReadJsonTree(result.GetPlan(), &plan, true); // parses the plan as JSON; NOTE(review): the trailing 'true' is presumably throwOnError — confirm
+            Cout << result.GetPlan(); // the plan is printed; the parsed 'plan' value is not inspected afterwards
+        }
+    }
 }
 
 }

+ 6 - 34
ydb/library/yql/dq/opt/dq_opt_predicate_selectivity.cpp

@@ -8,8 +8,6 @@ using namespace NYql::NNodes;
 
 namespace {
 
-    THashSet<TString> exprCallables = {"SafeCast"};
-
     /**
      * Check if a callable is an attribute of some table
      * Currently just return a boolean and cover only basic cases
@@ -22,34 +20,8 @@ namespace {
             return IsAttribute(cast.Cast().Value(), attributeName);
         } else if (auto ifPresent = input.Maybe<TCoIfPresent>()) {
             return IsAttribute(ifPresent.Cast().Optional(), attributeName);
-        }
-
-        return false;
-    }
-
-    /**
-     * Check that the expression is a constant expression
-     * We use a whitelist of callables
-     */
-    bool IsConstant(const TExprBase& input) {
-        if (input.Maybe<TCoDataCtor>()){
-            return true;
-        } else if (input.Ref().IsCallable(exprCallables)) {
-            if (input.Ref().ChildrenSize() >= 1) {
-                for (size_t i = 0; i < input.Ref().ChildrenSize(); i++) {
-                    auto callableInput = TExprBase(input.Ref().Child(i));
-                    if (!IsConstant(callableInput)) {
-                        return false;
-                    }
-                }
-                return true; 
-            } else {
-                return false;
-            }
-        } else if (auto op = input.Maybe<TCoBinaryArithmetic>()) {
-            auto left = op.Cast().Left();
-            auto right = op.Cast().Right();
-            return IsConstant(left) && IsConstant(right);
+        } else if (auto just = input.Maybe<TCoJust>()) {
+            return IsAttribute(just.Cast().Input(), attributeName);
         }
 
         return false;
@@ -100,7 +72,7 @@ double NYql::NDq::ComputePredicateSelectivity(const TExprBase& input, const std:
 
         TString attributeName;
 
-        if (IsAttribute(right, attributeName) && IsConstant(left)) {
+        if (IsAttribute(right, attributeName) && IsConstantExpr(left.Ptr())) {
             std::swap(left, right);
         }
 
@@ -114,7 +86,7 @@ double NYql::NDq::ComputePredicateSelectivity(const TExprBase& input, const std:
             // In case the right side is a constant that can be extracted, compute the selectivity using statistics
             // Currently, with the basic statistics we just return 1/nRows
 
-            else if (IsConstant(right)) {
+            else if (IsConstantExpr(right.Ptr())) {
                 if (stats->KeyColumns.size()==1 && attributeName==stats->KeyColumns[0]) {
                     if (stats->Nrows > 1) {
                         result = 1.0 / stats->Nrows;
@@ -141,7 +113,7 @@ double NYql::NDq::ComputePredicateSelectivity(const TExprBase& input, const std:
         auto right = comparison.Cast().Right();
 
         TString attributeName;
-        if (IsAttribute(right, attributeName) && IsConstant(left)) {
+        if (IsAttribute(right, attributeName) && IsConstantExpr(left.Ptr())) {
             std::swap(left, right);
         }
 
@@ -152,7 +124,7 @@ double NYql::NDq::ComputePredicateSelectivity(const TExprBase& input, const std:
             }
             // In case the right side is a constant that can be extracted, compute the selectivity using statistics
             // Currently, with the basic statistics we just return 0.5
-            else if (IsConstant(right)) {
+            else if (IsConstantExpr(right.Ptr())) {
                 result = 0.5;
             }
         }

Некоторые файлы не были показаны из-за большого количества измененных файлов