Browse Source

Collect statistic about unsuccessful block rewrites for callables and types (#7642)

Andrey Neporada 7 months ago
parent
commit
1345ce6aaa

+ 43 - 0
ydb/library/yql/core/facade/yql_facade.cpp

@@ -30,6 +30,7 @@
 
 #include <util/stream/file.h>
 #include <util/stream/null.h>
+#include <util/string/join.h>
 #include <util/string/split.h>
 #include <util/generic/guid.h>
 #include <util/system/rusage.h>
@@ -1629,6 +1630,48 @@ NThreading::TFuture<void> TProgram::Abort()
     return CloseLastSession();
 }
 
+TIssues TProgram::Issues() const {
+    TIssues result;
+    if (ExprCtx_) {
+        result.AddIssues(ExprCtx_->IssueManager.GetIssues());
+    }
+    result.AddIssues(FinalIssues_);
+    return result;
+}
+
+TIssues TProgram::CompletedIssues() const {
+    TIssues result;
+    if (ExprCtx_) {
+        result.AddIssues(ExprCtx_->IssueManager.GetCompletedIssues());
+    }
+    result.AddIssues(FinalIssues_);
+    return result;
+}
+
+TIssue MakeNoBlocksInfoIssue(const TVector<TString>& names, bool isTypes) {
+    TIssue result;
+    TString msg = TStringBuilder() << "Most frequent " << (isTypes ? "types " : "callables ")
+                                   << "which do not support block mode: " << JoinRange(", ", names.begin(), names.end());
+    result.SetMessage(msg);
+    result.SetCode(isTypes ? TIssuesIds::CORE_TOP_UNSUPPORTED_BLOCK_TYPES : TIssuesIds::CORE_TOP_UNSUPPORTED_BLOCK_CALLABLES, TSeverityIds::S_INFO);
+    return result;
+}
+
+void TProgram::FinalizeIssues() {
+    FinalIssues_.Clear();
+    if (TypeCtx_) {
+        static const size_t topCount = 10;
+        auto noBlockTypes = TypeCtx_->GetTopNoBlocksTypes(topCount);
+        if (!noBlockTypes.empty()) {
+            FinalIssues_.AddIssue(MakeNoBlocksInfoIssue(noBlockTypes, true));
+        }
+        auto noBlockCallables = TypeCtx_->GetTopNoBlocksCallables(topCount);
+        if (!noBlockCallables.empty()) {
+            FinalIssues_.AddIssue(MakeNoBlocksInfoIssue(noBlockCallables, false));
+        }
+    }
+}
+
 NThreading::TFuture<void> TProgram::CleanupLastSession() {
     YQL_LOG_CTX_ROOT_SESSION_SCOPE(GetSessionId());
 

+ 5 - 18
ydb/library/yql/core/facade/yql_facade.h

@@ -192,28 +192,14 @@ public:
     [[nodiscard]]
     NThreading::TFuture<void> Abort();
 
-    inline TIssues Issues() {
-        if (ExprCtx_) {
-            return ExprCtx_->IssueManager.GetIssues();
-        } else {
-            return {};
-        }
-    }
-
-    inline TIssues CompletedIssues() const {
-        if (ExprCtx_) {
-            return ExprCtx_->IssueManager.GetCompletedIssues();
-        } else {
-            return {};
-        }
-    }
+    TIssues Issues() const;
+    TIssues CompletedIssues() const;
+    void FinalizeIssues();
 
     void Print(IOutputStream* exprOut, IOutputStream* planOut, bool cleanPlan = false);
 
     inline void PrintErrorsTo(IOutputStream& out) const {
-        if (ExprCtx_) {
-            ExprCtx_->IssueManager.GetIssues().PrintWithProgramTo(out, Filename_, SourceCode_);
-        }
+        Issues().PrintWithProgramTo(out, Filename_, SourceCode_);
     }
 
     inline const TAstNode* AstRoot() const {
@@ -455,6 +441,7 @@ private:
     TMaybe<TString> LineageStr_;
 
     TQContext QContext_;
+    TIssues FinalIssues_;
 };
 
 } // namspace NYql

+ 4 - 0
ydb/library/yql/core/issue/protos/issue_id.proto

@@ -39,6 +39,10 @@ message TIssuesIds {
         CORE_ALIAS_SHADOWS_COLUMN = 1111;
         CORE_LINEAGE_INTERNAL_ERROR = 1112;
 
+// core informational
+        CORE_TOP_UNSUPPORTED_BLOCK_TYPES = 1200;
+        CORE_TOP_UNSUPPORTED_BLOCK_CALLABLES = 1201;
+
 // core errors
         CORE_GC_NODES_LIMIT_EXCEEDED = 1500;
         CORE_GC_STRINGS_LIMIT_EXCEEDED = 1501;

+ 8 - 0
ydb/library/yql/core/issue/yql_issue.txt

@@ -663,3 +663,11 @@ ids {
   code: YT_SECURE_DATA_IN_COMMON_TMP
   severity: S_WARNING
 }
+ids {
+  code: CORE_TOP_UNSUPPORTED_BLOCK_TYPES
+  severity: S_INFO
+}
+ids {
+  code: CORE_TOP_UNSUPPORTED_BLOCK_CALLABLES
+  severity: S_INFO
+}

+ 18 - 10
ydb/library/yql/core/peephole_opt/yql_opt_peephole_physical.cpp

@@ -2291,10 +2291,11 @@ IGraphTransformer::TStatus PeepHoleFinalStage(const TExprNode::TPtr& input, TExp
 }
 
 IGraphTransformer::TStatus PeepHoleBlockStage(const TExprNode::TPtr& input, TExprNode::TPtr& output,
-    TExprContext& ctx, TTypeAnnotationContext& types, const TExtPeepHoleOptimizerMap& extOptimizers)
+    TExprContext& ctx, TTypeAnnotationContext& types, const TExtPeepHoleOptimizerMap& extOptimizers, TProcessedNodesSet& cache)
 {
     TOptimizeExprSettings settings(&types);
     settings.CustomInstantTypeTransformer = types.CustomInstantTypeTransformer.Get();
+    settings.ProcessedNodes = &cache;
 
     return OptimizeExpr(input, output, [&types, &extOptimizers](
         const TExprNode::TPtr& node, TExprContext& ctx) -> TExprNode::TPtr {
@@ -5693,7 +5694,11 @@ bool CollectBlockRewrites(const TMultiExprType* multiInputType, bool keepInputCo
         allInputTypes.push_back(i);
     }
 
-    auto resolveStatus = types.ArrowResolver->AreTypesSupported(ctx.GetPosition(lambda->Pos()), allInputTypes, ctx);
+    const IArrowResolver::TUnsupportedTypeCallback onUnsupportedType = [&types](const auto& typeKindOrSlot) {
+        std::visit([&types](const auto& value) { types.IncNoBlockType(value); }, typeKindOrSlot);
+    };
+
+    auto resolveStatus = types.ArrowResolver->AreTypesSupported(ctx.GetPosition(lambda->Pos()), allInputTypes, ctx, onUnsupportedType);
     YQL_ENSURE(resolveStatus != IArrowResolver::ERROR);
     if (resolveStatus != IArrowResolver::OK) {
         return false;
@@ -5747,7 +5752,7 @@ bool CollectBlockRewrites(const TMultiExprType* multiInputType, bool keepInputCo
         if (node->IsList() || rewriteAsIs ||
             node->IsCallable({"And", "Or", "Xor", "Not", "Coalesce", "Exists", "If", "Just", "AsStruct", "Member", "Nth", "ToPg", "FromPg", "PgResolvedCall", "PgResolvedOp"}))
         {
-            if (node->IsCallable() && !IsSupportedAsBlockType(node->Pos(), *node->GetTypeAnn(), ctx, types)) {
+            if (node->IsCallable() && !IsSupportedAsBlockType(node->Pos(), *node->GetTypeAnn(), ctx, types, true)) {
                 return true;
             }
 
@@ -5775,7 +5780,7 @@ bool CollectBlockRewrites(const TMultiExprType* multiInputType, bool keepInputCo
                 auto child = node->ChildPtr(index);
                 if (!child->GetTypeAnn()->IsComputable()) {
                     funcArgs.push_back(child);
-                } else if (child->IsComplete() && IsSupportedAsBlockType(child->Pos(), *child->GetTypeAnn(), ctx, types)) {
+                } else if (child->IsComplete() && IsSupportedAsBlockType(child->Pos(), *child->GetTypeAnn(), ctx, types, true)) {
                     funcArgs.push_back(ctx.NewCallable(node->Pos(), "AsScalar", { child }));
                 } else if (auto rit = rewrites.find(child.Get()); rit != rewrites.end()) {
                     funcArgs.push_back(rit->second);
@@ -5796,7 +5801,7 @@ bool CollectBlockRewrites(const TMultiExprType* multiInputType, bool keepInputCo
                     auto member = funcArgs[index];
                     auto child = member->TailPtr();
                     TExprNodePtr rewrite;
-                    if (child->IsComplete() && IsSupportedAsBlockType(child->Pos(), *child->GetTypeAnn(), ctx, types)) {
+                    if (child->IsComplete() && IsSupportedAsBlockType(child->Pos(), *child->GetTypeAnn(), ctx, types, true)) {
                         rewrite = ctx.NewCallable(child->Pos(), "AsScalar", { child });
                     } else if (auto rit = rewrites.find(child.Get()); rit != rewrites.end()) {
                         rewrite = rit->second;
@@ -5821,6 +5826,7 @@ bool CollectBlockRewrites(const TMultiExprType* multiInputType, bool keepInputCo
         const bool isUdf = node->IsCallable("Apply") && node->Head().IsCallable("Udf");
         if (isUdf) {
             if (!GetSetting(*node->Head().Child(7), "blocks")) {
+                types.IncNoBlockCallable(node->Head().Head().Content());
                 return true;
             }
         }
@@ -5832,7 +5838,7 @@ bool CollectBlockRewrites(const TMultiExprType* multiInputType, bool keepInputCo
                 allTypes.push_back(node->Child(i)->GetTypeAnn());
             }
 
-            auto resolveStatus = types.ArrowResolver->AreTypesSupported(ctx.GetPosition(node->Pos()), allTypes, ctx);
+            auto resolveStatus = types.ArrowResolver->AreTypesSupported(ctx.GetPosition(node->Pos()), allTypes, ctx, onUnsupportedType);
             YQL_ENSURE(resolveStatus != IArrowResolver::ERROR);
             if (resolveStatus != IArrowResolver::OK) {
                 return true;
@@ -5898,6 +5904,7 @@ bool CollectBlockRewrites(const TMultiExprType* multiInputType, bool keepInputCo
         } else {
             auto fit = funcs.find(node->Content());
             if (fit == funcs.end()) {
+                types.IncNoBlockCallable(node->Content());
                 return true;
             }
 
@@ -5907,6 +5914,7 @@ bool CollectBlockRewrites(const TMultiExprType* multiInputType, bool keepInputCo
             auto resolveStatus = types.ArrowResolver->LoadFunctionMetadata(ctx.GetPosition(node->Pos()), arrowFunctionName, argTypes, outType, ctx);
             YQL_ENSURE(resolveStatus != IArrowResolver::ERROR);
             if (resolveStatus != IArrowResolver::OK) {
+                types.IncNoBlockCallable(node->Content());
                 return true;
             }
             funcArgs.push_back(ExpandType(node->Pos(), *outType, ctx));
@@ -8555,10 +8563,10 @@ THolder<IGraphTransformer> CreatePeepHoleFinalStageTransformer(TTypeAnnotationCo
 
     pipeline.Add(
         CreateFunctorTransformer(
-            [&types](const TExprNode::TPtr& input, TExprNode::TPtr& output, TExprContext& ctx) -> IGraphTransformer::TStatus {
+            [&types, cache = TProcessedNodesSet()](const TExprNode::TPtr& input, TExprNode::TPtr& output, TExprContext& ctx) mutable -> IGraphTransformer::TStatus {
                 if (types.IsBlockEngineEnabled()) {
                     const auto& extStageRules = TPeepHoleRules::Instance().BlockStageExtRules;
-                    return PeepHoleBlockStage(input, output, ctx, types, extStageRules);
+                    return PeepHoleBlockStage(input, output, ctx, types, extStageRules, cache);
                 } else {
                     output = input;
                     return IGraphTransformer::TStatus::Ok;
@@ -8574,10 +8582,10 @@ THolder<IGraphTransformer> CreatePeepHoleFinalStageTransformer(TTypeAnnotationCo
 
     pipeline.Add(
         CreateFunctorTransformer(
-            [&types](const TExprNode::TPtr& input, TExprNode::TPtr& output, TExprContext& ctx) -> IGraphTransformer::TStatus {
+            [&types, cache = TProcessedNodesSet()](const TExprNode::TPtr& input, TExprNode::TPtr& output, TExprContext& ctx) mutable -> IGraphTransformer::TStatus {
                 if (types.IsBlockEngineEnabled()) {
                     const auto& extStageRules = TPeepHoleRules::Instance().BlockStageExtFinalRules;
-                    return PeepHoleBlockStage(input, output, ctx, types, extStageRules);
+                    return PeepHoleBlockStage(input, output, ctx, types, extStageRules, cache);
                 } else {
                     output = input;
                     return IGraphTransformer::TStatus::Ok;

+ 6 - 1
ydb/library/yql/core/yql_arrow_resolver.h

@@ -2,11 +2,15 @@
 
 #include <ydb/library/yql/ast/yql_expr.h>
 
+#include <functional>
+#include <variant>
+
 namespace NYql {
 
 class IArrowResolver : public TThrRefBase {
 public:
     using TPtr = TIntrusiveConstPtr<IArrowResolver>;
+    using TUnsupportedTypeCallback = std::function<void(std::variant<ETypeAnnotationKind, NUdf::EDataSlot>)>;
 
     enum EStatus {
         OK,
@@ -21,7 +25,8 @@ public:
 
     virtual EStatus HasCast(const TPosition& pos, const TTypeAnnotationNode* from, const TTypeAnnotationNode* to, TExprContext& ctx) const = 0;
 
-    virtual EStatus AreTypesSupported(const TPosition& pos, const TVector<const TTypeAnnotationNode*>& types, TExprContext& ctx) const = 0;
+    virtual EStatus AreTypesSupported(const TPosition& pos, const TVector<const TTypeAnnotationNode*>& types, TExprContext& ctx,
+        const TUnsupportedTypeCallback& onUnsupported = {}) const = 0;
 };
 
 }

+ 10 - 2
ydb/library/yql/core/yql_expr_type_annotation.cpp

@@ -3126,12 +3126,20 @@ bool IsWideSequenceBlockType(const TTypeAnnotationNode& type) {
     return IsWideBlockType(*itemType);
 }
 
-bool IsSupportedAsBlockType(TPositionHandle pos, const TTypeAnnotationNode& type, TExprContext& ctx, TTypeAnnotationContext& types) {
+bool IsSupportedAsBlockType(TPositionHandle pos, const TTypeAnnotationNode& type, TExprContext& ctx, TTypeAnnotationContext& types,
+    bool reportUnspported)
+{
     if (!types.ArrowResolver) {
         return false;
     }
 
-    auto resolveStatus = types.ArrowResolver->AreTypesSupported(ctx.GetPosition(pos), { &type }, ctx);
+    IArrowResolver::TUnsupportedTypeCallback onUnsupportedType;
+    if (reportUnspported) {
+        onUnsupportedType  = [&types](const auto& typeKindOrSlot) {
+            std::visit([&types](const auto& value) { types.IncNoBlockType(value); }, typeKindOrSlot);
+        };
+    }
+    auto resolveStatus = types.ArrowResolver->AreTypesSupported(ctx.GetPosition(pos), { &type }, ctx, onUnsupportedType);
     YQL_ENSURE(resolveStatus != IArrowResolver::ERROR);
     return resolveStatus == IArrowResolver::OK;
 }

+ 1 - 1
ydb/library/yql/core/yql_expr_type_annotation.h

@@ -130,7 +130,7 @@ bool EnsureWideStreamType(const TExprNode& node, TExprContext& ctx);
 bool EnsureWideStreamType(TPositionHandle position, const TTypeAnnotationNode& type, TExprContext& ctx);
 bool IsWideBlockType(const TTypeAnnotationNode& type);
 bool IsWideSequenceBlockType(const TTypeAnnotationNode& type);
-bool IsSupportedAsBlockType(TPositionHandle pos, const TTypeAnnotationNode& type, TExprContext& ctx, TTypeAnnotationContext& types);
+bool IsSupportedAsBlockType(TPositionHandle pos, const TTypeAnnotationNode& type, TExprContext& ctx, TTypeAnnotationContext& types, bool reportUnspported = false);
 bool EnsureSupportedAsBlockType(TPositionHandle pos, const TTypeAnnotationNode& type, TExprContext& ctx, TTypeAnnotationContext& types);
 bool EnsureWideBlockType(TPositionHandle position, const TTypeAnnotationNode& type, TTypeAnnotationNode::TListType& blockItemTypes, TExprContext& ctx, bool allowScalar = true);
 bool EnsureWideFlowBlockType(const TExprNode& node, TTypeAnnotationNode::TListType& blockItemTypes, TExprContext& ctx, bool allowScalar = true);

+ 55 - 0
ydb/library/yql/core/yql_type_annotation.cpp

@@ -56,6 +56,61 @@ void TTypeAnnotationContext::Reset() {
     ExpectedConstraints.clear();
     ExpectedColumnOrders.clear();
     StatisticsMap.clear();
+    NoBlockRewriteCallableStats.clear();
+    NoBlockRewriteTypeStats.clear();
+}
+
+void TTypeAnnotationContext::IncNoBlockCallable(TStringBuf callableName) {
+    ++NoBlockRewriteCallableStats[callableName];
+}
+
+void TTypeAnnotationContext::IncNoBlockType(const TTypeAnnotationNode& type) {
+    if (type.GetKind() == ETypeAnnotationKind::Data) {
+        IncNoBlockType(type.Cast<TDataExprType>()->GetSlot());
+    } else {
+        IncNoBlockType(type.GetKind());
+    }
+}
+
+void TTypeAnnotationContext::IncNoBlockType(ETypeAnnotationKind kind) {
+    ++NoBlockRewriteTypeStats[ToString(kind)];
+}
+
+void TTypeAnnotationContext::IncNoBlockType(NUdf::EDataSlot slot) {
+    ++NoBlockRewriteTypeStats[ToString(slot)];
+}
+
+namespace {
+
+template<typename T>
+TVector<T> GetMaxByCount(const THashMap<T, size_t>& stats, size_t maxCount) {
+    TVector<T> result;
+    result.reserve(stats.size());
+    for (auto& [key, _] : stats) {
+        result.push_back(key);
+    }
+    size_t n = std::min(maxCount, stats.size());
+    std::partial_sort(result.begin(), result.begin() + n, result.end(),
+        [&stats](const T& l, const T& r) {
+            const auto& cntLeft = stats.find(l)->second;
+            const auto& cntRight = stats.find(r)->second;
+            if (cntLeft != cntRight) {
+                return cntLeft < cntRight;
+            }
+            return l < r;
+        });
+    result.resize(n);
+    return result;
+}
+
+}
+
+TVector<TString> TTypeAnnotationContext::GetTopNoBlocksCallables(size_t maxCount) const {
+    return GetMaxByCount(NoBlockRewriteCallableStats, maxCount);
+}
+
+TVector<TString> TTypeAnnotationContext::GetTopNoBlocksTypes(size_t maxCount) const {
+    return GetMaxByCount(NoBlockRewriteTypeStats, maxCount);
 }
 
 TString TColumnOrder::Find(const TString& name) const {

+ 10 - 0
ydb/library/yql/core/yql_type_annotation.h

@@ -334,6 +334,8 @@ struct TTypeAnnotationContext: public TThrRefBase {
     ui32 FolderSubDirsLimit = 1000;
     bool UseBlocks = false;
     EBlockEngineMode BlockEngineMode = EBlockEngineMode::Disable;
+    THashMap<TString, size_t> NoBlockRewriteCallableStats;
+    THashMap<TString, size_t> NoBlockRewriteTypeStats;
     TMaybe<bool> PgEmitAggApply;
     IArrowResolver::TPtr ArrowResolver;
     TFileStoragePtr FileStorage;
@@ -441,6 +443,14 @@ struct TTypeAnnotationContext: public TThrRefBase {
     bool IsBlockEngineEnabled() const {
         return BlockEngineMode != EBlockEngineMode::Disable || UseBlocks;
     }
+
+    void IncNoBlockCallable(TStringBuf callableName);
+    void IncNoBlockType(const TTypeAnnotationNode& type);
+    void IncNoBlockType(ETypeAnnotationKind kind);
+    void IncNoBlockType(NUdf::EDataSlot slot);
+
+    TVector<TString> GetTopNoBlocksCallables(size_t maxCount) const;
+    TVector<TString> GetTopNoBlocksTypes(size_t maxCount) const;
 };
 
 template <> inline

Some files were not shown because too many files changed in this diff