// yql_opt_utils.h
  1. #pragma once
  2. #include <yql/essentials/core/expr_nodes/yql_expr_nodes.h>
  3. #include <yql/essentials/core/yql_graph_transformer.h>
  4. #include <yql/essentials/core/yql_opt_window.h>
  5. #include <yql/essentials/core/yql_type_annotation.h>
  6. #include <util/generic/hash_set.h>
  7. #include <util/generic/strbuf.h>
  8. #include <functional>
  9. namespace NYql {
  10. struct TTypeAnnotationContext;
  11. bool IsJustOrSingleAsList(const TExprNode& node);
  12. bool IsTransparentIfPresent(const TExprNode& node);
  13. bool IsPredicateFlatMap(const TExprNode& node);
  14. bool IsFilterFlatMap(const NNodes::TCoLambda& lambda);
  15. bool IsListReorder(const TExprNode& node);
  16. bool IsRenameFlatMap(const NNodes::TCoFlatMapBase& node, TExprNode::TPtr& structNode);
  17. bool IsRenameFlatMapWithMapping(const NNodes::TCoFlatMapBase& node, TExprNode::TPtr& structNode, THashMap<TString, TString>& renameMap);
  18. bool IsRenameOrApplyFlatMapWithMapping(const NNodes::TCoFlatMapBase& node, TExprNode::TPtr& structNode, THashMap<TString, TString>& renameMap, TSet<TString>& apply);
  19. bool IsPassthroughFlatMap(const NNodes::TCoFlatMapBase& flatmap, TMaybe<THashSet<TStringBuf>>* passthroughFields, bool analyzeJustMember = false);
  20. bool IsPassthroughLambda(const NNodes::TCoLambda& lambda, TMaybe<THashSet<TStringBuf>>* passthroughFields, bool analyzeJustMember = false);
  21. bool IsTablePropsDependent(const TExprNode& node);
  22. bool HasOnlyOneJoinType(const TExprNode& joinTree, TStringBuf joinType);
  23. TExprNode::TPtr KeepColumnOrder(const TExprNode::TPtr& node, const TExprNode& src, TExprContext& ctx, const TTypeAnnotationContext& typeCtx);
  24. TExprNode::TPtr KeepColumnOrder(const TColumnOrder& order, const TExprNode::TPtr& node, TExprContext& ctx);
  25. // returns true if usedFields contains subset of fields
  26. template<class TFieldsSet>
  27. bool HaveFieldsSubset(const TExprNode::TPtr& start, const TExprNode& arg, TFieldsSet& usedFields, const TParentsMap& parentsMap,
  28. bool allowDependsOn = true);
  29. template<class TFieldsSet>
  30. TExprNode::TPtr FilterByFields(TPositionHandle position, const TExprNode::TPtr& input, const TFieldsSet& subsetFields,
  31. TExprContext& ctx, bool singleValue);
  32. TExprNode::TPtr AddMembersUsedInside(const TExprNode::TPtr& start, const TExprNode& arg, TExprNode::TPtr&& members, const TParentsMap& parentsMap, TExprContext& ctx);
  33. bool IsDepended(const TExprNode& from, const TExprNode& to);
  34. bool MarkDepended(const TExprNode& from, const TExprNode& to, TNodeMap<bool>& deps);
  35. bool IsEmpty(const TExprNode& node, const TTypeAnnotationContext& typeCtx);
  36. bool IsEmptyContainer(const TExprNode& node);
  37. const TTypeAnnotationNode* RemoveOptionalType(const TTypeAnnotationNode* type);
  38. const TTypeAnnotationNode* RemoveAllOptionals(const TTypeAnnotationNode* type);
  39. TExprNode::TPtr GetSetting(const TExprNode& settings, const TStringBuf& name);
  40. TExprNode::TPtr FilterSettings(const TExprNode& settings, const THashSet<TStringBuf>& names, TExprContext& ctx);
  41. bool HasSetting(const TExprNode& settings, const TStringBuf& name);
  42. bool HasAnySetting(const TExprNode& settings, const THashSet<TString>& names);
  43. TExprNode::TPtr RemoveSetting(const TExprNode& settings, const TStringBuf& name, TExprContext& ctx);
  44. TExprNode::TPtr AddSetting(const TExprNode& settings, TPositionHandle pos, const TString& name, const TExprNode::TPtr& value, TExprContext& ctx);
  45. TExprNode::TPtr AddSetting(const TExprNode& settings, const TExprNode::TPtr& newSetting, TExprContext& ctx);
  46. TExprNode::TPtr MergeSettings(const TExprNode& settings1, const TExprNode& settings2, TExprContext& ctx);
  47. TExprNode::TPtr ReplaceSetting(const TExprNode& settings, TPositionHandle pos, const TString& name, const TExprNode::TPtr& value, TExprContext& ctx);
  48. TExprNode::TPtr ReplaceSetting(const TExprNode& settings, const TExprNode::TPtr& newSetting, TExprContext& ctx);
  // Dictionary kind selector used by ParseToDictSettings() and SelectDictType().
  // Enumerator values are spelled out; they match the implicit numbering 0, 1, 2.
  enum class EDictType {
      Hashed = 0,
      Sorted = 1,
      // NOTE(review): presumably resolved to a concrete kind by SelectDictType() — confirm.
      Auto = 2,
  };
  54. TMaybe<TIssue> ParseToDictSettings(const TExprNode& node, TExprContext& ctx, TMaybe<EDictType>& type, TMaybe<bool>& isMany, TMaybe<ui64>& itemsCount, bool& isCompact);
  55. EDictType SelectDictType(EDictType type, const TTypeAnnotationNode* keyType);
  56. using MemberUpdaterFunc = std::function<bool (TString& memberName, const TTypeAnnotationNode* TypeAnnotation)>;
  57. bool UpdateStructMembers(TExprContext& ctx, const TExprNode::TPtr& node, const TStringBuf& goal, TExprNode::TListType& members,
  58. MemberUpdaterFunc updaterFunc = MemberUpdaterFunc(), const TTypeAnnotationNode* nodeType = nullptr);
  59. TExprNode::TPtr MakeSingleGroupRow(const TExprNode& aggregateNode, TExprNode::TPtr reduced, TExprContext& ctx);
  60. TExprNode::TPtr ExpandRemoveMember(const TExprNode::TPtr& node, TExprContext& ctx);
  61. TExprNode::TPtr ExpandRemoveMembers(const TExprNode::TPtr& node, TExprContext& ctx);
  62. TExprNode::TPtr ExpandRemovePrefixMembers(const TExprNode::TPtr& node, TExprContext& ctx);
  63. TExprNode::TPtr ExpandFlattenMembers(const TExprNode::TPtr& node, TExprContext& ctx);
  64. TExprNode::TPtr ExpandFlattenStructs(const TExprNode::TPtr& node, TExprContext& ctx);
  65. TExprNode::TPtr ExpandDivePrefixMembers(const TExprNode::TPtr& node, TExprContext& ctx);
  66. TExprNode::TPtr ExpandAddMember(const TExprNode::TPtr& node, TExprContext& ctx);
  67. TExprNode::TPtr ExpandReplaceMember(const TExprNode::TPtr& node, TExprContext& ctx);
  68. TExprNode::TPtr ExpandFlattenByColumns(const TExprNode::TPtr& node, TExprContext& ctx);
  69. TExprNode::TPtr ExpandCastStruct(const TExprNode::TPtr& node, TExprContext& ctx);
  70. TExprNode::TPtr ExpandSkipNullFields(const TExprNode::TPtr& node, TExprContext& ctx);
  71. void ExtractSimpleKeys(const TExprNode* keySelectorBody, const TExprNode* keySelectorArg, TVector<TStringBuf>& columns);
  72. inline void ExtractSimpleKeys(const TExprNode& keySelectorLambda, TVector<TStringBuf>& columns) {
  73. ExtractSimpleKeys(keySelectorLambda.Child(1), keySelectorLambda.Child(0)->Child(0), columns);
  74. }
  75. TExprNode::TPtr MakeNull(TPositionHandle position, TExprContext& ctx);
  76. TExprNode::TPtr MakeConstMap(TPositionHandle position, const TExprNode::TPtr& input, const TExprNode::TPtr& value, TExprContext& ctx);
  77. TExprNode::TPtr MakeBoolNothing(TPositionHandle position, TExprContext& ctx);
  78. TExprNode::TPtr MakeBool(TPositionHandle position, bool value, TExprContext& ctx);
  79. TExprNode::TPtr MakeOptionalBool(TPositionHandle position, bool value, TExprContext& ctx);
  80. template <bool Bool>
  81. TExprNode::TPtr MakeBool(TPositionHandle position, TExprContext& ctx);
  82. TExprNode::TPtr MakePgBool(TPositionHandle position, bool value, TExprContext& ctx);
  83. TExprNode::TPtr MakeIdentityLambda(TPositionHandle position, TExprContext& ctx);
  84. constexpr std::initializer_list<std::string_view> SkippableCallables = {"Unordered", "AssumeSorted", "AssumeUnique", "AssumeDistinct",
  85. "AssumeChopped", "AssumeColumnOrder", "AssumeAllMembersNullableAtOnce", "AssumeConstraints"};
  86. const TExprNode& SkipCallables(const TExprNode& node, const std::initializer_list<std::string_view>& skipCallables);
  87. void ExtractSortKeyAndOrder(TPositionHandle pos, const TExprNode::TPtr& sortTraitsNode, TExprNode::TPtr& sortKey, TExprNode::TPtr& sortOrder, TExprContext& ctx);
  88. void ExtractSessionWindowParams(TPositionHandle pos, const TExprNode::TPtr& sessionTraits, TExprNode::TPtr& sessionKey,
  89. const TTypeAnnotationNode*& sessionKeyType, const TTypeAnnotationNode*& sessionParamsType, TExprNode::TPtr& sessionSortTraits, TExprNode::TPtr& sessionInit,
  90. TExprNode::TPtr& sessionUpdate, TExprContext& ctx);
  91. void ExtractSortKeyAndOrder(TPositionHandle pos, const TExprNode::TPtr& sortTraitsNode, TSortParams& sortParams, TExprContext& ctx);
  92. void ExtractSessionWindowParams(TPositionHandle pos, TSessionWindowParams& sessionParams, TExprContext& ctx);
  93. TExprNode::TPtr BuildKeySelector(TPositionHandle pos, const TStructExprType& rowType, const TExprNode::TPtr& keyColumns, TExprContext& ctx);
  94. template <bool Cannonize, bool EnableNewOptimizers = true>
  95. TExprNode::TPtr OptimizeIfPresent(const TExprNode::TPtr& node, TExprContext& ctx);
  96. TExprNode::TPtr OptimizeExists(const TExprNode::TPtr& node, TExprContext& ctx);
  97. bool WarnUnroderedSubquery(const TExprNode& unourderedSubquery, TExprContext& ctx);
  98. std::pair<TExprNode::TPtr, TExprNode::TPtr> ReplaceDependsOn(TExprNode::TPtr lambda, TExprContext& ctx, TTypeAnnotationContext* typeCtx);
  99. TStringBuf GetEmptyCollectionName(ETypeAnnotationKind kind);
  100. inline TStringBuf GetEmptyCollectionName(const TTypeAnnotationNode* type) {
  101. return GetEmptyCollectionName(type->GetKind());
  102. }
  103. const TItemExprType* GetLightColumn(const TStructExprType& type);
  104. // returned value exists as long as lambda object exists
  105. TVector<TStringBuf> GetCommonKeysFromVariantSelector(const NNodes::TCoLambda& lambda);
  106. bool IsIdentityLambda(const TExprNode& lambda);
  107. TExprNode::TPtr MakeExpandMap(TPositionHandle pos, const TVector<TString>& columns, const TExprNode::TPtr& input, TExprContext& ctx);
  108. TExprNode::TPtr MakeNarrowMap(TPositionHandle pos, const TVector<TString>& columns, const TExprNode::TPtr& input, TExprContext& ctx);
  109. TExprNode::TPtr FindNonYieldTransparentNode(const TExprNode::TPtr& root, const TTypeAnnotationContext& typeCtx, TNodeSet flowSources = TNodeSet());
  110. bool IsYieldTransparent(const TExprNode::TPtr& root, const TTypeAnnotationContext& typeCtx);
  111. bool IsStrict(const TExprNode::TPtr& node);
  112. TMaybe<bool> IsStrictNoRecurse(const TExprNode& node);
  113. bool HasDependsOn(const TExprNode::TPtr& node, const TExprNode::TPtr& arg);
  114. TExprNode::TPtr KeepSortedConstraint(TExprNode::TPtr node, const TSortedConstraintNode* sorted, const TTypeAnnotationNode* rowType, TExprContext& ctx);
  115. TExprNode::TPtr MakeSortByConstraint(TExprNode::TPtr node, const TSortedConstraintNode* sorted, const TTypeAnnotationNode* rowType, TExprContext& ctx);
  116. TExprNode::TPtr KeepConstraints(TExprNode::TPtr node, const TExprNode& src, TExprContext& ctx);
  117. void OptimizeSubsetFieldsForNodeWithMultiUsage(const TExprNode::TPtr& node, const TParentsMap& parentsMap,
  118. TNodeOnNodeOwnedMap& toOptimize, TExprContext& ctx,
  119. std::function<TExprNode::TPtr(const TExprNode::TPtr&, const TExprNode::TPtr&, const TParentsMap&, TExprContext&)> handler);
  120. template<bool Ordered = false>
  121. std::optional<TPartOfConstraintBase::TPathType> GetPathToKey(const TExprNode& body, const TExprNode& arg);
  122. template<bool Ordered = false>
  123. std::optional<std::pair<TPartOfConstraintBase::TPathType, ui32>> GetPathToKey(const TExprNode& body, const TExprNode::TChildrenType& args);
  124. template<bool Ordered = false>
  125. TPartOfConstraintBase::TSetType GetPathsToKeys(const TExprNode& body, const TExprNode& arg);
  126. // generates column names with pattern "prefixN" that do not clash with source columns
  127. // prefix should start with "_yql"
  128. TVector<TString> GenNoClashColumns(const TStructExprType& source, TStringBuf prefix, size_t count);
  129. bool CheckSupportedTypes(const TTypeAnnotationNode::TListType& typesToCheck, const TSet<TString>& typesSupported, const TSet<NUdf::EDataSlot>& dataSlotsSupported, std::function<void(const TString&)> unsupportedTypeHandler);
  130. }