yql_join.h 7.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179
  1. #pragma once
  2. #include <yql/essentials/ast/yql_expr.h>
  3. #include <yql/essentials/core/expr_nodes/yql_expr_nodes.h>
  4. #include <yql/essentials/core/yql_graph_transformer.h>
  5. #include <yql/essentials/core/yql_cost_function.h>
  6. #include <util/generic/set.h>
  7. #include <util/generic/vector.h>
  8. #include <util/generic/hash.h>
  9. #include <util/generic/strbuf.h>
  10. namespace NYql {
  11. inline TString FullColumnName(const TStringBuf& table, const TStringBuf& column) {
  12. return TString::Join(table, ".", column);
  13. }
  14. inline void SplitTableName(const TStringBuf& fullName, TStringBuf& table, TStringBuf& column) {
  15. auto pos = fullName.find('.');
  16. Y_ENSURE(pos != TString::npos, "Expected full column name: " << fullName);
  17. table = fullName.substr(0, pos);
  18. column = fullName.substr(pos + 1);
  19. }
  20. struct TJoinLabel {
  21. TMaybe<TIssue> Parse(TExprContext& ctx, TExprNode& node, const TStructExprType* structType, const TUniqueConstraintNode* unique, const TDistinctConstraintNode* distinct);
  22. TMaybe<TIssue> ValidateLabel(TExprContext& ctx, const NNodes::TCoAtom& label);
  23. TString FullName(const TStringBuf& column) const;
  24. TVector<TString> AllNames(const TStringBuf& column) const;
  25. TStringBuf ColumnName(const TStringBuf& column) const;
  26. TStringBuf TableName(const TStringBuf& column) const;
  27. bool HasTable(const TStringBuf& table) const;
  28. TMaybe<const TTypeAnnotationNode*> FindColumn(const TStringBuf& table, const TStringBuf& column) const;
  29. TString MemberName(const TStringBuf& table, const TStringBuf& column) const;
  30. TVector<TString> EnumerateAllColumns() const;
  31. TVector<TString> EnumerateAllMembers() const;
  32. bool AddLabel = false;
  33. const TStructExprType* InputType;
  34. TVector<TStringBuf> Tables;
  35. const TUniqueConstraintNode* Unique = nullptr;
  36. const TDistinctConstraintNode* Distinct = nullptr;
  37. };
  38. struct TJoinLabels {
  39. TMaybe<TIssue> Add(TExprContext& ctx, TExprNode& node, const TStructExprType* structType, const TUniqueConstraintNode* unique = nullptr, const TDistinctConstraintNode* distinct = nullptr);
  40. TMaybe<const TJoinLabel*> FindInput(const TStringBuf& table) const;
  41. TMaybe<ui32> FindInputIndex(const TStringBuf& table) const;
  42. TMaybe<const TTypeAnnotationNode*> FindColumn(const TStringBuf& table, const TStringBuf& column) const;
  43. TMaybe<const TTypeAnnotationNode*> FindColumn(const TStringBuf& fullName) const;
  44. TVector<TString> EnumerateColumns(const TStringBuf& table) const;
  45. TVector<TJoinLabel> Inputs;
  46. THashMap<TStringBuf, ui32> InputByTable;
  47. };
  48. struct TJoinOptions {
  49. THashMap<TStringBuf, TVector<TStringBuf>> RenameMap;
  50. TSet<TVector<TStringBuf>> PreferredSortSets;
  51. bool Flatten = false;
  52. bool StrictKeys = false;
  53. bool Compact = false;
  54. };
  55. IGraphTransformer::TStatus ValidateEquiJoinOptions(
  56. TPositionHandle positionHandle,
  57. TExprNode& optionsNode,
  58. TJoinOptions& options,
  59. TExprContext& ctx
  60. );
  61. IGraphTransformer::TStatus EquiJoinAnnotation(
  62. TPositionHandle position,
  63. const TStructExprType*& resultType,
  64. const TJoinLabels& labels,
  65. TExprNode& joins,
  66. const TJoinOptions& options,
  67. TExprContext& ctx
  68. );
  69. IGraphTransformer::TStatus EquiJoinConstraints(
  70. TPositionHandle positionHandle,
  71. const TUniqueConstraintNode*& unique,
  72. const TDistinctConstraintNode*& distinct,
  73. const TJoinLabels& labels,
  74. TExprNode& joins,
  75. TExprContext& ctx
  76. );
  77. THashMap<TStringBuf, THashSet<TStringBuf>> CollectEquiJoinKeyColumnsByLabel(const TExprNode& joinTree);
  78. bool IsLeftJoinSideOptional(const TStringBuf& joinType);
  79. bool IsRightJoinSideOptional(const TStringBuf& joinType);
  80. THashMap<TStringBuf, bool> CollectAdditiveInputLabels(const NNodes::TCoEquiJoinTuple& joinTree);
  81. TExprNode::TPtr FilterOutNullJoinColumns(TPositionHandle pos, const TExprNode::TPtr& input,
  82. const TJoinLabel& label, const TSet<TString>& optionalKeyColumns, TExprContext& ctx);
  83. TMap<TStringBuf, TVector<TStringBuf>> LoadJoinRenameMap(const TExprNode& settings);
  84. NNodes::TCoLambda BuildJoinRenameLambda(TPositionHandle pos, const TMap<TStringBuf, TVector<TStringBuf>>& renameMap,
  85. const TStructExprType& joinResultType, TExprContext& ctx);
  86. TSet<TVector<TStringBuf>> LoadJoinSortSets(const TExprNode& settings);
  87. THashMap<TString, const TTypeAnnotationNode*> GetJoinColumnTypes(const TExprNode& joins,
  88. const TJoinLabels& labels, TExprContext& ctx);
  89. THashMap<TString, const TTypeAnnotationNode*> GetJoinColumnTypes(const TExprNode& joins,
  90. const TJoinLabels& labels, const TStringBuf& joinType, TExprContext& ctx);
  91. bool AreSameJoinKeys(const TExprNode& joins, const TStringBuf& table1, const TStringBuf& column1, const TStringBuf& table2, const TStringBuf& column2);
  92. // returns (is required side + allow skip nulls);
  93. std::pair<bool, bool> IsRequiredSide(const TExprNode::TPtr& joinTree, const TJoinLabels& labels, ui32 inputIndex);
  94. TMaybe<bool> IsFilteredSide(const TExprNode::TPtr& joinTree, const TJoinLabels& labels, ui32 inputIndex);
  95. void AppendEquiJoinRenameMap(TPositionHandle pos, const TMap<TStringBuf, TVector<TStringBuf>>& newRenameMap,
  96. TExprNode::TListType& joinSettingNodes, TExprContext& ctx);
  97. void AppendEquiJoinSortSets(TPositionHandle pos, const TSet<TVector<TStringBuf>>& newSortSets,
  98. TExprNode::TListType& joinSettingNodes, TExprContext& ctx);
  99. TMap<TStringBuf, TVector<TStringBuf>> UpdateUsedFieldsInRenameMap(
  100. const TMap<TStringBuf, TVector<TStringBuf>>& renameMap,
  101. const TSet<TStringBuf>& usedFields,
  102. const TStructExprType* structType
  103. );
  104. struct TEquiJoinParent {
  105. TEquiJoinParent(const TExprNode* node, ui32 index, const TExprNode* extractedMembers)
  106. : Node(node)
  107. , Index(index)
  108. , ExtractedMembers(extractedMembers)
  109. {
  110. }
  111. const TExprNode* Node;
  112. ui32 Index;
  113. const TExprNode* ExtractedMembers;
  114. };
  115. TVector<TEquiJoinParent> CollectEquiJoinOnlyParents(const NNodes::TCoFlatMapBase& flatMap, const TParentsMap& parents);
  116. struct TEquiJoinLinkSettings {
  117. TPositionHandle Pos;
  118. TSet<TString> LeftHints;
  119. TSet<TString> RightHints;
  120. EJoinAlgoType JoinAlgo = EJoinAlgoType::Undefined;
  121. // JOIN implementation may ignore this flags if SortedMerge strategy is not supported
  122. bool ForceSortedMerge = false;
  123. bool Compact = false;
  124. TVector<TString> JoinAlgoOptions;
  125. };
  126. TEquiJoinLinkSettings GetEquiJoinLinkSettings(const TExprNode& linkSettings);
  127. TExprNode::TPtr BuildEquiJoinLinkSettings(const TEquiJoinLinkSettings& linkSettings, TExprContext& ctx);
  128. TExprNode::TPtr RemapNonConvertibleMemberForJoin(TPositionHandle pos, const TExprNode::TPtr& memberValue,
  129. const TTypeAnnotationNode& memberType, const TTypeAnnotationNode& unifiedType, TExprContext& ctx);
  130. TExprNode::TPtr PrepareListForJoin(TExprNode::TPtr list, const TTypeAnnotationNode::TListType& keyTypes, TExprNode::TListType& keys, bool payload, bool optional, bool filter, TExprContext& ctx);
  131. TExprNode::TPtr PrepareListForJoin(TExprNode::TPtr list, const TTypeAnnotationNode::TListType& keyTypes, TExprNode::TListType& keys, TExprNode::TListType&& payloads, bool payload, bool optional, bool filter, TExprContext& ctx);
  132. template<bool Squeeze = false>
  133. TExprNode::TPtr MakeDictForJoin(TExprNode::TPtr&& list, bool payload, bool multi, TExprContext& ctx);
  134. TExprNode::TPtr MakeCrossJoin(TPositionHandle pos, TExprNode::TPtr left, TExprNode::TPtr right, TExprContext& ctx);
  135. void GatherAndTerms(const TExprNode::TPtr& predicate, TExprNode::TListType& andTerms, bool& isPg, TExprContext& ctx);
  136. TExprNode::TPtr FuseAndTerms(TPositionHandle position, const TExprNode::TListType& andTerms, const TExprNode::TPtr& exclude, bool isPg, TExprContext& ctx);
  137. bool IsEquality(TExprNode::TPtr predicate, TExprNode::TPtr& left, TExprNode::TPtr& right);
  138. void GatherJoinInputs(const TExprNode::TPtr& expr, const TExprNode& row,
  139. const TParentsMap& parentsMap, const THashMap<TString, TString>& backRenameMap,
  140. const TJoinLabels& labels, TSet<ui32>& inputs, TSet<TStringBuf>& usedFields);
  141. }