cbo_optimizer_new.h 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312
  1. #pragma once
  2. #include <util/generic/vector.h>
  3. #include <util/generic/string.h>
  4. #include <yql/essentials/core/yql_statistics.h>
  5. #include <yql/essentials/core/yql_cost_function.h>
  6. #include <unordered_map>
  7. #include <memory>
  8. #include <map>
  9. #include <sstream>
  10. namespace NYql {
  11. /**
  12. * OptimizerNodes are the internal representations of operators inside the
  13. * Cost-based optimizer. Currently we only support RelOptimizerNode - a node that
  14. * is an input relation to the equi-join, and JoinOptimizerNode - an inner join
  15. * that connects two sets of relations.
  16. */
  17. enum EOptimizerNodeKind: ui32
  18. {
  19. RelNodeType,
  20. JoinNodeType
  21. };
  22. /**
  23. * BaseOptimizerNode is a base class for the internal optimizer nodes
  24. * It records a pointer to statistics and records the current cost of the
  25. * operator tree, rooted at this node
  26. */
  27. struct IBaseOptimizerNode {
  28. EOptimizerNodeKind Kind;
  29. TOptimizerStatistics Stats;
  30. IBaseOptimizerNode(EOptimizerNodeKind k) : Kind(k) {}
  31. IBaseOptimizerNode(EOptimizerNodeKind k, TOptimizerStatistics s) :
  32. Kind(k), Stats(std::move(s)) {}
  33. virtual TVector<TString> Labels()=0;
  34. virtual void Print(std::stringstream& stream, int ntabs=0)=0;
  35. };
  36. enum EJoinKind: ui32
  37. {
  38. InnerJoin,
  39. LeftJoin,
  40. RightJoin,
  41. OuterJoin,
  42. LeftOnly /* == LeftAntiJoin */,
  43. RightOnly /* == RightAntiJoin */,
  44. LeftSemi,
  45. RightSemi,
  46. Cross,
  47. Exclusion
  48. };
  49. EJoinKind ConvertToJoinKind(const TString& joinString);
  50. TString ConvertToJoinString(const EJoinKind kind);
  51. struct TCardinalityHints {
  52. enum ECardOperation : ui32 {
  53. Add,
  54. Subtract,
  55. Multiply,
  56. Divide,
  57. Replace
  58. };
  59. struct TCardinalityHint {
  60. TVector<TString> JoinLabels;
  61. ECardOperation Operation;
  62. double Value;
  63. TString StringRepr;
  64. bool Applied = false;
  65. double ApplyHint(double originalValue) {
  66. Applied = true;
  67. switch (Operation) {
  68. case Add:
  69. return originalValue + Value;
  70. case Subtract:
  71. return originalValue - Value;
  72. case Multiply:
  73. return originalValue * Value;
  74. case Divide:
  75. return originalValue / Value;
  76. case Replace:
  77. return Value;
  78. }
  79. }
  80. };
  81. TVector<TCardinalityHint> Hints;
  82. void PushBack(TVector<TString> labels, ECardOperation op, double value, TString stringRepr) {
  83. Hints.push_back({.JoinLabels = std::move(labels), .Operation = op, .Value = value, .StringRepr = std::move(stringRepr)});
  84. }
  85. };
  86. struct TJoinAlgoHints {
  87. struct TJoinAlgoHint {
  88. TVector<TString> JoinLabels;
  89. EJoinAlgoType Algo;
  90. TString StringRepr;
  91. bool Applied = false;
  92. };
  93. TVector<TJoinAlgoHint> Hints;
  94. void PushBack(TVector<TString> labels, EJoinAlgoType algo, TString stringRepr) {
  95. Hints.push_back({.JoinLabels = std::move(labels), .Algo = algo, .StringRepr = std::move(stringRepr)});
  96. }
  97. };
  98. struct TJoinOrderHints {
  99. struct ITreeNode {
  100. enum _ : ui32 {
  101. Relation,
  102. Join
  103. };
  104. virtual TVector<TString> Labels() = 0;
  105. bool IsRelation() { return Type == Relation; }
  106. bool IsJoin() { return Type == Join; }
  107. virtual ~ITreeNode() = default;
  108. ui32 Type;
  109. };
  110. struct TJoinNode: public ITreeNode {
  111. TJoinNode(std::shared_ptr<ITreeNode> lhs, std::shared_ptr<ITreeNode> rhs)
  112. : Lhs(std::move(lhs))
  113. , Rhs(std::move(rhs))
  114. {
  115. this->Type = ITreeNode::Join;
  116. }
  117. TVector<TString> Labels() override {
  118. auto labels = Lhs->Labels();
  119. auto rhsLabels = Rhs->Labels();
  120. labels.insert(labels.end(), std::make_move_iterator(rhsLabels.begin()), std::make_move_iterator(rhsLabels.end()));
  121. return labels;
  122. }
  123. std::shared_ptr<ITreeNode> Lhs;
  124. std::shared_ptr<ITreeNode> Rhs;
  125. };
  126. struct TRelationNode: public ITreeNode {
  127. TRelationNode(TString label)
  128. : Label(std::move(label))
  129. {
  130. this->Type = ITreeNode::Relation;
  131. }
  132. TVector<TString> Labels() override { return {Label}; }
  133. TString Label;
  134. };
  135. struct TJoinOrderHint {
  136. std::shared_ptr<ITreeNode> Tree;
  137. TString StringRepr;
  138. bool Applied = false;
  139. };
  140. TVector<TJoinOrderHint> Hints;
  141. void PushBack(std::shared_ptr<ITreeNode> hintTree, TString stringRepr) {
  142. Hints.push_back({.Tree = std::move(hintTree), .StringRepr = std::move(stringRepr)});
  143. }
  144. };
  145. struct TOptimizerHints {
  146. std::shared_ptr<TCardinalityHints> CardinalityHints = std::make_shared<TCardinalityHints>();
  147. std::shared_ptr<TJoinAlgoHints> JoinAlgoHints = std::make_shared<TJoinAlgoHints>();
  148. std::shared_ptr<TJoinOrderHints> JoinOrderHints = std::make_shared<TJoinOrderHints>();
  149. TVector<TString> GetUnappliedString();
  150. /*
  151. * The function accepts string with three type of expressions: array of (JoinAlgo | Card | JoinOrder):
  152. * 1) JoinAlgo(t1 t2 ... tn Map | Grace | Lookup) to change join algo for join, where these labels take part
  153. * 2) Card(t1 t2 ... tn (*|/|+|-) Number) to change cardinality for join, where these labels take part or labels only
  154. * 3) JoinOrder( (t1 t2) (t3 (t4 ...)) ) - fixate this join subtree in the general join tree
  155. */
  156. static TOptimizerHints Parse(const TString&);
  157. };
  158. /**
  159. * This is a temporary structure for KQP provider
  160. * We will soon be supporting multiple providers and we will need to design
  161. * some interfaces to pass provider-specific context to the optimizer
  162. */
  163. struct IProviderContext {
  164. virtual ~IProviderContext() = default;
  165. virtual double ComputeJoinCost(const TOptimizerStatistics& leftStats, const TOptimizerStatistics& rightStats, const double outputRows, const double outputByteSize, EJoinAlgoType joinAlgol) const = 0;
  166. virtual TOptimizerStatistics ComputeJoinStats(
  167. const TOptimizerStatistics& leftStats,
  168. const TOptimizerStatistics& rightStats,
  169. const TVector<NDq::TJoinColumn>& leftJoinKeys,
  170. const TVector<NDq::TJoinColumn>& rightJoinKeys,
  171. EJoinAlgoType joinAlgo,
  172. EJoinKind joinKind,
  173. TCardinalityHints::TCardinalityHint* maybeHint = nullptr) const = 0;
  174. virtual bool IsJoinApplicable(const std::shared_ptr<IBaseOptimizerNode>& left,
  175. const std::shared_ptr<IBaseOptimizerNode>& right,
  176. const TVector<NDq::TJoinColumn>& leftJoinKeys,
  177. const TVector<NDq::TJoinColumn>& rightJoinKeys,
  178. EJoinAlgoType joinAlgo,
  179. EJoinKind joinKin) = 0;
  180. };
  181. /**
  182. * Default provider context with default cost and stats computation.
  183. */
  184. struct TBaseProviderContext : public IProviderContext {
  185. TBaseProviderContext() {}
  186. double ComputeJoinCost(const TOptimizerStatistics& leftStats, const TOptimizerStatistics& rightStats, const double outputRows, const double outputByteSize, EJoinAlgoType joinAlgo) const override;
  187. bool IsJoinApplicable(
  188. const std::shared_ptr<IBaseOptimizerNode>& leftStats,
  189. const std::shared_ptr<IBaseOptimizerNode>& rightStats,
  190. const TVector<NDq::TJoinColumn>& leftJoinKeys,
  191. const TVector<NDq::TJoinColumn>& rightJoinKeys,
  192. EJoinAlgoType joinAlgo,
  193. EJoinKind joinKind) override;
  194. virtual TOptimizerStatistics ComputeJoinStats(
  195. const TOptimizerStatistics& leftStats,
  196. const TOptimizerStatistics& rightStats,
  197. const TVector<NDq::TJoinColumn>& leftJoinKeys,
  198. const TVector<NDq::TJoinColumn>& rightJoinKeys,
  199. EJoinAlgoType joinAlgo,
  200. EJoinKind joinKind,
  201. TCardinalityHints::TCardinalityHint* maybeHint = nullptr) const override;
  202. static const TBaseProviderContext& Instance();
  203. };
  204. /**
  205. * RelOptimizerNode adds a label to base class
  206. * This is the label assinged to the input by equi-Join
  207. */
  208. struct TRelOptimizerNode : public IBaseOptimizerNode {
  209. TString Label;
  210. // Temporary solution to check if a LookupJoin is possible in KQP
  211. //void* Expr;
  212. TRelOptimizerNode(TString label, TOptimizerStatistics stats) :
  213. IBaseOptimizerNode(RelNodeType, std::move(stats)), Label(label) { }
  214. //TRelOptimizerNode(TString label, std::shared_ptr<TOptimizerStatistics> stats, const TExprNode::TPtr expr) :
  215. // IBaseOptimizerNode(RelNodeType, stats), Label(label), Expr(expr) { }
  216. virtual ~TRelOptimizerNode() {}
  217. virtual TVector<TString> Labels();
  218. virtual void Print(std::stringstream& stream, int ntabs=0);
  219. };
  220. /**
  221. * JoinOptimizerNode records the left and right arguments of the join
  222. * as well as the set of join conditions.
  223. * It also has methods to compute the statistics and cost of a join,
  224. * based on pre-computed costs and statistics of the children.
  225. */
  226. struct TJoinOptimizerNode : public IBaseOptimizerNode {
  227. std::shared_ptr<IBaseOptimizerNode> LeftArg;
  228. std::shared_ptr<IBaseOptimizerNode> RightArg;
  229. TVector<NDq::TJoinColumn> LeftJoinKeys;
  230. TVector<NDq::TJoinColumn> RightJoinKeys;
  231. EJoinKind JoinType;
  232. EJoinAlgoType JoinAlgo;
  233. /////////////////// 'ANY' flag means leaving only one row from the join side.
  234. bool LeftAny;
  235. bool RightAny;
  236. ///////////////////
  237. bool IsReorderable;
  238. TJoinOptimizerNode(const std::shared_ptr<IBaseOptimizerNode>& left,
  239. const std::shared_ptr<IBaseOptimizerNode>& right,
  240. TVector<NDq::TJoinColumn> leftKeys,
  241. TVector<NDq::TJoinColumn> rightKeys,
  242. const EJoinKind joinType,
  243. const EJoinAlgoType joinAlgo,
  244. bool leftAny,
  245. bool rightAny,
  246. bool nonReorderable = false
  247. );
  248. virtual ~TJoinOptimizerNode() {}
  249. virtual TVector<TString> Labels();
  250. virtual void Print(std::stringstream& stream, int ntabs=0);
  251. };
  252. struct IOptimizerNew {
  253. IProviderContext& Pctx;
  254. IOptimizerNew(IProviderContext& ctx) : Pctx(ctx) {}
  255. virtual ~IOptimizerNew() = default;
  256. virtual std::shared_ptr<TJoinOptimizerNode> JoinSearch(
  257. const std::shared_ptr<TJoinOptimizerNode>& joinTree,
  258. const TOptimizerHints& hints = {}
  259. ) = 0;
  260. };
  261. } // namespace NYql