source.h 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321
  1. #pragma once
  2. #include "node.h"
  3. #include "match_recognize.h"
  4. #include <library/cpp/containers/sorted_vector/sorted_vector.h>
  5. namespace NSQLTranslationV1 {
  6. using TColumnsSets = NSorted::TSimpleSet<NSorted::TSimpleSet<TString>>;
  7. class ISource;
  8. typedef TIntrusivePtr<ISource> TSourcePtr;
  9. struct TTableRef {
  10. TString RefName;
  11. TString Service;
  12. TDeferredAtom Cluster;
  13. TNodePtr Keys;
  14. TNodePtr Options;
  15. TSourcePtr Source;
  16. TTableRef() = default;
  17. TTableRef(const TString& refName, const TString& service, const TDeferredAtom& cluster, TNodePtr keys);
  18. TTableRef(const TTableRef&) = default;
  19. TTableRef& operator=(const TTableRef&) = default;
  20. TString ShortName() const;
  21. };
  22. typedef TVector<TTableRef> TTableList;
  23. class IJoin;
  24. class ISource: public INode {
  25. public:
  26. virtual ~ISource();
  27. virtual bool IsFake() const;
  28. virtual void AllColumns();
  29. virtual const TColumns* GetColumns() const;
  30. virtual void GetInputTables(TTableList& tableList) const;
  31. /// in case of error unfilled, flag show if ensure column name
  32. virtual TMaybe<bool> AddColumn(TContext& ctx, TColumnNode& column);
  33. virtual void FinishColumns();
  34. virtual bool AddExpressions(TContext& ctx, const TVector<TNodePtr>& columns, EExprSeat exprSeat);
  35. virtual void SetFlattenByMode(const TString& mode);
  36. virtual void MarkFlattenColumns();
  37. virtual bool IsFlattenColumns() const;
  38. virtual bool AddFilter(TContext& ctx, TNodePtr filter);
  39. virtual bool AddGroupKey(TContext& ctx, const TString& column);
  40. virtual void SetCompactGroupBy(bool compactGroupBy);
  41. virtual void SetGroupBySuffix(const TString& suffix);
  42. virtual TString MakeLocalName(const TString& name);
  43. virtual bool AddAggregation(TContext& ctx, TAggregationPtr aggr);
  44. virtual bool AddFuncOverWindow(TContext& ctx, TNodePtr expr);
  45. virtual void AddTmpWindowColumn(const TString& column);
  46. virtual void SetMatchRecognize(TMatchRecognizeBuilderPtr matchRecognize);
  47. virtual const TVector<TString>& GetTmpWindowColumns() const;
  48. virtual bool HasAggregations() const;
  49. virtual void AddWindowSpecs(TWinSpecs winSpecs);
  50. virtual bool AddAggregationOverWindow(TContext& ctx, const TString& windowName, TAggregationPtr func);
  51. virtual bool AddFuncOverWindow(TContext& ctx, const TString& windowName, TNodePtr func);
  52. virtual void SetLegacyHoppingWindowSpec(TLegacyHoppingWindowSpecPtr spec);
  53. virtual TLegacyHoppingWindowSpecPtr GetLegacyHoppingWindowSpec() const;
  54. virtual TNodePtr GetSessionWindowSpec() const;
  55. virtual TNodePtr GetHoppingWindowSpec() const;
  56. virtual bool IsCompositeSource() const;
  57. virtual bool IsGroupByColumn(const TString& column) const;
  58. virtual bool IsFlattenByColumns() const;
  59. virtual bool IsFlattenByExprs() const;
  60. virtual bool IsCalcOverWindow() const;
  61. virtual bool IsOverWindowSource() const;
  62. virtual bool IsStream() const;
  63. virtual EOrderKind GetOrderKind() const;
  64. virtual TWriteSettings GetWriteSettings() const;
  65. TNodePtr PrepareSamplingRate(TPosition pos, ESampleClause clause, TNodePtr samplingRate);
  66. virtual bool SetSamplingOptions(TContext& ctx, TPosition pos, ESampleClause clause, ESampleMode mode, TNodePtr samplingRate, TNodePtr samplingSeed);
  67. virtual bool SetTableHints(TContext& ctx, TPosition pos, const TTableHints& hints, const TTableHints& contextHints);
  68. virtual bool AddGrouping(TContext& ctx, const TVector<TString>& columns, TString& groupingColumn);
  69. virtual size_t GetGroupingColumnsCount() const;
  70. virtual TNodePtr BuildFilter(TContext& ctx, const TString& label);
  71. virtual TNodePtr BuildFilterLambda();
  72. virtual TNodePtr BuildFlattenByColumns(const TString& label);
  73. virtual TNodePtr BuildFlattenColumns(const TString& label);
  74. virtual TNodePtr BuildPreaggregatedMap(TContext& ctx);
  75. virtual TNodePtr BuildPreFlattenMap(TContext& ctx);
  76. virtual TNodePtr BuildPrewindowMap(TContext& ctx);
  77. virtual std::pair<TNodePtr, bool> BuildAggregation(const TString& label, TContext& ctx);
  78. virtual TNodePtr BuildCalcOverWindow(TContext& ctx, const TString& label);
  79. virtual TNodePtr BuildSort(TContext& ctx, const TString& label);
  80. virtual TNodePtr BuildCleanupColumns(TContext& ctx, const TString& label);
  81. virtual TNodePtr BuildGroupingColumns(const TString& label);
  82. virtual bool BuildSamplingLambda(TNodePtr& node);
  83. virtual bool SetSamplingRate(TContext& ctx, ESampleClause clause, TNodePtr samplingRate);
  84. virtual IJoin* GetJoin();
  85. virtual ISource* GetCompositeSource();
  86. virtual bool IsSelect() const;
  87. virtual bool IsTableSource() const;
  88. virtual bool ShouldUseSourceAsColumn(const TString& source) const;
  89. virtual bool IsJoinKeysInitializing() const;
  90. virtual const TString* GetWindowName() const;
  91. virtual bool HasMatchRecognize() const;
  92. virtual TNodePtr BuildMatchRecognize(TContext& ctx, TString&& inputTable);
  93. virtual bool DoInit(TContext& ctx, ISource* src);
  94. virtual TNodePtr Build(TContext& ctx) = 0;
  95. virtual TMaybe<TString> FindColumnMistype(const TString& name) const;
  96. virtual bool InitFilters(TContext& ctx);
  97. void AddDependentSource(ISource* usedSource);
  98. bool IsAlias(EExprSeat exprSeat, const TString& label) const;
  99. bool IsExprAlias(const TString& label) const;
  100. bool IsExprSeat(EExprSeat exprSeat, EExprType type = EExprType::WithExpression) const;
  101. TString GetGroupByColumnAlias(const TString& column) const;
  102. const TVector<TNodePtr>& Expressions(EExprSeat exprSeat) const;
  103. virtual TWindowSpecificationPtr FindWindowSpecification(TContext& ctx, const TString& windowName) const;
  104. TIntrusivePtr<ISource> CloneSource() const;
  105. TNodePtr BuildSortSpec(const TVector<TSortSpecificationPtr>& orderBy, const TString& label, bool traits, bool assume);
  106. protected:
  107. ISource(TPosition pos);
  108. virtual TAstNode* Translate(TContext& ctx) const;
  109. void FillSortParts(const TVector<TSortSpecificationPtr>& orderBy, TNodePtr& sortKeySelector, TNodePtr& sortDirection);
  110. TVector<TNodePtr>& Expressions(EExprSeat exprSeat);
  111. TNodePtr AliasOrColumn(const TNodePtr& node, bool withSource);
  112. TNodePtr BuildWindowFrame(const TFrameSpecification& spec, bool isCompact);
  113. THashSet<TString> ExprAliases;
  114. THashSet<TString> FlattenByAliases;
  115. THashMap<TString, TString> GroupByColumnAliases;
  116. TVector<TNodePtr> Filters;
  117. bool CompactGroupBy = false;
  118. TString GroupBySuffix;
  119. TSet<TString> GroupKeys;
  120. TVector<TString> OrderedGroupKeys;
  121. std::array<TVector<TNodePtr>, static_cast<unsigned>(EExprSeat::Max)> NamedExprs;
  122. TVector<TAggregationPtr> Aggregations;
  123. TMap<TString, TVector<TAggregationPtr>> AggregationOverWindow;
  124. TMap<TString, TVector<TNodePtr>> FuncOverWindow;
  125. TWinSpecs WinSpecs;
  126. TLegacyHoppingWindowSpecPtr LegacyHoppingWindowSpec;
  127. TNodePtr SessionWindow;
  128. TNodePtr HoppingWindow;
  129. TVector<ISource*> UsedSources;
  130. TString FlattenMode;
  131. bool FlattenColumns = false;
  132. THashMap<TString, ui32> GenIndexes;
  133. TVector<TString> TmpWindowColumns;
  134. TNodePtr SamplingRate;
  135. TMatchRecognizeBuilderPtr MatchRecognizeBuilder;
  136. };
  137. template<>
  138. inline TVector<TSourcePtr> CloneContainer<TSourcePtr>(const TVector<TSourcePtr>& args) {
  139. TVector<TSourcePtr> cloneArgs;
  140. cloneArgs.reserve(args.size());
  141. for (const auto& arg: args) {
  142. cloneArgs.emplace_back(arg ? arg->CloneSource() : nullptr);
  143. }
  144. return cloneArgs;
  145. }
  /// Settings attached to a single join link (see IJoin::SetupJoin).
  /// A default-constructed value has Strategy == EStrategy::Default and
  /// Compact == false.
  struct TJoinLinkSettings {
      /// Join strategy selector; Default is the first enumerator (value 0).
      enum class EStrategy {
          Default,
          SortedMerge,
          StreamLookup,
          ForceMap,
          ForceGrace
      };

      EStrategy Strategy{EStrategy::Default};
      bool Compact{false};
  };
  157. class IJoin: public ISource {
  158. public:
  159. virtual ~IJoin();
  160. virtual IJoin* GetJoin();
  161. virtual TNodePtr BuildJoinKeys(TContext& ctx, const TVector<TDeferredAtom>& names) = 0;
  162. virtual void SetupJoin(const TString& joinOp, TNodePtr joinExpr, const TJoinLinkSettings& linkSettings) = 0;
  163. virtual const THashMap<TString, THashSet<TString>>& GetSameKeysMap() const = 0;
  164. virtual TVector<TString> GetJoinLabels() const = 0;
  165. protected:
  166. IJoin(TPosition pos);
  167. };
  168. class TSessionWindow final : public INode {
  169. public:
  170. TSessionWindow(TPosition pos, const TVector<TNodePtr>& args);
  171. void MarkValid();
  172. TNodePtr BuildTraits(const TString& label) const;
  173. private:
  174. bool DoInit(TContext& ctx, ISource* src) override;
  175. TAstNode* Translate(TContext&) const override;
  176. void DoUpdateState() const override;
  177. TNodePtr DoClone() const override;
  178. TString GetOpName() const override;
  179. TVector<TNodePtr> Args;
  180. TSourcePtr FakeSource;
  181. TNodePtr Node;
  182. bool Valid;
  183. };
  184. class THoppingWindow final : public INode {
  185. public:
  186. THoppingWindow(TPosition pos, const TVector<TNodePtr>& args);
  187. void MarkValid();
  188. TNodePtr BuildTraits(const TString& label) const;
  189. public:
  190. TNodePtr Hop;
  191. TNodePtr Interval;
  192. private:
  193. bool DoInit(TContext& ctx, ISource* src) override;
  194. TAstNode* Translate(TContext&) const override;
  195. void DoUpdateState() const override;
  196. TNodePtr DoClone() const override;
  197. TString GetOpName() const override;
  198. TNodePtr ProcessIntervalParam(const TNodePtr& val) const;
  199. TVector<TNodePtr> Args;
  200. TSourcePtr FakeSource;
  201. TNodePtr Node;
  202. bool Valid;
  203. };
  204. // Implemented in join.cpp
  205. TString NormalizeJoinOp(const TString& joinOp);
  206. TSourcePtr BuildEquiJoin(TPosition pos, TVector<TSourcePtr>&& sources, TVector<bool>&& anyFlags, bool strictJoinKeyTypes);
  207. // Implemented in select.cpp
  208. TNodePtr BuildSubquery(TSourcePtr source, const TString& alias, bool inSubquery, int ensureTupleSize, TScopedStatePtr scoped);
  209. TNodePtr BuildSubqueryRef(TNodePtr subquery, const TString& alias, int tupleIndex = -1);
  210. TNodePtr BuildInvalidSubqueryRef(TPosition subqueryPos);
  211. TNodePtr BuildSourceNode(TPosition pos, TSourcePtr source, bool checkExist = false);
  212. TSourcePtr BuildMuxSource(TPosition pos, TVector<TSourcePtr>&& sources);
  213. TSourcePtr BuildFakeSource(TPosition pos, bool missingFrom = false, bool inSubquery = false);
  214. TSourcePtr BuildNodeSource(TPosition pos, const TNodePtr& node, bool wrapToList = false);
  215. TSourcePtr BuildTableSource(TPosition pos, const TTableRef& table, const TString& label = TString());
  216. TSourcePtr BuildInnerSource(TPosition pos, TNodePtr node, const TString& service, const TDeferredAtom& cluster, const TString& label = TString());
  217. TSourcePtr BuildRefColumnSource(TPosition pos, const TString& partExpression);
  218. TSourcePtr BuildUnion(TPosition pos, TVector<TSourcePtr>&& sources, bool quantifierAll, const TWriteSettings& settings);
  219. TSourcePtr BuildOverWindowSource(TPosition pos, const TString& windowName, ISource* origSource);
  220. TNodePtr BuildOrderBy(TPosition pos, const TVector<TNodePtr>& keys, const TVector<bool>& order);
  221. TNodePtr BuildSkipTake(TPosition pos, const TNodePtr& skip, const TNodePtr& take);
  222. TSourcePtr BuildSelectCore(
  223. TContext& ctx,
  224. TPosition pos,
  225. TSourcePtr source,
  226. const TVector<TNodePtr>& groupByExpr,
  227. const TVector<TNodePtr>& groupBy,
  228. bool compactGroupBy,
  229. const TString& groupBySuffix,
  230. bool assumeSorted,
  231. const TVector<TSortSpecificationPtr>& orderBy,
  232. TNodePtr having,
  233. TWinSpecs&& windowSpec,
  234. TLegacyHoppingWindowSpecPtr legacyHoppingWindowSpec,
  235. TVector<TNodePtr>&& terms,
  236. bool distinct,
  237. TVector<TNodePtr>&& without,
  238. bool selectStream,
  239. const TWriteSettings& settings,
  240. TColumnsSets&& uniqueSets,
  241. TColumnsSets&& distinctSets
  242. );
  243. TSourcePtr BuildSelect(TPosition pos, TSourcePtr source, TNodePtr skipTake);
  /// Mode argument of BuildReduce. ByPartition is the first enumerator
  /// (value 0), ByAll the second (value 1).
  enum class ReduceMode {
      ByPartition,
      ByAll,
  };
  248. TSourcePtr BuildReduce(TPosition pos, ReduceMode mode, TSourcePtr source, TVector<TSortSpecificationPtr>&& orderBy,
  249. TVector<TNodePtr>&& keys, TVector<TNodePtr>&& args, TNodePtr udf, TNodePtr having, const TWriteSettings& settings,
  250. const TVector<TSortSpecificationPtr>& assumeOrderBy, bool listCall);
  251. TSourcePtr BuildProcess(TPosition pos, TSourcePtr source, TNodePtr with, bool withExtFunction, TVector<TNodePtr>&& terms, bool listCall,
  252. bool prcessStream, const TWriteSettings& settings, const TVector<TSortSpecificationPtr>& assumeOrderBy);
  253. TNodePtr BuildSelectResult(TPosition pos, TSourcePtr source, bool writeResult, bool inSubquery, TScopedStatePtr scoped);
  254. // Implemented in insert.cpp
  255. TSourcePtr BuildWriteValues(TPosition pos, const TString& opertationHumanName, const TVector<TString>& columnsHint, const TVector<TVector<TNodePtr>>& values);
  256. TSourcePtr BuildWriteValues(TPosition pos, const TString& opertationHumanName, const TVector<TString>& columnsHint, TSourcePtr source);
  257. TSourcePtr BuildUpdateValues(TPosition pos, const TVector<TString>& columnsHint, const TVector<TNodePtr>& values);
  258. EWriteColumnMode ToWriteColumnsMode(ESQLWriteColumnMode sqlWriteColumnMode);
  259. TNodePtr BuildEraseColumns(TPosition pos, const TVector<TString>& columns);
  260. TNodePtr BuildIntoTableOptions(TPosition pos, const TVector<TString>& eraseColumns, const TTableHints& hints);
  261. TNodePtr BuildWriteColumns(TPosition pos, TScopedStatePtr scoped, const TTableRef& table, EWriteColumnMode mode, TSourcePtr values, TNodePtr options = nullptr);
  262. TNodePtr BuildUpdateColumns(TPosition pos, TScopedStatePtr scoped, const TTableRef& table, TSourcePtr values, TSourcePtr source, TNodePtr options = nullptr);
  263. TNodePtr BuildDelete(TPosition pos, TScopedStatePtr scoped, const TTableRef& table, TSourcePtr source, TNodePtr options = nullptr);
  264. // Implemented in query.cpp
  265. TNodePtr BuildTableKey(TPosition pos, const TString& service, const TDeferredAtom& cluster, const TDeferredAtom& name, const TViewDescription& view);
  266. TNodePtr BuildTableKeys(TPosition pos, const TString& service, const TDeferredAtom& cluster, const TString& func, const TVector<TTableArg>& args);
  267. TNodePtr BuildTopicKey(TPosition pos, const TDeferredAtom& cluster, const TDeferredAtom& name);
  268. TNodePtr BuildInputOptions(TPosition pos, const TTableHints& hints);
  269. TNodePtr BuildInputTables(TPosition pos, const TTableList& tables, bool inSubquery, TScopedStatePtr scoped);
  270. TNodePtr BuildCreateTable(TPosition pos, const TTableRef& tr, bool existingOk, bool replaceIfExists, const TCreateTableParameters& params, TSourcePtr source, TScopedStatePtr scoped);
  271. TNodePtr BuildAlterTable(TPosition pos, const TTableRef& tr, const TAlterTableParameters& params, TScopedStatePtr scoped);
  272. TNodePtr BuildDropTable(TPosition pos, const TTableRef& table, bool missingOk, ETableType tableType, TScopedStatePtr scoped);
  273. TNodePtr BuildWriteTable(TPosition pos, const TString& label, const TTableRef& table, EWriteColumnMode mode, TNodePtr options,
  274. TScopedStatePtr scoped);
  275. TNodePtr BuildAnalyze(TPosition pos, const TString& service, const TDeferredAtom& cluster, const TAnalyzeParams& params, TScopedStatePtr scoped);
  276. TNodePtr BuildAlterSequence(TPosition pos, const TString& service, const TDeferredAtom& cluster, const TString& id, const TSequenceParameters& params, TScopedStatePtr scoped);
  277. TSourcePtr TryMakeSourceFromExpression(TPosition pos, TContext& ctx, const TString& currService, const TDeferredAtom& currCluster,
  278. TNodePtr node, const TString& view = {});
  279. void MakeTableFromExpression(TPosition pos, TContext& ctx, TNodePtr node, TDeferredAtom& table, const TString& prefix = {});
  280. TDeferredAtom MakeAtomFromExpression(TPosition pos, TContext& ctx, TNodePtr node, const TString& prefix = {});
  281. TString NormalizeTypeString(const TString& str);
  282. } // namespace NSQLTranslationV1