// source.h (17 KB)
  1. #pragma once
  2. #include "node.h"
  3. #include "match_recognize.h"
  4. #include <library/cpp/containers/sorted_vector/sorted_vector.h>
  5. namespace NSQLTranslationV1 {
  6. using TColumnsSets = NSorted::TSimpleSet<NSorted::TSimpleSet<TString>>;
  7. class ISource;
  8. typedef TIntrusivePtr<ISource> TSourcePtr;
  9. struct TTableRef {
  10. TString RefName;
  11. TString Service;
  12. TDeferredAtom Cluster;
  13. TNodePtr Keys;
  14. TNodePtr Options;
  15. TSourcePtr Source;
  16. TTableRef() = default;
  17. TTableRef(const TString& refName, const TString& service, const TDeferredAtom& cluster, TNodePtr keys);
  18. TTableRef(const TTableRef&) = default;
  19. TTableRef& operator=(const TTableRef&) = default;
  20. TString ShortName() const;
  21. };
  22. typedef TVector<TTableRef> TTableList;
  23. class IJoin;
  24. class ISource: public INode {
  25. public:
  26. virtual ~ISource();
  27. virtual bool IsFake() const;
  28. virtual void AllColumns();
  29. virtual const TColumns* GetColumns() const;
  30. virtual void GetInputTables(TTableList& tableList) const;
  31. /// in case of error unfilled, flag show if ensure column name
  32. virtual TMaybe<bool> AddColumn(TContext& ctx, TColumnNode& column);
  33. virtual void FinishColumns();
  34. virtual bool AddExpressions(TContext& ctx, const TVector<TNodePtr>& columns, EExprSeat exprSeat);
  35. virtual void SetFlattenByMode(const TString& mode);
  36. virtual void MarkFlattenColumns();
  37. virtual bool IsFlattenColumns() const;
  38. virtual bool AddFilter(TContext& ctx, TNodePtr filter);
  39. virtual bool AddGroupKey(TContext& ctx, const TString& column);
  40. virtual void SetCompactGroupBy(bool compactGroupBy);
  41. virtual void SetGroupBySuffix(const TString& suffix);
  42. virtual TString MakeLocalName(const TString& name);
  43. virtual bool AddAggregation(TContext& ctx, TAggregationPtr aggr);
  44. virtual bool AddFuncOverWindow(TContext& ctx, TNodePtr expr);
  45. virtual void AddTmpWindowColumn(const TString& column);
  46. virtual void SetMatchRecognize(TMatchRecognizeBuilderPtr matchRecognize);
  47. virtual const TVector<TString>& GetTmpWindowColumns() const;
  48. virtual bool HasAggregations() const;
  49. virtual void AddWindowSpecs(TWinSpecs winSpecs);
  50. virtual bool AddAggregationOverWindow(TContext& ctx, const TString& windowName, TAggregationPtr func);
  51. virtual bool AddFuncOverWindow(TContext& ctx, const TString& windowName, TNodePtr func);
  52. virtual void SetLegacyHoppingWindowSpec(TLegacyHoppingWindowSpecPtr spec);
  53. virtual TLegacyHoppingWindowSpecPtr GetLegacyHoppingWindowSpec() const;
  54. virtual TNodePtr GetSessionWindowSpec() const;
  55. virtual TNodePtr GetHoppingWindowSpec() const;
  56. virtual bool IsCompositeSource() const;
  57. virtual bool IsGroupByColumn(const TString& column) const;
  58. virtual bool IsFlattenByColumns() const;
  59. virtual bool IsFlattenByExprs() const;
  60. virtual bool IsCalcOverWindow() const;
  61. virtual bool IsOverWindowSource() const;
  62. virtual bool IsStream() const;
  63. virtual EOrderKind GetOrderKind() const;
  64. virtual TWriteSettings GetWriteSettings() const;
  65. TNodePtr PrepareSamplingRate(TPosition pos, ESampleClause clause, TNodePtr samplingRate);
  66. virtual bool SetSamplingOptions(TContext& ctx, TPosition pos, ESampleClause clause, ESampleMode mode, TNodePtr samplingRate, TNodePtr samplingSeed);
  67. virtual bool SetTableHints(TContext& ctx, TPosition pos, const TTableHints& hints, const TTableHints& contextHints);
  68. virtual bool AddGrouping(TContext& ctx, const TVector<TString>& columns, TString& groupingColumn);
  69. virtual size_t GetGroupingColumnsCount() const;
  70. virtual TNodePtr BuildFilter(TContext& ctx, const TString& label);
  71. virtual TNodePtr BuildFilterLambda();
  72. virtual TNodePtr BuildFlattenByColumns(const TString& label);
  73. virtual TNodePtr BuildFlattenColumns(const TString& label);
  74. virtual TNodePtr BuildPreaggregatedMap(TContext& ctx);
  75. virtual TNodePtr BuildPreFlattenMap(TContext& ctx);
  76. virtual TNodePtr BuildPrewindowMap(TContext& ctx);
  77. virtual std::pair<TNodePtr, bool> BuildAggregation(const TString& label, TContext& ctx);
  78. virtual TNodePtr BuildCalcOverWindow(TContext& ctx, const TString& label);
  79. virtual TNodePtr BuildSort(TContext& ctx, const TString& label);
  80. virtual TNodePtr BuildCleanupColumns(TContext& ctx, const TString& label);
  81. virtual TNodePtr BuildGroupingColumns(const TString& label);
  82. virtual bool BuildSamplingLambda(TNodePtr& node);
  83. virtual bool SetSamplingRate(TContext& ctx, ESampleClause clause, TNodePtr samplingRate);
  84. virtual IJoin* GetJoin();
  85. virtual ISource* GetCompositeSource();
  86. virtual bool IsSelect() const;
  87. virtual bool IsTableSource() const;
  88. virtual bool ShouldUseSourceAsColumn(const TString& source) const;
  89. virtual bool IsJoinKeysInitializing() const;
  90. virtual const TString* GetWindowName() const;
  91. virtual bool HasMatchRecognize() const;
  92. virtual TNodePtr BuildMatchRecognize(TContext& ctx, TString&& inputTable);
  93. virtual bool DoInit(TContext& ctx, ISource* src);
  94. virtual TNodePtr Build(TContext& ctx) = 0;
  95. virtual TMaybe<TString> FindColumnMistype(const TString& name) const;
  96. virtual bool InitFilters(TContext& ctx);
  97. void AddDependentSource(ISource* usedSource);
  98. bool IsAlias(EExprSeat exprSeat, const TString& label) const;
  99. bool IsExprAlias(const TString& label) const;
  100. bool IsExprSeat(EExprSeat exprSeat, EExprType type = EExprType::WithExpression) const;
  101. TString GetGroupByColumnAlias(const TString& column) const;
  102. const TVector<TNodePtr>& Expressions(EExprSeat exprSeat) const;
  103. virtual TWindowSpecificationPtr FindWindowSpecification(TContext& ctx, const TString& windowName) const;
  104. TIntrusivePtr<ISource> CloneSource() const;
  105. TNodePtr BuildSortSpec(const TVector<TSortSpecificationPtr>& orderBy, const TString& label, bool traits, bool assume);
  106. protected:
  107. ISource(TPosition pos);
  108. virtual TAstNode* Translate(TContext& ctx) const;
  109. void FillSortParts(const TVector<TSortSpecificationPtr>& orderBy, TNodePtr& sortKeySelector, TNodePtr& sortDirection);
  110. TVector<TNodePtr>& Expressions(EExprSeat exprSeat);
  111. TNodePtr AliasOrColumn(const TNodePtr& node, bool withSource);
  112. TNodePtr BuildWindowFrame(const TFrameSpecification& spec, bool isCompact);
  113. THashSet<TString> ExprAliases;
  114. THashSet<TString> FlattenByAliases;
  115. THashMap<TString, TString> GroupByColumnAliases;
  116. TVector<TNodePtr> Filters;
  117. bool CompactGroupBy = false;
  118. TString GroupBySuffix;
  119. TSet<TString> GroupKeys;
  120. TVector<TString> OrderedGroupKeys;
  121. std::array<TVector<TNodePtr>, static_cast<unsigned>(EExprSeat::Max)> NamedExprs;
  122. TVector<TAggregationPtr> Aggregations;
  123. TMap<TString, TVector<TAggregationPtr>> AggregationOverWindow;
  124. TMap<TString, TVector<TNodePtr>> FuncOverWindow;
  125. TWinSpecs WinSpecs;
  126. TLegacyHoppingWindowSpecPtr LegacyHoppingWindowSpec;
  127. TNodePtr SessionWindow;
  128. TNodePtr HoppingWindow;
  129. TVector<ISource*> UsedSources;
  130. TString FlattenMode;
  131. bool FlattenColumns = false;
  132. THashMap<TString, ui32> GenIndexes;
  133. TVector<TString> TmpWindowColumns;
  134. TNodePtr SamplingRate;
  135. TMatchRecognizeBuilderPtr MatchRecognizeBuilder;
  136. };
  137. template<>
  138. inline TVector<TSourcePtr> CloneContainer<TSourcePtr>(const TVector<TSourcePtr>& args) {
  139. TVector<TSourcePtr> cloneArgs;
  140. cloneArgs.reserve(args.size());
  141. for (const auto& arg: args) {
  142. cloneArgs.emplace_back(arg ? arg->CloneSource() : nullptr);
  143. }
  144. return cloneArgs;
  145. }
  146. struct TJoinLinkSettings {
  147. enum class EStrategy {
  148. Default,
  149. SortedMerge,
  150. StreamLookup,
  151. ForceMap,
  152. ForceGrace
  153. };
  154. EStrategy Strategy = EStrategy::Default;
  155. TVector<TString> Values;
  156. bool Compact = false;
  157. };
  158. class IJoin: public ISource {
  159. public:
  160. virtual ~IJoin();
  161. virtual IJoin* GetJoin();
  162. virtual TNodePtr BuildJoinKeys(TContext& ctx, const TVector<TDeferredAtom>& names) = 0;
  163. virtual void SetupJoin(const TString& joinOp, TNodePtr joinExpr, const TJoinLinkSettings& linkSettings) = 0;
  164. virtual const THashMap<TString, THashSet<TString>>& GetSameKeysMap() const = 0;
  165. virtual TVector<TString> GetJoinLabels() const = 0;
  166. protected:
  167. IJoin(TPosition pos);
  168. };
  169. class TSessionWindow final : public INode {
  170. public:
  171. TSessionWindow(TPosition pos, const TVector<TNodePtr>& args);
  172. void MarkValid();
  173. TNodePtr BuildTraits(const TString& label) const;
  174. private:
  175. bool DoInit(TContext& ctx, ISource* src) override;
  176. TAstNode* Translate(TContext&) const override;
  177. void DoUpdateState() const override;
  178. TNodePtr DoClone() const override;
  179. TString GetOpName() const override;
  180. TVector<TNodePtr> Args;
  181. TSourcePtr FakeSource;
  182. TNodePtr Node;
  183. bool Valid;
  184. };
  185. class THoppingWindow final : public INode {
  186. public:
  187. THoppingWindow(TPosition pos, const TVector<TNodePtr>& args);
  188. void MarkValid();
  189. TNodePtr BuildTraits(const TString& label) const;
  190. public:
  191. TNodePtr Hop;
  192. TNodePtr Interval;
  193. private:
  194. bool DoInit(TContext& ctx, ISource* src) override;
  195. TAstNode* Translate(TContext&) const override;
  196. void DoUpdateState() const override;
  197. TNodePtr DoClone() const override;
  198. TString GetOpName() const override;
  199. TNodePtr ProcessIntervalParam(const TNodePtr& val) const;
  200. TVector<TNodePtr> Args;
  201. TSourcePtr FakeSource;
  202. TNodePtr Node;
  203. bool Valid;
  204. };
  205. // Implemented in join.cpp
  206. TString NormalizeJoinOp(const TString& joinOp);
  207. TSourcePtr BuildEquiJoin(TPosition pos, TVector<TSourcePtr>&& sources, TVector<bool>&& anyFlags, bool strictJoinKeyTypes);
  208. // Implemented in select.cpp
  209. TNodePtr BuildSubquery(TSourcePtr source, const TString& alias, bool inSubquery, int ensureTupleSize, TScopedStatePtr scoped);
  210. TNodePtr BuildSubqueryRef(TNodePtr subquery, const TString& alias, int tupleIndex = -1);
  211. TNodePtr BuildInvalidSubqueryRef(TPosition subqueryPos);
  212. TNodePtr BuildSourceNode(TPosition pos, TSourcePtr source, bool checkExist = false, bool withTables = false);
  213. TSourcePtr BuildMuxSource(TPosition pos, TVector<TSourcePtr>&& sources);
  214. TSourcePtr BuildFakeSource(TPosition pos, bool missingFrom = false, bool inSubquery = false);
  215. TSourcePtr BuildNodeSource(TPosition pos, const TNodePtr& node, bool wrapToList = false, bool wrapByTableSource = false);
  216. TSourcePtr BuildTableSource(TPosition pos, const TTableRef& table, const TString& label = TString());
  217. TSourcePtr BuildInnerSource(TPosition pos, TNodePtr node, const TString& service, const TDeferredAtom& cluster, const TString& label = TString());
  218. TSourcePtr BuildRefColumnSource(TPosition pos, const TString& partExpression);
  219. TSourcePtr BuildUnion(TPosition pos, TVector<TSourcePtr>&& sources, bool quantifierAll, const TWriteSettings& settings);
  220. TSourcePtr BuildOverWindowSource(TPosition pos, const TString& windowName, ISource* origSource);
  221. TNodePtr BuildOrderBy(TPosition pos, const TVector<TNodePtr>& keys, const TVector<bool>& order);
  222. TNodePtr BuildSkipTake(TPosition pos, const TNodePtr& skip, const TNodePtr& take);
  223. TSourcePtr BuildSelectCore(
  224. TContext& ctx,
  225. TPosition pos,
  226. TSourcePtr source,
  227. const TVector<TNodePtr>& groupByExpr,
  228. const TVector<TNodePtr>& groupBy,
  229. bool compactGroupBy,
  230. const TString& groupBySuffix,
  231. bool assumeSorted,
  232. const TVector<TSortSpecificationPtr>& orderBy,
  233. TNodePtr having,
  234. TWinSpecs&& windowSpec,
  235. TLegacyHoppingWindowSpecPtr legacyHoppingWindowSpec,
  236. TVector<TNodePtr>&& terms,
  237. bool distinct,
  238. TVector<TNodePtr>&& without,
  239. bool selectStream,
  240. const TWriteSettings& settings,
  241. TColumnsSets&& uniqueSets,
  242. TColumnsSets&& distinctSets
  243. );
  244. TSourcePtr BuildSelect(TPosition pos, TSourcePtr source, TNodePtr skipTake);
  /// Mode selector passed to BuildReduce().
  enum class ReduceMode {
      ByPartition,
      ByAll,
  };
  249. TSourcePtr BuildReduce(TPosition pos, ReduceMode mode, TSourcePtr source, TVector<TSortSpecificationPtr>&& orderBy,
  250. TVector<TNodePtr>&& keys, TVector<TNodePtr>&& args, TNodePtr udf, TNodePtr having, const TWriteSettings& settings,
  251. const TVector<TSortSpecificationPtr>& assumeOrderBy, bool listCall);
  252. TSourcePtr BuildProcess(TPosition pos, TSourcePtr source, TNodePtr with, bool withExtFunction, TVector<TNodePtr>&& terms, bool listCall,
  253. bool prcessStream, const TWriteSettings& settings, const TVector<TSortSpecificationPtr>& assumeOrderBy);
  254. TNodePtr BuildSelectResult(TPosition pos, TSourcePtr source, bool writeResult, bool inSubquery, TScopedStatePtr scoped);
  255. // Implemented in insert.cpp
  256. TSourcePtr BuildWriteValues(TPosition pos, const TString& opertationHumanName, const TVector<TString>& columnsHint, const TVector<TVector<TNodePtr>>& values);
  257. TSourcePtr BuildWriteValues(TPosition pos, const TString& opertationHumanName, const TVector<TString>& columnsHint, TSourcePtr source);
  258. TSourcePtr BuildUpdateValues(TPosition pos, const TVector<TString>& columnsHint, const TVector<TNodePtr>& values);
  259. EWriteColumnMode ToWriteColumnsMode(ESQLWriteColumnMode sqlWriteColumnMode);
  260. TNodePtr BuildEraseColumns(TPosition pos, const TVector<TString>& columns);
  261. TNodePtr BuildIntoTableOptions(TPosition pos, const TVector<TString>& eraseColumns, const TTableHints& hints);
  262. TNodePtr BuildWriteColumns(TPosition pos, TScopedStatePtr scoped, const TTableRef& table, EWriteColumnMode mode, TSourcePtr values, TNodePtr options = nullptr);
  263. TNodePtr BuildUpdateColumns(TPosition pos, TScopedStatePtr scoped, const TTableRef& table, TSourcePtr values, TSourcePtr source, TNodePtr options = nullptr);
  264. TNodePtr BuildBatchUpdate(TPosition pos, TScopedStatePtr scoped, const TTableRef& table, TSourcePtr values, TSourcePtr source, TNodePtr options = nullptr);
  265. TNodePtr BuildDelete(TPosition pos, TScopedStatePtr scoped, const TTableRef& table, TSourcePtr source, TNodePtr options = nullptr);
  266. TNodePtr BuildBatchDelete(TPosition pos, TScopedStatePtr scoped, const TTableRef& table, TSourcePtr source, TNodePtr options = nullptr);
  267. // Implemented in query.cpp
  268. TNodePtr BuildAlterTable(TPosition pos, const TTableRef& tr, const TAlterTableParameters& params, TScopedStatePtr scoped);
  269. TNodePtr BuildAlterDatabase(TPosition pos, const TString& service, const TDeferredAtom& cluster, const TAlterDatabaseParameters& params,TScopedStatePtr scoped);
  270. TNodePtr BuildTableKey(TPosition pos, const TString& service, const TDeferredAtom& cluster, const TDeferredAtom& name, const TViewDescription& view);
  271. TNodePtr BuildTableKeys(TPosition pos, const TString& service, const TDeferredAtom& cluster, const TString& func, const TVector<TTableArg>& args);
  272. TNodePtr BuildTopicKey(TPosition pos, const TDeferredAtom& cluster, const TDeferredAtom& name);
  273. TNodePtr BuildInputOptions(TPosition pos, const TTableHints& hints);
  274. TNodePtr BuildInputTables(TPosition pos, const TTableList& tables, bool inSubquery, TScopedStatePtr scoped);
  275. TNodePtr BuildCreateTable(TPosition pos, const TTableRef& tr, bool existingOk, bool replaceIfExists, const TCreateTableParameters& params, TSourcePtr source, TScopedStatePtr scoped);
  276. TNodePtr BuildAlterTable(TPosition pos, const TTableRef& tr, const TAlterTableParameters& params, TScopedStatePtr scoped);
  277. TNodePtr BuildDropTable(TPosition pos, const TTableRef& table, bool missingOk, ETableType tableType, TScopedStatePtr scoped);
  278. TNodePtr BuildWriteTable(TPosition pos, const TString& label, const TTableRef& table, EWriteColumnMode mode, TNodePtr options,
  279. TScopedStatePtr scoped);
  280. TNodePtr BuildAnalyze(TPosition pos, const TString& service, const TDeferredAtom& cluster, const TAnalyzeParams& params, TScopedStatePtr scoped);
  281. TNodePtr BuildAlterSequence(TPosition pos, const TString& service, const TDeferredAtom& cluster, const TString& id, const TSequenceParameters& params, TScopedStatePtr scoped);
  282. TSourcePtr TryMakeSourceFromExpression(TPosition pos, TContext& ctx, const TString& currService, const TDeferredAtom& currCluster,
  283. TNodePtr node, const TString& view = {});
  284. void MakeTableFromExpression(TPosition pos, TContext& ctx, TNodePtr node, TDeferredAtom& table, const TString& prefix = {});
  285. TDeferredAtom MakeAtomFromExpression(TPosition pos, TContext& ctx, TNodePtr node, const TString& prefix = {});
  286. TString NormalizeTypeString(const TString& str);
  287. } // namespace NSQLTranslationV1