select.cpp 106 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277
72778277927802781278227832784278527862787278827892790279127922793279427952796279727982799280028012802280328042805280628072808280928102811281228132814281528162817281828192820282128222823282428252826282728282829283028312832283328342835283628372838283928402841284228432844284528462847284828492850285128522853285428552856285728582859286028612862286328642865286628672868286928702871287228732874287528762877287828792880288128822883288428852886288728882889289028912892289328942895289628972898289929002901290229032904290529062907290829092910291129122913291429152916291729182919292029212922292329242925292629272928292929302931293229332934293529362937293829392940294129422943294429452946294729482949295029512952295329542955295629572958295929602961296229632964296529662967296829692970297129722973297429752976297729782979298029812982298329842985298629872988298929902991299229932994299529962997299829993000300130023003300430053006300730083009301030113012301330143015301630173018301930203021302230233024302530263027302830293030303130323033303430353036303730383039304030413042304330443045304630473048304930503051305230533054305530563057305830593060306130623063306430653066306730683069307030713072307330743075307630773078307930803081308230833084308530863087308830893090309130923093309430953096309730983099310031013102310331043105310631073108310931103111311231133114311531163117311831193120312131223123312431253126312731283129313031313132313331343135313631373138313931403141314231433144314531463147314831493150315131523153315431553156315731583159316031613162316331643165316631673168316931703171317231733174317531763177317831793180318131823183318431853186318731883189319031913192319331943195319631973198319932003201320232033204320532063207320832093210321132123213321432153216321732183219322032213222322332243225322632273228322932303231323232333234323532363237
  1. #include "sql.h"
  2. #include "source.h"
  3. #include "context.h"
  4. #include "match_recognize.h"
  5. #include <yql/essentials/providers/common/provider/yql_provider_names.h>
  6. #include <yql/essentials/utils/yql_panic.h>
  7. #include <library/cpp/charset/ci_string.h>
  8. using namespace NYql;
  9. namespace NSQLTranslationV1 {
  10. class TSubqueryNode: public INode {
  11. public:
  12. TSubqueryNode(TSourcePtr&& source, const TString& alias, bool inSubquery, int ensureTupleSize, TScopedStatePtr scoped)
  13. : INode(source->GetPos())
  14. , Source(std::move(source))
  15. , Alias(alias)
  16. , InSubquery(inSubquery)
  17. , EnsureTupleSize(ensureTupleSize)
  18. , Scoped(scoped)
  19. {
  20. YQL_ENSURE(!Alias.empty());
  21. }
  22. ISource* GetSource() override {
  23. return Source.Get();
  24. }
  25. bool DoInit(TContext& ctx, ISource* src) override {
  26. YQL_ENSURE(!src, "Source not expected for subquery node");
  27. Source->UseAsInner();
  28. if (!Source->Init(ctx, nullptr)) {
  29. return false;
  30. }
  31. TTableList tableList;
  32. Source->GetInputTables(tableList);
  33. auto tables = BuildInputTables(Pos, tableList, InSubquery, Scoped);
  34. if (!tables->Init(ctx, Source.Get())) {
  35. return false;
  36. }
  37. auto source = Source->Build(ctx);
  38. if (!source) {
  39. return false;
  40. }
  41. if (EnsureTupleSize != -1) {
  42. source = Y("EnsureTupleSize", source, Q(ToString(EnsureTupleSize)));
  43. }
  44. Node = Y("let", Alias, Y("block", Q(L(tables, Y("return", Q(Y("world", source)))))));
  45. IsUsed = true;
  46. return true;
  47. }
  48. void DoUpdateState() const override {
  49. State.Set(ENodeState::Const, true);
  50. }
  51. bool UsedSubquery() const override {
  52. return IsUsed;
  53. }
  54. TAstNode* Translate(TContext& ctx) const override {
  55. Y_DEBUG_ABORT_UNLESS(Node);
  56. return Node->Translate(ctx);
  57. }
  58. const TString* SubqueryAlias() const override {
  59. return &Alias;
  60. }
  61. TPtr DoClone() const final {
  62. return new TSubqueryNode(Source->CloneSource(), Alias, InSubquery, EnsureTupleSize, Scoped);
  63. }
  64. protected:
  65. TSourcePtr Source;
  66. TNodePtr Node;
  67. const TString Alias;
  68. const bool InSubquery;
  69. const int EnsureTupleSize;
  70. bool IsUsed = false;
  71. TScopedStatePtr Scoped;
  72. };
  73. TNodePtr BuildSubquery(TSourcePtr source, const TString& alias, bool inSubquery, int ensureTupleSize, TScopedStatePtr scoped) {
  74. return new TSubqueryNode(std::move(source), alias, inSubquery, ensureTupleSize, scoped);
  75. }
  76. class TSourceNode: public INode {
  77. public:
  78. TSourceNode(TPosition pos, TSourcePtr&& source, bool checkExist, bool withTables)
  79. : INode(pos)
  80. , Source(std::move(source))
  81. , CheckExist(checkExist)
  82. , WithTables(withTables)
  83. {}
  84. ISource* GetSource() override {
  85. return Source.Get();
  86. }
  87. bool DoInit(TContext& ctx, ISource* src) override {
  88. if (AsInner) {
  89. Source->UseAsInner();
  90. }
  91. if (!Source->Init(ctx, src)) {
  92. return false;
  93. }
  94. Node = Source->Build(ctx);
  95. if (!Node) {
  96. return false;
  97. }
  98. if (src) {
  99. if (IsSubquery()) {
  100. /// should be not used?
  101. auto columnsPtr = Source->GetColumns();
  102. if (columnsPtr && (columnsPtr->All || columnsPtr->QualifiedAll || columnsPtr->List.size() == 1)) {
  103. Node = Y("SingleMember", Y("SqlAccess", Q("dict"), Y("Take", Node, Y("Uint64", Q("1"))), Y("Uint64", Q("0"))));
  104. } else {
  105. ctx.Error(Pos) << "Source used in expression should contain one concrete column";
  106. if (RefPos) {
  107. ctx.Error(*RefPos) << "Source is used here";
  108. }
  109. return false;
  110. }
  111. }
  112. src->AddDependentSource(Source.Get());
  113. }
  114. if (Node && WithTables) {
  115. TTableList tableList;
  116. Source->GetInputTables(tableList);
  117. TNodePtr inputTables(BuildInputTables(ctx.Pos(), tableList, IsSubquery(), ctx.Scoped));
  118. if (!inputTables->Init(ctx, Source.Get())) {
  119. return false;
  120. }
  121. auto blockContent = inputTables;
  122. blockContent = L(blockContent, Y("return", Node));
  123. Node = Y("block", Q(blockContent));
  124. }
  125. return true;
  126. }
  127. bool IsSubquery() const {
  128. return !AsInner && Source->IsSelect() && !CheckExist;
  129. }
  130. void DoUpdateState() const override {
  131. State.Set(ENodeState::Const, IsSubquery());
  132. }
  133. TAstNode* Translate(TContext& ctx) const override {
  134. Y_DEBUG_ABORT_UNLESS(Node);
  135. return Node->Translate(ctx);
  136. }
  137. TPtr DoClone() const final {
  138. return new TSourceNode(Pos, Source->CloneSource(), CheckExist, WithTables);
  139. }
  140. protected:
  141. TSourcePtr Source;
  142. TNodePtr Node;
  143. bool CheckExist;
  144. bool WithTables;
  145. };
  146. TNodePtr BuildSourceNode(TPosition pos, TSourcePtr source, bool checkExist, bool withTables) {
  147. return new TSourceNode(pos, std::move(source), checkExist, withTables);
  148. }
  149. class TFakeSource: public ISource {
  150. public:
  151. TFakeSource(TPosition pos, bool missingFrom, bool inSubquery)
  152. : ISource(pos)
  153. , MissingFrom(missingFrom)
  154. , InSubquery(inSubquery)
  155. {}
  156. bool IsFake() const override {
  157. return true;
  158. }
  159. TMaybe<bool> AddColumn(TContext& ctx, TColumnNode& column) override {
  160. // TODO: fix column reference scope - with proper scopes error below should happen earlier
  161. if (column.CanBeType()) {
  162. return true;
  163. }
  164. ctx.Error(Pos) << (MissingFrom ? "Column references are not allowed without FROM" : "Source does not allow column references");
  165. ctx.Error(column.GetPos()) << "Column reference "
  166. << (column.GetColumnName() ? "'" + *column.GetColumnName() + "'" : "(expr)");
  167. return {};
  168. }
  169. bool AddFilter(TContext& ctx, TNodePtr filter) override {
  170. Y_UNUSED(filter);
  171. auto pos = filter ? filter->GetPos() : Pos;
  172. ctx.Error(pos) << (MissingFrom ? "Filtering is not allowed without FROM" : "Source does not allow filtering");
  173. return false;
  174. }
  175. TNodePtr Build(TContext& ctx) override {
  176. Y_UNUSED(ctx);
  177. auto ret = Y("AsList", Y("AsStruct"));
  178. if (InSubquery) {
  179. return Y("WithWorld", ret, "world");
  180. } else {
  181. return ret;
  182. }
  183. }
  184. bool AddGroupKey(TContext& ctx, const TString& column) override {
  185. Y_UNUSED(column);
  186. ctx.Error(Pos) << "Grouping is not allowed " << (MissingFrom ? "without FROM" : "in this context");
  187. return false;
  188. }
  189. bool AddAggregation(TContext& ctx, TAggregationPtr aggr) override {
  190. YQL_ENSURE(aggr);
  191. ctx.Error(aggr->GetPos()) << "Aggregation is not allowed " << (MissingFrom ? "without FROM" : "in this context");
  192. return false;
  193. }
  194. bool AddAggregationOverWindow(TContext& ctx, const TString& windowName, TAggregationPtr func) override {
  195. Y_UNUSED(windowName);
  196. YQL_ENSURE(func);
  197. ctx.Error(func->GetPos()) << "Aggregation is not allowed " << (MissingFrom ? "without FROM" : "in this context");
  198. return false;
  199. }
  200. bool AddFuncOverWindow(TContext& ctx, const TString& windowName, TNodePtr func) override {
  201. Y_UNUSED(windowName);
  202. YQL_ENSURE(func);
  203. ctx.Error(func->GetPos()) << "Window functions are not allowed " << (MissingFrom ? "without FROM" : "in this context");
  204. return false;
  205. }
  206. TWindowSpecificationPtr FindWindowSpecification(TContext& ctx, const TString& windowName) const override {
  207. Y_UNUSED(windowName);
  208. ctx.Error(Pos) << "Window and aggregation functions are not allowed " << (MissingFrom ? "without FROM" : "in this context");
  209. return {};
  210. }
  211. bool IsGroupByColumn(const TString& column) const override {
  212. Y_UNUSED(column);
  213. return false;
  214. }
  215. TNodePtr BuildFilter(TContext& ctx, const TString& label) override {
  216. Y_UNUSED(ctx);
  217. Y_UNUSED(label);
  218. return nullptr;
  219. }
  220. std::pair<TNodePtr, bool> BuildAggregation(const TString& label, TContext& ctx) override {
  221. Y_UNUSED(label);
  222. Y_UNUSED(ctx);
  223. return { nullptr, true };
  224. }
  225. TPtr DoClone() const final {
  226. return new TFakeSource(Pos, MissingFrom, InSubquery);
  227. }
  228. private:
  229. const bool MissingFrom;
  230. const bool InSubquery;
  231. };
  232. TSourcePtr BuildFakeSource(TPosition pos, bool missingFrom, bool inSubquery) {
  233. return new TFakeSource(pos, missingFrom, inSubquery);
  234. }
  235. class TNodeSource: public ISource {
  236. public:
  237. TNodeSource(TPosition pos, const TNodePtr& node, bool wrapToList, bool wrapByTableSource)
  238. : ISource(pos)
  239. , Node(node)
  240. , WrapToList(wrapToList)
  241. , WrapByTableSource(wrapByTableSource)
  242. {
  243. YQL_ENSURE(Node);
  244. FakeSource = BuildFakeSource(pos);
  245. }
  246. bool ShouldUseSourceAsColumn(const TString& source) const final {
  247. return source && source != GetLabel();
  248. }
  249. TMaybe<bool> AddColumn(TContext& ctx, TColumnNode& column) final {
  250. Y_UNUSED(ctx);
  251. Y_UNUSED(column);
  252. return true;
  253. }
  254. bool DoInit(TContext& ctx, ISource* src) final {
  255. if (!Node->Init(ctx, FakeSource.Get())) {
  256. return false;
  257. }
  258. return ISource::DoInit(ctx, src);
  259. }
  260. TNodePtr Build(TContext& /*ctx*/) final {
  261. auto nodeAst = AstNode(Node);
  262. if (WrapToList) {
  263. nodeAst = Y("ToList", nodeAst);
  264. }
  265. if (WrapByTableSource) {
  266. nodeAst = Y("TableSource", nodeAst);
  267. }
  268. return nodeAst;
  269. }
  270. TPtr DoClone() const final {
  271. return new TNodeSource(Pos, SafeClone(Node), WrapToList, WrapByTableSource);
  272. }
  273. private:
  274. TNodePtr Node;
  275. const bool WrapToList;
  276. const bool WrapByTableSource;
  277. TSourcePtr FakeSource;
  278. };
  279. TSourcePtr BuildNodeSource(TPosition pos, const TNodePtr& node, bool wrapToList, bool wrapByTableSource) {
  280. return new TNodeSource(pos, node, wrapToList, wrapByTableSource);
  281. }
  282. class IProxySource: public ISource {
  283. protected:
  284. IProxySource(TPosition pos, ISource* src)
  285. : ISource(pos)
  286. , Source(src)
  287. {}
  288. void AllColumns() override {
  289. Y_DEBUG_ABORT_UNLESS(Source);
  290. return Source->AllColumns();
  291. }
  292. const TColumns* GetColumns() const override {
  293. Y_DEBUG_ABORT_UNLESS(Source);
  294. return Source->GetColumns();
  295. }
  296. void GetInputTables(TTableList& tableList) const override {
  297. if (Source) {
  298. Source->GetInputTables(tableList);
  299. }
  300. ISource::GetInputTables(tableList);
  301. }
  302. TMaybe<bool> AddColumn(TContext& ctx, TColumnNode& column) override {
  303. Y_DEBUG_ABORT_UNLESS(Source);
  304. const TString label(Source->GetLabel());
  305. Source->SetLabel(Label);
  306. const auto ret = Source->AddColumn(ctx, column);
  307. Source->SetLabel(label);
  308. return ret;
  309. }
  310. bool ShouldUseSourceAsColumn(const TString& source) const override {
  311. return Source->ShouldUseSourceAsColumn(source);
  312. }
  313. bool IsStream() const override {
  314. Y_DEBUG_ABORT_UNLESS(Source);
  315. return Source->IsStream();
  316. }
  317. EOrderKind GetOrderKind() const override {
  318. Y_DEBUG_ABORT_UNLESS(Source);
  319. return Source->GetOrderKind();
  320. }
  321. TWriteSettings GetWriteSettings() const override {
  322. Y_DEBUG_ABORT_UNLESS(Source);
  323. return Source->GetWriteSettings();
  324. }
  325. protected:
  326. void SetSource(ISource* source) {
  327. Source = source;
  328. }
  329. ISource* Source;
  330. };
  331. class IRealSource: public ISource {
  332. protected:
  333. IRealSource(TPosition pos)
  334. : ISource(pos)
  335. {
  336. }
  337. void AllColumns() override {
  338. Columns.SetAll();
  339. }
  340. const TColumns* GetColumns() const override {
  341. return &Columns;
  342. }
  343. TMaybe<bool> AddColumn(TContext& ctx, TColumnNode& column) override {
  344. const auto& label = *column.GetSourceName();
  345. const auto& source = GetLabel();
  346. if (!label.empty() && label != source && !(source.StartsWith(label) && source[label.size()] == ':')) {
  347. if (column.IsReliable()) {
  348. ctx.Error(column.GetPos()) << "Unknown correlation name: " << label;
  349. }
  350. return {};
  351. }
  352. if (column.IsAsterisk()) {
  353. return true;
  354. }
  355. const auto* name = column.GetColumnName();
  356. if (name && !column.CanBeType() && !Columns.IsColumnPossible(ctx, *name) && !IsAlias(EExprSeat::GroupBy, *name) && !IsAlias(EExprSeat::DistinctAggr, *name)) {
  357. if (column.IsReliable()) {
  358. TStringBuilder sb;
  359. sb << "Column " << *name << " is not in source column set";
  360. if (const auto mistype = FindColumnMistype(*name)) {
  361. sb << ". Did you mean " << mistype.GetRef() << "?";
  362. }
  363. ctx.Error(column.GetPos()) << sb;
  364. }
  365. return {};
  366. }
  367. return true;
  368. }
  369. TMaybe<TString> FindColumnMistype(const TString& name) const override {
  370. auto result = FindMistypeIn(Columns.Real, name);
  371. if (!result) {
  372. auto result = FindMistypeIn(Columns.Artificial, name);
  373. }
  374. return result ? result : ISource::FindColumnMistype(name);
  375. }
  376. protected:
  377. TColumns Columns;
  378. };
  379. class IComposableSource : private TNonCopyable {
  380. public:
  381. virtual ~IComposableSource() = default;
  382. virtual void BuildProjectWindowDistinct(TNodePtr& blocks, TContext& ctx, bool ordered) = 0;
  383. };
  384. using TComposableSourcePtr = TIntrusivePtr<IComposableSource>;
  385. class TMuxSource: public ISource {
  386. public:
  387. TMuxSource(TPosition pos, TVector<TSourcePtr>&& sources)
  388. : ISource(pos)
  389. , Sources(std::move(sources))
  390. {
  391. YQL_ENSURE(Sources.size() > 1);
  392. }
  393. void AllColumns() final {
  394. for (auto& source: Sources) {
  395. source->AllColumns();
  396. }
  397. }
  398. const TColumns* GetColumns() const final {
  399. // Columns are equal in all sources. Return from the first one
  400. return Sources.front()->GetColumns();
  401. }
  402. void GetInputTables(TTableList& tableList) const final {
  403. for (auto& source: Sources) {
  404. source->GetInputTables(tableList);
  405. }
  406. ISource::GetInputTables(tableList);
  407. }
  408. bool IsStream() const final {
  409. return AnyOf(Sources, [] (const TSourcePtr& s) { return s->IsStream(); });
  410. }
  411. bool DoInit(TContext& ctx, ISource* src) final {
  412. for (auto& source: Sources) {
  413. if (AsInner) {
  414. source->UseAsInner();
  415. }
  416. if (src) {
  417. src->AddDependentSource(source.Get());
  418. }
  419. if (!source->Init(ctx, src)) {
  420. return false;
  421. }
  422. if (!source->InitFilters(ctx)) {
  423. return false;
  424. }
  425. }
  426. return true;
  427. }
  428. TMaybe<bool> AddColumn(TContext& ctx, TColumnNode& column) final {
  429. for (auto& source: Sources) {
  430. if (!source->AddColumn(ctx, column)) {
  431. return {};
  432. }
  433. }
  434. return true;
  435. }
  436. TNodePtr Build(TContext& ctx) final {
  437. TNodePtr block;
  438. auto muxArgs = Y();
  439. for (size_t i = 0; i < Sources.size(); ++i) {
  440. auto& source = Sources[i];
  441. auto input = source->Build(ctx);
  442. auto ref = ctx.MakeName("src");
  443. muxArgs->Add(ref);
  444. if (block) {
  445. block = L(block, Y("let", ref, input));
  446. } else {
  447. block = Y(Y("let", ref, input));
  448. }
  449. auto filter = source->BuildFilter(ctx, ref);
  450. if (filter) {
  451. block = L(block, Y("let", ref, filter));
  452. }
  453. if (ctx.EnableSystemColumns) {
  454. block = L(block, Y("let", ref, Y("RemoveSystemMembers", ref)));
  455. }
  456. }
  457. return GroundWithExpr(block, Y("Mux", Q(muxArgs)));
  458. }
  459. bool AddFilter(TContext& ctx, TNodePtr filter) final {
  460. Y_UNUSED(filter);
  461. ctx.Error() << "Filter is not allowed for multiple sources";
  462. return false;
  463. }
  464. TPtr DoClone() const final {
  465. return new TMuxSource(Pos, CloneContainer(Sources));
  466. }
  467. protected:
  468. TVector<TSourcePtr> Sources;
  469. };
  470. TSourcePtr BuildMuxSource(TPosition pos, TVector<TSourcePtr>&& sources) {
  471. return new TMuxSource(pos, std::move(sources));
  472. }
  473. class TSubqueryRefNode: public IRealSource {
  474. public:
  475. TSubqueryRefNode(const TNodePtr& subquery, const TString& alias, int tupleIndex)
  476. : IRealSource(subquery->GetPos())
  477. , Subquery(subquery)
  478. , Alias(alias)
  479. , TupleIndex(tupleIndex)
  480. {
  481. YQL_ENSURE(subquery->GetSource());
  482. }
  483. ISource* GetSource() override {
  484. return this;
  485. }
  486. bool DoInit(TContext& ctx, ISource* src) override {
  487. // independent subquery should not connect source
  488. Subquery->UseAsInner();
  489. if (!Subquery->Init(ctx, nullptr)) {
  490. return false;
  491. }
  492. Columns = *Subquery->GetSource()->GetColumns();
  493. Node = BuildAtom(Pos, Alias, TNodeFlags::Default);
  494. if (TupleIndex != -1) {
  495. Node = Y("Nth", Node, Q(ToString(TupleIndex)));
  496. }
  497. if (!Node->Init(ctx, src)) {
  498. return false;
  499. }
  500. if (src && Subquery->GetSource()->IsSelect()) {
  501. auto columnsPtr = &Columns;
  502. if (columnsPtr && (columnsPtr->All || columnsPtr->QualifiedAll || columnsPtr->List.size() == 1)) {
  503. Node = Y("SingleMember", Y("SqlAccess", Q("dict"), Y("Take", Node, Y("Uint64", Q("1"))), Y("Uint64", Q("0"))));
  504. } else {
  505. ctx.Error(Pos) << "Source used in expression should contain one concrete column";
  506. if (RefPos) {
  507. ctx.Error(*RefPos) << "Source is used here";
  508. }
  509. return false;
  510. }
  511. }
  512. TNodePtr sample;
  513. if (!BuildSamplingLambda(sample)) {
  514. return false;
  515. } else if (sample) {
  516. Node = Y("block", Q(Y(Y("let", Node, Y("OrderedFlatMap", Node, sample)), Y("return", Node))));
  517. }
  518. return true;
  519. }
  520. TNodePtr Build(TContext& ctx) override {
  521. Y_UNUSED(ctx);
  522. return Node;
  523. }
  524. bool SetSamplingOptions(
  525. TContext& ctx,
  526. TPosition pos,
  527. ESampleClause sampleClause,
  528. ESampleMode mode,
  529. TNodePtr samplingRate,
  530. TNodePtr samplingSeed) override {
  531. if (mode == ESampleMode::System) {
  532. ctx.Error(pos) << "only Bernoulli sampling mode is supported for subqueries";
  533. return false;
  534. }
  535. if (samplingSeed) {
  536. ctx.Error(pos) << "'Repeatable' keyword is not supported for subqueries";
  537. return false;
  538. }
  539. return SetSamplingRate(ctx, sampleClause, samplingRate);
  540. }
  541. bool IsStream() const override {
  542. return Subquery->GetSource()->IsStream();
  543. }
  544. void DoUpdateState() const override {
  545. State.Set(ENodeState::Const, true);
  546. }
  547. TAstNode* Translate(TContext& ctx) const override {
  548. Y_DEBUG_ABORT_UNLESS(Node);
  549. return Node->Translate(ctx);
  550. }
  551. TPtr DoClone() const final {
  552. return new TSubqueryRefNode(Subquery, Alias, TupleIndex);
  553. }
  554. protected:
  555. TNodePtr Subquery;
  556. const TString Alias;
  557. const int TupleIndex;
  558. TNodePtr Node;
  559. };
  560. TNodePtr BuildSubqueryRef(TNodePtr subquery, const TString& alias, int tupleIndex) {
  561. return new TSubqueryRefNode(std::move(subquery), alias, tupleIndex);
  562. }
  563. class TInvalidSubqueryRefNode: public ISource {
  564. public:
  565. TInvalidSubqueryRefNode(TPosition pos)
  566. : ISource(pos)
  567. , Pos(pos)
  568. {
  569. }
  570. bool DoInit(TContext& ctx, ISource* src) override {
  571. Y_UNUSED(src);
  572. ctx.Error(Pos) << "Named subquery can not be used as a top level statement in libraries";
  573. return false;
  574. }
  575. TNodePtr Build(TContext& ctx) override {
  576. Y_UNUSED(ctx);
  577. return {};
  578. }
  579. TPtr DoClone() const final {
  580. return new TInvalidSubqueryRefNode(Pos);
  581. }
  582. protected:
  583. const TPosition Pos;
  584. };
  585. TNodePtr BuildInvalidSubqueryRef(TPosition subqueryPos) {
  586. return new TInvalidSubqueryRefNode(subqueryPos);
  587. }
  588. class TTableSource: public IRealSource {
  589. public:
  590. TTableSource(TPosition pos, const TTableRef& table, const TString& label)
  591. : IRealSource(pos)
  592. , Table(table)
  593. , FakeSource(BuildFakeSource(pos))
  594. {
  595. SetLabel(label.empty() ? Table.ShortName() : label);
  596. }
  597. void GetInputTables(TTableList& tableList) const override {
  598. tableList.push_back(Table);
  599. ISource::GetInputTables(tableList);
  600. }
  601. bool ShouldUseSourceAsColumn(const TString& source) const override {
  602. const auto& label = GetLabel();
  603. return source && source != label && !(label.StartsWith(source) && label[source.size()] == ':');
  604. }
  605. TMaybe<bool> AddColumn(TContext& ctx, TColumnNode& column) override {
  606. Columns.Add(column.GetColumnName(), column.GetCountHint(), column.IsArtificial(), column.IsReliable());
  607. if (!IRealSource::AddColumn(ctx, column)) {
  608. return {};
  609. }
  610. return false;
  611. }
  612. bool SetSamplingOptions(
  613. TContext& ctx,
  614. TPosition pos,
  615. ESampleClause sampleClause,
  616. ESampleMode mode,
  617. TNodePtr samplingRate,
  618. TNodePtr samplingSeed) override
  619. {
  620. Y_UNUSED(pos);
  621. TString modeName;
  622. if (!samplingSeed) {
  623. samplingSeed = Y("Int32", Q("0"));
  624. }
  625. if (ESampleClause::Sample == sampleClause) {
  626. YQL_ENSURE(ESampleMode::Bernoulli == mode, "Internal logic error");
  627. }
  628. switch (mode) {
  629. case ESampleMode::Bernoulli:
  630. modeName = "bernoulli";
  631. break;
  632. case ESampleMode::System:
  633. modeName = "system";
  634. break;
  635. }
  636. if (!samplingRate->Init(ctx, FakeSource.Get())) {
  637. return false;
  638. }
  639. samplingRate = PrepareSamplingRate(pos, sampleClause, samplingRate);
  640. auto sampleSettings = Q(Y(Q(modeName), Y("EvaluateAtom", Y("ToString", samplingRate)), Y("EvaluateAtom", Y("ToString", samplingSeed))));
  641. auto sampleOption = Q(Y(Q("sample"), sampleSettings));
  642. if (Table.Options) {
  643. if (!Table.Options->Init(ctx, this)) {
  644. return false;
  645. }
  646. Table.Options = L(Table.Options, sampleOption);
  647. } else {
  648. Table.Options = Y(sampleOption);
  649. }
  650. return true;
  651. }
  652. bool SetTableHints(TContext& ctx, TPosition pos, const TTableHints& hints, const TTableHints& contextHints) override {
  653. Y_UNUSED(ctx);
  654. TTableHints merged = contextHints;
  655. MergeHints(merged, hints);
  656. Table.Options = BuildInputOptions(pos, merged);
  657. return true;
  658. }
  659. bool SetViewName(TContext& ctx, TPosition pos, const TString& view) override {
  660. return Table.Keys->SetViewName(ctx, pos, view);
  661. }
  662. TNodePtr Build(TContext& ctx) override {
  663. if (!Table.Keys->Init(ctx, nullptr)) {
  664. return nullptr;
  665. }
  666. return AstNode(Table.RefName);
  667. }
  668. bool IsStream() const override {
  669. return IsStreamingService(Table.Service);
  670. }
  671. TPtr DoClone() const final {
  672. return new TTableSource(Pos, Table, GetLabel());
  673. }
  674. bool IsTableSource() const override {
  675. return true;
  676. }
  677. protected:
  678. TTableRef Table;
  679. private:
  680. const TSourcePtr FakeSource;
  681. };
  682. TSourcePtr BuildTableSource(TPosition pos, const TTableRef& table, const TString& label) {
  683. return new TTableSource(pos, table, label);
  684. }
  685. class TInnerSource: public IProxySource {
  686. public:
  687. TInnerSource(TPosition pos, TNodePtr node, const TString& service, const TDeferredAtom& cluster, const TString& label)
  688. : IProxySource(pos, nullptr)
  689. , Node(node)
  690. , Service(service)
  691. , Cluster(cluster)
  692. {
  693. SetLabel(label);
  694. }
  695. bool SetSamplingOptions(TContext& ctx, TPosition pos, ESampleClause sampleClause, ESampleMode mode, TNodePtr samplingRate, TNodePtr samplingSeed) override {
  696. Y_UNUSED(ctx);
  697. SamplingPos = pos;
  698. SamplingClause = sampleClause;
  699. SamplingMode = mode;
  700. SamplingRate = samplingRate;
  701. SamplingSeed = samplingSeed;
  702. return true;
  703. }
  704. bool SetTableHints(TContext& ctx, TPosition pos, const TTableHints& hints, const TTableHints& contextHints) override {
  705. Y_UNUSED(ctx);
  706. HintsPos = pos;
  707. Hints = hints;
  708. ContextHints = contextHints;
  709. return true;
  710. }
  711. bool SetViewName(TContext& ctx, TPosition pos, const TString& view) override {
  712. Y_UNUSED(ctx);
  713. ViewPos = pos;
  714. View = view;
  715. return true;
  716. }
  717. bool ShouldUseSourceAsColumn(const TString& source) const override {
  718. return source && source != GetLabel();
  719. }
  720. TMaybe<bool> AddColumn(TContext& ctx, TColumnNode& column) override {
  721. if (const TString* columnName = column.GetColumnName()) {
  722. if (columnName && IsExprAlias(*columnName)) {
  723. return true;
  724. }
  725. }
  726. return IProxySource::AddColumn(ctx, column);
  727. }
  728. bool DoInit(TContext& ctx, ISource* initSrc) override {
  729. Y_UNUSED(initSrc);
  730. auto source = Node->GetSource();
  731. if (!source) {
  732. NewSource = TryMakeSourceFromExpression(Pos, ctx, Service, Cluster, Node);
  733. source = NewSource.Get();
  734. }
  735. if (!source) {
  736. ctx.Error(Pos) << "Invalid inner source node";
  737. return false;
  738. }
  739. if (SamplingPos) {
  740. if (!source->SetSamplingOptions(ctx, *SamplingPos, SamplingClause, SamplingMode, SamplingRate, SamplingSeed)) {
  741. return false;
  742. }
  743. }
  744. if (ViewPos) {
  745. if (!source->SetViewName(ctx, *ViewPos, View)) {
  746. return false;
  747. }
  748. }
  749. if (HintsPos) {
  750. if (!source->SetTableHints(ctx, *HintsPos, Hints, ContextHints)) {
  751. return false;
  752. }
  753. }
  754. source->SetLabel(Label);
  755. if (!NewSource) {
  756. Node->UseAsInner();
  757. if (!Node->Init(ctx, nullptr)) {
  758. return false;
  759. }
  760. }
  761. SetSource(source);
  762. if (NewSource && !NewSource->Init(ctx, nullptr)) {
  763. return false;
  764. }
  765. return ISource::DoInit(ctx, source);
  766. }
  767. TNodePtr Build(TContext& ctx) override {
  768. Y_UNUSED(ctx);
  769. return NewSource ? NewSource->Build(ctx) : Node;
  770. }
  771. bool IsStream() const override {
  772. auto source = Node->GetSource();
  773. if (source) {
  774. return source->IsStream();
  775. }
  776. // NewSource will be built later in DoInit->TryMakeSourceFromExpression
  777. // where Service will be used in all situations
  778. // let's detect IsStream by Service value
  779. return IsStreamingService(Service);
  780. }
  781. TPtr DoClone() const final {
  782. return new TInnerSource(Pos, SafeClone(Node), Service, Cluster, GetLabel());
  783. }
  784. protected:
  785. TNodePtr Node;
  786. TString Service;
  787. TDeferredAtom Cluster;
  788. TSourcePtr NewSource;
  789. private:
  790. TMaybe<TPosition> SamplingPos;
  791. ESampleClause SamplingClause;
  792. ESampleMode SamplingMode;
  793. TNodePtr SamplingRate;
  794. TNodePtr SamplingSeed;
  795. TMaybe<TPosition> ViewPos;
  796. TString View;
  797. TMaybe<TPosition> HintsPos;
  798. TTableHints Hints;
  799. TTableHints ContextHints;
  800. };
  801. TSourcePtr BuildInnerSource(TPosition pos, TNodePtr node, const TString& service, const TDeferredAtom& cluster, const TString& label) {
  802. return new TInnerSource(pos, node, service, cluster, label);
  803. }
  804. static bool IsComparableExpression(TContext& ctx, const TNodePtr& expr, bool assume, const char* sqlConstruction) {
  805. if (assume && !expr->IsPlainColumn()) {
  806. ctx.Error(expr->GetPos()) << "Only column names can be used in " << sqlConstruction;
  807. return false;
  808. }
  809. if (expr->IsConstant()) {
  810. ctx.Error(expr->GetPos()) << "Unable to " << sqlConstruction << " constant expression";
  811. return false;
  812. }
  813. if (expr->IsAggregated() && !expr->HasState(ENodeState::AggregationKey)) {
  814. ctx.Error(expr->GetPos()) << "Unable to " << sqlConstruction << " aggregated values";
  815. return false;
  816. }
  817. if (expr->IsPlainColumn()) {
  818. return true;
  819. }
  820. if (expr->GetOpName().empty()) {
  821. ctx.Error(expr->GetPos()) << "You should use in " << sqlConstruction << " column name, qualified field, callable function or expression";
  822. return false;
  823. }
  824. return true;
  825. }
  826. /// \todo move to reduce.cpp? or mapreduce.cpp?
  827. class TReduceSource: public IRealSource {
  828. public:
  829. TReduceSource(TPosition pos,
  830. ReduceMode mode,
  831. TSourcePtr source,
  832. TVector<TSortSpecificationPtr>&& orderBy,
  833. TVector<TNodePtr>&& keys,
  834. TVector<TNodePtr>&& args,
  835. TNodePtr udf,
  836. TNodePtr having,
  837. const TWriteSettings& settings,
  838. const TVector<TSortSpecificationPtr>& assumeOrderBy,
  839. bool listCall)
  840. : IRealSource(pos)
  841. , Mode(mode)
  842. , Source(std::move(source))
  843. , OrderBy(std::move(orderBy))
  844. , Keys(std::move(keys))
  845. , Args(std::move(args))
  846. , Udf(udf)
  847. , Having(having)
  848. , Settings(settings)
  849. , AssumeOrderBy(assumeOrderBy)
  850. , ListCall(listCall)
  851. {
  852. YQL_ENSURE(!Keys.empty());
  853. YQL_ENSURE(Source);
  854. }
  855. void GetInputTables(TTableList& tableList) const override {
  856. Source->GetInputTables(tableList);
  857. ISource::GetInputTables(tableList);
  858. }
  859. bool DoInit(TContext& ctx, ISource* src) final {
  860. if (AsInner) {
  861. Source->UseAsInner();
  862. }
  863. YQL_ENSURE(!src);
  864. if (!Source->Init(ctx, src)) {
  865. return false;
  866. }
  867. if (!Source->InitFilters(ctx)) {
  868. return false;
  869. }
  870. src = Source.Get();
  871. for (auto& key: Keys) {
  872. if (!key->Init(ctx, src)) {
  873. return false;
  874. }
  875. auto keyNamePtr = key->GetColumnName();
  876. YQL_ENSURE(keyNamePtr);
  877. if (!src->AddGroupKey(ctx, *keyNamePtr)) {
  878. return false;
  879. }
  880. }
  881. if (Having && !Having->Init(ctx, nullptr)) {
  882. return false;
  883. }
  884. /// SIN: verify reduce one argument
  885. if (Args.size() != 1) {
  886. ctx.Error(Pos) << "REDUCE requires exactly one UDF argument";
  887. return false;
  888. }
  889. if (!Args[0]->Init(ctx, src)) {
  890. return false;
  891. }
  892. for (auto orderSpec: OrderBy) {
  893. if (!orderSpec->OrderExpr->Init(ctx, src)) {
  894. return false;
  895. }
  896. }
  897. if (!Udf->Init(ctx, src)) {
  898. return false;
  899. }
  900. if (Udf->GetLabel().empty()) {
  901. Columns.SetAll();
  902. } else {
  903. Columns.Add(&Udf->GetLabel(), false);
  904. }
  905. const auto label = GetLabel();
  906. for (const auto& sortSpec: AssumeOrderBy) {
  907. auto& expr = sortSpec->OrderExpr;
  908. SetLabel(Source->GetLabel());
  909. if (!expr->Init(ctx, this)) {
  910. return false;
  911. }
  912. if (!IsComparableExpression(ctx, expr, true, "ASSUME ORDER BY")) {
  913. return false;
  914. }
  915. }
  916. SetLabel(label);
  917. return true;
  918. }
  919. TNodePtr Build(TContext& ctx) final {
  920. auto input = Source->Build(ctx);
  921. if (!input) {
  922. return nullptr;
  923. }
  924. auto keysTuple = Y();
  925. if (Keys.size() == 1) {
  926. keysTuple = Y("Member", "row", BuildQuotedAtom(Pos, *Keys.back()->GetColumnName()));
  927. }
  928. else {
  929. for (const auto& key: Keys) {
  930. keysTuple = L(keysTuple, Y("Member", "row", BuildQuotedAtom(Pos, *key->GetColumnName())));
  931. }
  932. keysTuple = Q(keysTuple);
  933. }
  934. auto extractKey = Y("SqlExtractKey", "row", BuildLambda(Pos, Y("row"), keysTuple));
  935. auto extractKeyLambda = BuildLambda(Pos, Y("row"), extractKey);
  936. TNodePtr processPartitions;
  937. if (ListCall) {
  938. if (Mode != ReduceMode::ByAll) {
  939. ctx.Error(Pos) << "TableRows() must be used only with USING ALL";
  940. return nullptr;
  941. }
  942. TNodePtr expr = BuildAtom(Pos, "partitionStream");
  943. processPartitions = Y("SqlReduce", "partitionStream", BuildQuotedAtom(Pos, "byAllList", TNodeFlags::Default), Udf, expr);
  944. } else {
  945. switch (Mode) {
  946. case ReduceMode::ByAll: {
  947. auto columnPtr = Args[0]->GetColumnName();
  948. TNodePtr expr = BuildAtom(Pos, "partitionStream");
  949. if (!columnPtr || *columnPtr != "*") {
  950. expr = Y("Map", "partitionStream", BuildLambda(Pos, Y("keyPair"), Q(L(Y(),\
  951. Y("Nth", "keyPair", Q(ToString("0"))),\
  952. Y("Map", Y("Nth", "keyPair", Q(ToString("1"))), BuildLambda(Pos, Y("row"), Args[0]))))));
  953. }
  954. processPartitions = Y("SqlReduce", "partitionStream", BuildQuotedAtom(Pos, "byAll", TNodeFlags::Default), Udf, expr);
  955. break;
  956. }
  957. case ReduceMode::ByPartition: {
  958. processPartitions = Y("SqlReduce", "partitionStream", extractKeyLambda, Udf,
  959. BuildLambda(Pos, Y("row"), Args[0]));
  960. break;
  961. }
  962. default:
  963. YQL_ENSURE(false, "Unexpected REDUCE mode");
  964. }
  965. }
  966. TNodePtr sortDirection;
  967. TNodePtr sortKeySelector;
  968. FillSortParts(OrderBy, sortDirection, sortKeySelector);
  969. if (!OrderBy.empty()) {
  970. sortKeySelector = BuildLambda(Pos, Y("row"), Y("SqlExtractKey", "row", sortKeySelector));
  971. }
  972. auto partitionByKey = Y(!ListCall && Mode == ReduceMode::ByAll ? "PartitionByKey" : "PartitionsByKeys", "core", extractKeyLambda,
  973. sortDirection, sortKeySelector, BuildLambda(Pos, Y("partitionStream"), processPartitions));
  974. auto inputLabel = ListCall ? "inputRowsList" : "core";
  975. auto block(Y(Y("let", inputLabel, input)));
  976. auto filter = Source->BuildFilter(ctx, inputLabel);
  977. if (filter) {
  978. block = L(block, Y("let", inputLabel, filter));
  979. }
  980. if (ListCall) {
  981. block = L(block, Y("let", "core", "inputRowsList"));
  982. }
  983. if (ctx.EnableSystemColumns) {
  984. block = L(block, Y("let", "core", Y("RemoveSystemMembers", "core")));
  985. }
  986. block = L(block, Y("let", "core", Y("AutoDemux", partitionByKey)));
  987. if (Having) {
  988. block = L(block, Y("let", "core",
  989. Y("Filter", "core", BuildLambda(Pos, Y("row"), Y("Coalesce", Having, Y("Bool", Q("false")))))
  990. ));
  991. }
  992. return Y("block", Q(L(block, Y("return", "core"))));
  993. }
  994. TNodePtr BuildSort(TContext& ctx, const TString& label) override {
  995. Y_UNUSED(ctx);
  996. if (AssumeOrderBy.empty()) {
  997. return nullptr;
  998. }
  999. return Y("let", label, BuildSortSpec(AssumeOrderBy, label, false, true));
  1000. }
  1001. EOrderKind GetOrderKind() const override {
  1002. return AssumeOrderBy.empty() ? EOrderKind::None : EOrderKind::Assume;
  1003. }
  1004. TWriteSettings GetWriteSettings() const final {
  1005. return Settings;
  1006. }
  1007. bool HasSelectResult() const final {
  1008. return !Settings.Discard;
  1009. }
  1010. TPtr DoClone() const final {
  1011. return new TReduceSource(Pos, Mode, Source->CloneSource(), CloneContainer(OrderBy),
  1012. CloneContainer(Keys), CloneContainer(Args), SafeClone(Udf), SafeClone(Having), Settings,
  1013. CloneContainer(AssumeOrderBy), ListCall);
  1014. }
  1015. private:
  1016. ReduceMode Mode;
  1017. TSourcePtr Source;
  1018. TVector<TSortSpecificationPtr> OrderBy;
  1019. TVector<TNodePtr> Keys;
  1020. TVector<TNodePtr> Args;
  1021. TNodePtr Udf;
  1022. TNodePtr Having;
  1023. const TWriteSettings Settings;
  1024. TVector<TSortSpecificationPtr> AssumeOrderBy;
  1025. const bool ListCall;
  1026. };
  1027. TSourcePtr BuildReduce(TPosition pos,
  1028. ReduceMode mode,
  1029. TSourcePtr source,
  1030. TVector<TSortSpecificationPtr>&& orderBy,
  1031. TVector<TNodePtr>&& keys,
  1032. TVector<TNodePtr>&& args,
  1033. TNodePtr udf,
  1034. TNodePtr having,
  1035. const TWriteSettings& settings,
  1036. const TVector<TSortSpecificationPtr>& assumeOrderBy,
  1037. bool listCall) {
  1038. return new TReduceSource(pos, mode, std::move(source), std::move(orderBy), std::move(keys),
  1039. std::move(args), udf, having, settings, assumeOrderBy, listCall);
  1040. }
  1041. namespace {
  1042. bool InitAndGetGroupKey(TContext& ctx, const TNodePtr& expr, ISource* src, TStringBuf where, TString& keyColumn) {
  1043. keyColumn.clear();
  1044. YQL_ENSURE(src);
  1045. const bool isJoin = src->GetJoin();
  1046. if (!expr->Init(ctx, src)) {
  1047. return false;
  1048. }
  1049. auto keyNamePtr = expr->GetColumnName();
  1050. if (keyNamePtr && expr->GetLabel().empty()) {
  1051. keyColumn = *keyNamePtr;
  1052. auto sourceNamePtr = expr->GetSourceName();
  1053. auto columnNode = expr->GetColumnNode();
  1054. if (isJoin && (!columnNode || !columnNode->IsArtificial())) {
  1055. if (!sourceNamePtr || sourceNamePtr->empty()) {
  1056. if (!src->IsAlias(EExprSeat::GroupBy, keyColumn)) {
  1057. ctx.Error(expr->GetPos()) << "Columns in " << where << " should have correlation name, error in key: " << keyColumn;
  1058. return false;
  1059. }
  1060. } else {
  1061. keyColumn = DotJoin(*sourceNamePtr, keyColumn);
  1062. }
  1063. }
  1064. }
  1065. return true;
  1066. }
  1067. }
  1068. class TCompositeSelect: public IRealSource {
  1069. public:
  1070. TCompositeSelect(TPosition pos, TSourcePtr source, TSourcePtr originalSource, const TWriteSettings& settings)
  1071. : IRealSource(pos)
  1072. , Source(std::move(source))
  1073. , OriginalSource(std::move(originalSource))
  1074. , Settings(settings)
  1075. {
  1076. YQL_ENSURE(Source);
  1077. }
  1078. void SetSubselects(TVector<TSourcePtr>&& subselects, TVector<TNodePtr>&& grouping, TVector<TNodePtr>&& groupByExpr) {
  1079. Subselects = std::move(subselects);
  1080. Grouping = std::move(grouping);
  1081. GroupByExpr = std::move(groupByExpr);
  1082. Y_DEBUG_ABORT_UNLESS(Subselects.size() > 1);
  1083. }
  1084. void GetInputTables(TTableList& tableList) const override {
  1085. for (const auto& select: Subselects) {
  1086. select->GetInputTables(tableList);
  1087. }
  1088. ISource::GetInputTables(tableList);
  1089. }
  1090. bool DoInit(TContext& ctx, ISource* src) override {
  1091. if (AsInner) {
  1092. Source->UseAsInner();
  1093. }
  1094. if (src) {
  1095. src->AddDependentSource(Source.Get());
  1096. }
  1097. if (!Source->Init(ctx, src)) {
  1098. return false;
  1099. }
  1100. if (!Source->InitFilters(ctx)) {
  1101. return false;
  1102. }
  1103. if (!CalculateGroupingCols(ctx, src)) {
  1104. return false;
  1105. }
  1106. auto origSrc = OriginalSource.Get();
  1107. if (!origSrc->Init(ctx, src)) {
  1108. return false;
  1109. }
  1110. if (origSrc->IsFlattenByColumns() || origSrc->IsFlattenColumns()) {
  1111. Flatten = origSrc->IsFlattenByColumns() ?
  1112. origSrc->BuildFlattenByColumns("row") :
  1113. origSrc->BuildFlattenColumns("row");
  1114. if (!Flatten || !Flatten->Init(ctx, src)) {
  1115. return false;
  1116. }
  1117. }
  1118. if (origSrc->IsFlattenByExprs()) {
  1119. for (auto& expr : static_cast<ISource const*>(origSrc)->Expressions(EExprSeat::FlattenByExpr)) {
  1120. if (!expr->Init(ctx, origSrc)) {
  1121. return false;
  1122. }
  1123. }
  1124. PreFlattenMap = origSrc->BuildPreFlattenMap(ctx);
  1125. if (!PreFlattenMap) {
  1126. return false;
  1127. }
  1128. }
  1129. for (const auto& select: Subselects) {
  1130. select->SetLabel(Label);
  1131. if (AsInner) {
  1132. select->UseAsInner();
  1133. }
  1134. if (!select->Init(ctx, Source.Get())) {
  1135. return false;
  1136. }
  1137. }
  1138. TMaybe<size_t> groupingColumnsCount;
  1139. size_t idx = 0;
  1140. for (const auto& select : Subselects) {
  1141. size_t count = select->GetGroupingColumnsCount();
  1142. if (!groupingColumnsCount.Defined()) {
  1143. groupingColumnsCount = count;
  1144. } else if (*groupingColumnsCount != count) {
  1145. ctx.Error(select->GetPos()) << TStringBuilder() << "Mismatch GROUPING() column count in composite select input #"
  1146. << idx << ": expected " << *groupingColumnsCount << ", got: " << count << ". Please submit bug report";
  1147. return false;
  1148. }
  1149. ++idx;
  1150. }
  1151. return true;
  1152. }
  1153. TMaybe<bool> AddColumn(TContext& ctx, TColumnNode& column) override {
  1154. for (const auto& select: Subselects) {
  1155. if (!select->AddColumn(ctx, column)) {
  1156. return {};
  1157. }
  1158. }
  1159. return true;
  1160. }
  1161. TNodePtr Build(TContext& ctx) override {
  1162. auto input = Source->Build(ctx);
  1163. auto block(Y(Y("let", "composite", input)));
  1164. bool ordered = ctx.UseUnordered(*this);
  1165. if (PreFlattenMap) {
  1166. block = L(block, Y("let", "composite", Y(ordered ? "OrderedFlatMap" : "FlatMap", "composite", BuildLambda(Pos, Y("row"), PreFlattenMap))));
  1167. }
  1168. if (Flatten) {
  1169. block = L(block, Y("let", "composite", Y(ordered ? "OrderedFlatMap" : "FlatMap", "composite", BuildLambda(Pos, Y("row"), Flatten, "res"))));
  1170. }
  1171. auto filter = Source->BuildFilter(ctx, "composite");
  1172. if (filter) {
  1173. block = L(block, Y("let", "composite", filter));
  1174. }
  1175. TNodePtr compositeNode = Y("UnionAll");
  1176. for (const auto& select: Subselects) {
  1177. YQL_ENSURE(dynamic_cast<IComposableSource*>(select.Get()));
  1178. auto addNode = select->Build(ctx);
  1179. if (!addNode) {
  1180. return nullptr;
  1181. }
  1182. compositeNode->Add(addNode);
  1183. }
  1184. block = L(block, Y("let", "core", compositeNode));
  1185. YQL_ENSURE(!Subselects.empty());
  1186. dynamic_cast<IComposableSource*>(Subselects.front().Get())->BuildProjectWindowDistinct(block, ctx, false);
  1187. return Y("block", Q(L(block, Y("return", "core"))));
  1188. }
  1189. bool IsGroupByColumn(const TString& column) const override {
  1190. YQL_ENSURE(!GroupingCols.empty());
  1191. return GroupingCols.contains(column);
  1192. }
  1193. const TSet<TString>& GetGroupingCols() const {
  1194. return GroupingCols;
  1195. }
  1196. TNodePtr BuildSort(TContext& ctx, const TString& label) override {
  1197. return Subselects.front()->BuildSort(ctx, label);
  1198. }
  1199. EOrderKind GetOrderKind() const override {
  1200. return Subselects.front()->GetOrderKind();
  1201. }
  1202. const TColumns* GetColumns() const override{
  1203. return Subselects.front()->GetColumns();
  1204. }
  1205. ISource* RealSource() const {
  1206. return Source.Get();
  1207. }
  1208. TWriteSettings GetWriteSettings() const override {
  1209. return Settings;
  1210. }
  1211. bool HasSelectResult() const override {
  1212. return !Settings.Discard;
  1213. }
  1214. TNodePtr DoClone() const final {
  1215. auto newSource = MakeIntrusive<TCompositeSelect>(Pos, Source->CloneSource(), OriginalSource->CloneSource(), Settings);
  1216. newSource->SetSubselects(CloneContainer(Subselects), CloneContainer(Grouping), CloneContainer(GroupByExpr));
  1217. return newSource;
  1218. }
  1219. private:
  1220. bool CalculateGroupingCols(TContext& ctx, ISource* initSrc) {
  1221. auto origSrc = OriginalSource->CloneSource();
  1222. if (!origSrc->Init(ctx, initSrc)) {
  1223. return false;
  1224. }
  1225. bool hasError = false;
  1226. for (auto& expr: GroupByExpr) {
  1227. if (!expr->Init(ctx, origSrc.Get()) || !IsComparableExpression(ctx, expr, false, "GROUP BY")) {
  1228. hasError = true;
  1229. }
  1230. }
  1231. if (!origSrc->AddExpressions(ctx, GroupByExpr, EExprSeat::GroupBy)) {
  1232. hasError = true;
  1233. }
  1234. YQL_ENSURE(!Grouping.empty());
  1235. for (auto& grouping : Grouping) {
  1236. TString keyColumn;
  1237. if (!InitAndGetGroupKey(ctx, grouping, origSrc.Get(), "grouping sets", keyColumn)) {
  1238. hasError = true;
  1239. } else if (!keyColumn.empty()) {
  1240. GroupingCols.insert(keyColumn);
  1241. }
  1242. }
  1243. return !hasError;
  1244. }
  1245. TSourcePtr Source;
  1246. TSourcePtr OriginalSource;
  1247. TNodePtr Flatten;
  1248. TNodePtr PreFlattenMap;
  1249. const TWriteSettings Settings;
  1250. TVector<TSourcePtr> Subselects;
  1251. TVector<TNodePtr> Grouping;
  1252. TVector<TNodePtr> GroupByExpr;
  1253. TSet<TString> GroupingCols;
  1254. };
  1255. namespace {
  1256. TString FullColumnName(const TColumnNode& column) {
  1257. YQL_ENSURE(column.GetColumnName());
  1258. TString columnName = *column.GetColumnName();
  1259. if (column.IsUseSource()) {
  1260. columnName = DotJoin(*column.GetSourceName(), columnName);
  1261. }
  1262. return columnName;
  1263. }
  1264. }
  1265. /// \todo simplify class
  1266. class TSelectCore: public IRealSource, public IComposableSource {
  1267. public:
  1268. TSelectCore(
  1269. TPosition pos,
  1270. TSourcePtr source,
  1271. const TVector<TNodePtr>& groupByExpr,
  1272. const TVector<TNodePtr>& groupBy,
  1273. bool compactGroupBy,
  1274. const TString& groupBySuffix,
  1275. bool assumeSorted,
  1276. const TVector<TSortSpecificationPtr>& orderBy,
  1277. TNodePtr having,
  1278. const TWinSpecs& winSpecs,
  1279. TLegacyHoppingWindowSpecPtr legacyHoppingWindowSpec,
  1280. const TVector<TNodePtr>& terms,
  1281. bool distinct,
  1282. const TVector<TNodePtr>& without,
  1283. bool selectStream,
  1284. const TWriteSettings& settings,
  1285. TColumnsSets&& uniqueSets,
  1286. TColumnsSets&& distinctSets
  1287. )
  1288. : IRealSource(pos)
  1289. , Source(std::move(source))
  1290. , GroupByExpr(groupByExpr)
  1291. , GroupBy(groupBy)
  1292. , AssumeSorted(assumeSorted)
  1293. , CompactGroupBy(compactGroupBy)
  1294. , GroupBySuffix(groupBySuffix)
  1295. , OrderBy(orderBy)
  1296. , Having(having)
  1297. , WinSpecs(winSpecs)
  1298. , Terms(terms)
  1299. , Without(without)
  1300. , Distinct(distinct)
  1301. , LegacyHoppingWindowSpec(legacyHoppingWindowSpec)
  1302. , SelectStream(selectStream)
  1303. , Settings(settings)
  1304. , UniqueSets(std::move(uniqueSets))
  1305. , DistinctSets(std::move(distinctSets))
  1306. {
  1307. }
  1308. void AllColumns() override {
  1309. if (!OrderByInit) {
  1310. Columns.SetAll();
  1311. }
  1312. }
  1313. void GetInputTables(TTableList& tableList) const override {
  1314. Source->GetInputTables(tableList);
  1315. ISource::GetInputTables(tableList);
  1316. }
  1317. size_t GetGroupingColumnsCount() const override {
  1318. return Source->GetGroupingColumnsCount();
  1319. }
  // Initializes the SELECT core against its underlying source.
  //
  // Stages, in order: source init and SELECT STREAM check; FLATTEN BY
  // expressions; collection of pre-aggregate expressions from terms and
  // HAVING; GROUP BY expressions; DISTINCT aggregate expressions; filters;
  // GROUP BY keys; HAVING; window specs; the select terms (InitSelect);
  // aggregation, flatten and pre-aggregated maps; HAVING handling; window
  // calculation. Returns false as soon as a stage fails; some loops keep
  // going after an error (tracked in hasError) before bailing out.
  1320. bool DoInit(TContext& ctx, ISource* initSrc) override {
  1321. if (AsInner) {
  1322. Source->UseAsInner();
  1323. }
  1324. if (!Source->Init(ctx, initSrc)) {
  1325. return false;
  1326. }
  1327. if (SelectStream && !Source->IsStream()) {
  1328. ctx.Error(Pos) << "SELECT STREAM is unsupported for non-streaming sources";
  1329. return false;
  1330. }
  1331. auto src = Source.Get();
  1332. bool hasError = false;
  // FLATTEN BY expressions: all of them are initialized before failing.
  1333. if (src->IsFlattenByExprs()) {
  1334. for (auto& expr : static_cast<ISource const*>(src)->Expressions(EExprSeat::FlattenByExpr)) {
  1335. if (!expr->Init(ctx, src)) {
  1336. hasError = true;
  1337. continue;
  1338. }
  1339. }
  1340. }
  1341. if (hasError) {
  1342. return false;
  1343. }
  1344. src->SetCompactGroupBy(CompactGroupBy);
  1345. src->SetGroupBySuffix(GroupBySuffix);
  // Gather pre-aggregate expressions of the terms and HAVING into DistinctAggrExpr.
  1346. for (auto& term: Terms) {
  1347. term->CollectPreaggregateExprs(ctx, *src, DistinctAggrExpr);
  1348. }
  1349. if (Having) {
  1350. Having->CollectPreaggregateExprs(ctx, *src, DistinctAggrExpr);
  1351. }
  // GROUP BY expressions: session and hopping windows are marked valid here;
  // a session window on a streaming source is rejected.
  1352. for (auto& expr: GroupByExpr) {
  1353. if (auto sessionWindow = dynamic_cast<TSessionWindow*>(expr.Get())) {
  1354. if (Source->IsStream()) {
  1355. ctx.Error(Pos) << "SessionWindow is unsupported for streaming sources";
  1356. return false;
  1357. }
  1358. sessionWindow->MarkValid();
  1359. }
  1360. if (auto hoppingWindow = dynamic_cast<THoppingWindow*>(expr.Get())) {
  1361. hoppingWindow->MarkValid();
  1362. }
  1363. // need to collect and Init() preaggregated exprs before calling Init() on GROUP BY expression
  1364. TVector<TNodePtr> distinctAggrsInGroupBy;
  1365. expr->CollectPreaggregateExprs(ctx, *src, distinctAggrsInGroupBy);
  1366. for (auto& distinct : distinctAggrsInGroupBy) {
  1367. if (!distinct->Init(ctx, src)) {
  1368. return false;
  1369. }
  1370. }
  1371. DistinctAggrExpr.insert(DistinctAggrExpr.end(), distinctAggrsInGroupBy.begin(), distinctAggrsInGroupBy.end());
  1372. if (!expr->Init(ctx, src) || !IsComparableExpression(ctx, expr, false, "GROUP BY")) {
  1373. hasError = true;
  1374. }
  1375. }
  1376. if (hasError || !src->AddExpressions(ctx, GroupByExpr, EExprSeat::GroupBy)) {
  1377. return false;
  1378. }
  // Note: the expressions collected inside the GROUP BY loop were already
  // initialized there and are passed to Init() again here.
  1379. for (auto& expr: DistinctAggrExpr) {
  1380. if (!expr->Init(ctx, src)) {
  1381. hasError = true;
  1382. }
  1383. }
  1384. if (hasError || !src->AddExpressions(ctx, DistinctAggrExpr, EExprSeat::DistinctAggr)) {
  1385. return false;
  1386. }
  1387. /// grouped expressions are available in filters
  1388. if (!Source->InitFilters(ctx)) {
  1389. return false;
  1390. }
  // GROUP BY keys: every key with a non-empty column name is registered on the source.
  1391. for (auto& expr: GroupBy) {
  1392. TString usedColumn;
  1393. if (!InitAndGetGroupKey(ctx, expr, src, "GROUP BY", usedColumn)) {
  1394. hasError = true;
  1395. } else if (usedColumn) {
  1396. if (!src->AddGroupKey(ctx, usedColumn)) {
  1397. hasError = true;
  1398. }
  1399. }
  1400. }
  1401. if (hasError) {
  1402. return false;
  1403. }
  1404. if (Having && !Having->Init(ctx, src)) {
  1405. return false;
  1406. }
  1407. src->AddWindowSpecs(WinSpecs);
  1408. const bool isJoin = Source->GetJoin();
  // NOTE(review): hasError is re-checked further down, so InitSelect presumably
  // updates it through a reference parameter - confirm against its declaration.
  1409. if (!InitSelect(ctx, src, isJoin, hasError)) {
  1410. return false;
  1411. }
  1412. src->FinishColumns();
  // BuildAggregation returns a pair: the aggregation node (may be null) and a success flag.
  1413. auto aggRes = src->BuildAggregation("core", ctx);
  1414. if (!aggRes.second) {
  1415. return false;
  1416. }
  1417. Aggregate = aggRes.first;
  1418. if (src->IsFlattenByColumns() || src->IsFlattenColumns()) {
  1419. Flatten = src->IsFlattenByColumns() ?
  1420. src->BuildFlattenByColumns("row") :
  1421. src->BuildFlattenColumns("row");
  1422. if (!Flatten || !Flatten->Init(ctx, src)) {
  1423. return false;
  1424. }
  1425. }
  1426. if (src->IsFlattenByExprs()) {
  1427. PreFlattenMap = src->BuildPreFlattenMap(ctx);
  1428. if (!PreFlattenMap) {
  1429. return false;
  1430. }
  1431. }
  1432. if (GroupByExpr || DistinctAggrExpr) {
  1433. PreaggregatedMap = src->BuildPreaggregatedMap(ctx);
  1434. if (!PreaggregatedMap) {
  1435. return false;
  1436. }
  1437. }
  // HAVING handling:
  //  - with an aggregation, HAVING becomes a Filter over the aggregate;
  //  - without one, HAVING is tolerated (with a warning) only for SELECT DISTINCT;
  //  - GROUP BY keys without any aggregation and without DISTINCT is an error.
  1438. if (Aggregate) {
  1439. if (!Aggregate->Init(ctx, src)) {
  1440. return false;
  1441. }
  1442. if (Having) {
  1443. Aggregate = Y(
  1444. "Filter",
  1445. Aggregate,
  1446. BuildLambda(Pos, Y("row"), Y("Coalesce", Having, Y("Bool", Q("false"))))
  1447. );
  1448. }
  1449. } else if (Having) {
  1450. if (Distinct) {
  1451. Aggregate = Y(
  1452. "Filter",
  1453. "core",
  1454. BuildLambda(Pos, Y("row"), Y("Coalesce", Having, Y("Bool", Q("false"))))
  1455. );
  1456. ctx.Warning(Having->GetPos(), TIssuesIds::YQL_HAVING_WITHOUT_AGGREGATION_IN_SELECT_DISTINCT)
  1457. << "The usage of HAVING without aggregations with SELECT DISTINCT is non-standard and will stop working soon. Please use WHERE instead.";
  1458. } else {
  1459. ctx.Error(Having->GetPos()) << "HAVING with meaning GROUP BY () should be with aggregation function.";
  1460. return false;
  1461. }
  1462. } else if (!Distinct && !GroupBy.empty()) {
  1463. ctx.Error(Pos) << "No aggregations were specified";
  1464. return false;
  1465. }
  1466. if (hasError) {
  1467. return false;
  1468. }
  // Window functions: an optional pre-window map, then the window calculation itself.
  1469. if (src->IsCalcOverWindow()) {
  1470. if (src->IsExprSeat(EExprSeat::WindowPartitionBy, EExprType::WithExpression)) {
  1471. PrewindowMap = src->BuildPrewindowMap(ctx);
  1472. if (!PrewindowMap) {
  1473. return false;
  1474. }
  1475. }
  1476. CalcOverWindow = src->BuildCalcOverWindow(ctx, "core");
  1477. if (!CalcOverWindow || !CalcOverWindow->Init(ctx, src)) {
  1478. return false;
  1479. }
  1480. }
  1481. return true;
  1482. }
  1483. TNodePtr Build(TContext& ctx) override {
  1484. auto input = Source->Build(ctx);
  1485. if (!input) {
  1486. return nullptr;
  1487. }
  1488. auto block(Y(Y("let", "core", input)));
  1489. if (Source->HasMatchRecognize()) {
  1490. if (auto matchRecognize = Source->BuildMatchRecognize(ctx, "core")) {
  1491. //use unique name match_recognize to find this block easily in unit tests
  1492. block = L(block, Y("let", "match_recognize", matchRecognize));
  1493. //then bind to the conventional name
  1494. block = L(block, Y("let", "core", "match_recognize"));
  1495. } else {
  1496. return nullptr;
  1497. }
  1498. }
  1499. bool ordered = ctx.UseUnordered(*this);
  1500. if (PreFlattenMap) {
  1501. block = L(block, Y("let", "core", Y(ordered ? "OrderedFlatMap" : "FlatMap", "core", BuildLambda(Pos, Y("row"), PreFlattenMap))));
  1502. }
  1503. if (Flatten) {
  1504. block = L(block, Y("let", "core", Y(ordered ? "OrderedFlatMap" : "FlatMap", "core", BuildLambda(Pos, Y("row"), Flatten, "res"))));
  1505. }
  1506. if (PreaggregatedMap) {
  1507. block = L(block, Y("let", "core", PreaggregatedMap));
  1508. if (Source->IsCompositeSource() && !Columns.QualifiedAll) {
  1509. block = L(block, Y("let", "preaggregated", "core"));
  1510. }
  1511. } else if (Source->IsCompositeSource() && !Columns.QualifiedAll) {
  1512. block = L(block, Y("let", "origcore", "core"));
  1513. }
  1514. auto filter = Source->BuildFilter(ctx, "core");
  1515. if (filter) {
  1516. block = L(block, Y("let", "core", filter));
  1517. }
  1518. if (Aggregate) {
  1519. block = L(block, Y("let", "core", Aggregate));
  1520. ordered = false;
  1521. }
  1522. const bool haveCompositeTerms = Source->IsCompositeSource() && !Columns.All && !Columns.QualifiedAll && !Columns.List.empty();
  1523. if (haveCompositeTerms) {
  1524. // column order does not matter here - it will be set in projection
  1525. YQL_ENSURE(Aggregate);
  1526. block = L(block, Y("let", "core", Y("Map", "core", BuildLambda(Pos, Y("row"), CompositeTerms, "row"))));
  1527. }
  1528. if (auto grouping = Source->BuildGroupingColumns("core")) {
  1529. block = L(block, Y("let", "core", grouping));
  1530. }
  1531. if (!Source->GetCompositeSource()) {
  1532. BuildProjectWindowDistinct(block, ctx, ordered);
  1533. }
  1534. return Y("block", Q(L(block, Y("return", "core"))));
  1535. }
  1536. void BuildProjectWindowDistinct(TNodePtr& block, TContext& ctx, bool ordered) override {
  1537. if (PrewindowMap) {
  1538. block = L(block, Y("let", "core", PrewindowMap));
  1539. }
  1540. if (CalcOverWindow) {
  1541. block = L(block, Y("let", "core", CalcOverWindow));
  1542. }
  1543. block = L(block, Y("let", "core", Y("PersistableRepr", BuildSqlProject(ctx, ordered))));
  1544. if (Distinct) {
  1545. block = L(block, Y("let", "core", Y("PersistableRepr", Y("SqlAggregateAll", Y("RemoveSystemMembers", "core")))));
  1546. }
  1547. }
  1548. TNodePtr BuildSort(TContext& ctx, const TString& label) override {
  1549. Y_UNUSED(ctx);
  1550. if (OrderBy.empty() || DisableSort_) {
  1551. return nullptr;
  1552. }
  1553. auto sorted = BuildSortSpec(OrderBy, label, false, AssumeSorted);
  1554. if (ExtraSortColumns.empty()) {
  1555. return Y("let", label, sorted);
  1556. }
  1557. auto body = Y();
  1558. for (const auto& [column, _] : ExtraSortColumns) {
  1559. body = L(body, Y("let", "row", Y("RemoveMember", "row", Q(column))));
  1560. }
  1561. body = L(body, Y("let", "res", "row"));
  1562. return Y("let", label, Y("OrderedMap", sorted, BuildLambda(Pos, Y("row"), body, "res")));
  1563. }
  1564. TNodePtr BuildCleanupColumns(TContext& ctx, const TString& label) override {
  1565. TNodePtr cleanup;
  1566. if (ctx.EnableSystemColumns && ctx.Settings.Mode != NSQLTranslation::ESqlMode::LIMITED_VIEW) {
  1567. if (Columns.All) {
  1568. cleanup = Y("let", label, Y("RemoveSystemMembers", label));
  1569. } else if (!Columns.List.empty()) {
  1570. const bool isJoin = Source->GetJoin();
  1571. if (!isJoin && Columns.QualifiedAll) {
  1572. if (ctx.SimpleColumns) {
  1573. cleanup = Y("let", label, Y("RemoveSystemMembers", label));
  1574. } else {
  1575. TNodePtr members;
  1576. for (auto& term: Terms) {
  1577. if (term->IsAsterisk()) {
  1578. auto sourceName = term->GetSourceName();
  1579. YQL_ENSURE(*sourceName && !sourceName->empty());
  1580. auto prefix = *sourceName + "._yql_";
  1581. members = members ? L(members, Q(prefix)) : Y(Q(prefix));
  1582. }
  1583. }
  1584. if (members) {
  1585. cleanup = Y("let", label, Y("RemovePrefixMembers", label, Q(members)));
  1586. }
  1587. }
  1588. }
  1589. }
  1590. }
  1591. return cleanup;
  1592. }
  1593. bool IsSelect() const override {
  1594. return true;
  1595. }
  1596. bool HasSelectResult() const override {
  1597. return !Settings.Discard;
  1598. }
  1599. bool IsStream() const override {
  1600. return Source->IsStream();
  1601. }
  1602. EOrderKind GetOrderKind() const override {
  1603. if (OrderBy.empty()) {
  1604. return EOrderKind::None;
  1605. }
  1606. return AssumeSorted ? EOrderKind::Assume : EOrderKind::Sort;
  1607. }
  1608. TWriteSettings GetWriteSettings() const override {
  1609. return Settings;
  1610. }
  1611. TMaybe<bool> AddColumn(TContext& ctx, TColumnNode& column) override {
  1612. const bool aggregated = Source->HasAggregations() || Distinct;
  1613. if (OrderByInit && (Source->GetJoin() || !aggregated)) {
  1614. // ORDER BY will try to find column not only in projection items, but also in Source.
  1615. // ```SELECT a, b FROM T ORDER BY c``` should work if c is present in T
  1616. const bool reliable = column.IsReliable();
  1617. column.SetAsNotReliable();
  1618. auto maybeExist = IRealSource::AddColumn(ctx, column);
  1619. if (reliable && !Source->GetJoin()) {
  1620. column.ResetAsReliable();
  1621. }
  1622. if (!maybeExist || !maybeExist.GetRef()) {
  1623. maybeExist = Source->AddColumn(ctx, column);
  1624. }
  1625. if (!maybeExist.Defined()) {
  1626. return maybeExist;
  1627. }
  1628. if (!DisableSort_ && !aggregated && column.GetColumnName() && IsMissingInProjection(ctx, column)) {
  1629. ExtraSortColumns[FullColumnName(column)] = &column;
  1630. }
  1631. return maybeExist;
  1632. }
  1633. return IRealSource::AddColumn(ctx, column);
  1634. }
  1635. bool IsMissingInProjection(TContext& ctx, const TColumnNode& column) const {
  1636. TString columnName = FullColumnName(column);
  1637. if (Columns.Real.contains(columnName) || Columns.Artificial.contains(columnName)) {
  1638. return false;
  1639. }
  1640. if (!ctx.SimpleColumns && Columns.QualifiedAll && !columnName.Contains('.')) {
  1641. return false;
  1642. }
  1643. if (!Columns.IsColumnPossible(ctx, columnName)) {
  1644. return true;
  1645. }
  1646. for (auto without: Without) {
  1647. auto name = *without->GetColumnName();
  1648. if (Source && Source->GetJoin()) {
  1649. name = DotJoin(*without->GetSourceName(), name);
  1650. }
  1651. if (name == columnName) {
  1652. return true;
  1653. }
  1654. }
  1655. return false;
  1656. }
  1657. TNodePtr PrepareWithout(const TNodePtr& base) {
  1658. auto terms = base;
  1659. if (Without) {
  1660. for (auto without: Without) {
  1661. auto name = *without->GetColumnName();
  1662. if (Source && Source->GetJoin()) {
  1663. name = DotJoin(*without->GetSourceName(), name);
  1664. }
  1665. terms = L(terms, Y("let", "row", Y("RemoveMember", "row", Q(name))));
  1666. }
  1667. }
  1668. if (Source) {
  1669. for (auto column : Source->GetTmpWindowColumns()) {
  1670. terms = L(terms, Y("let", "row", Y("RemoveMember", "row", Q(column))));
  1671. }
  1672. }
  1673. return terms;
  1674. }
  1675. TNodePtr DoClone() const final {
  1676. return new TSelectCore(Pos, Source->CloneSource(), CloneContainer(GroupByExpr),
  1677. CloneContainer(GroupBy), CompactGroupBy, GroupBySuffix, AssumeSorted, CloneContainer(OrderBy),
  1678. SafeClone(Having), CloneContainer(WinSpecs), SafeClone(LegacyHoppingWindowSpec),
  1679. CloneContainer(Terms), Distinct, Without, SelectStream, Settings, TColumnsSets(UniqueSets), TColumnsSets(DistinctSets));
  1680. }
  1681. private:
  1682. bool InitSelect(TContext& ctx, ISource* src, bool isJoin, bool& hasError) {
  1683. for (auto& [name, winSpec] : WinSpecs) {
  1684. for (size_t i = 0; i < winSpec->Partitions.size(); ++i) {
  1685. auto partitionNode = winSpec->Partitions[i];
  1686. if (auto sessionWindow = dynamic_cast<TSessionWindow*>(partitionNode.Get())) {
  1687. if (winSpec->Session) {
  1688. ctx.Error(partitionNode->GetPos()) << "Duplicate session window specification:";
  1689. ctx.Error(winSpec->Session->GetPos()) << "Previous session window is declared here";
  1690. hasError = true;
  1691. continue;
  1692. }
  1693. sessionWindow->MarkValid();
  1694. winSpec->Session = partitionNode;
  1695. }
  1696. if (!partitionNode->Init(ctx, src)) {
  1697. hasError = true;
  1698. continue;
  1699. }
  1700. if (!partitionNode->GetLabel() && !partitionNode->GetColumnName()) {
  1701. TString label = TStringBuilder() << "group_" << name << "_" << i;
  1702. partitionNode->SetLabel(label);
  1703. src->AddTmpWindowColumn(label);
  1704. }
  1705. }
  1706. if (!src->AddExpressions(ctx, winSpec->Partitions, EExprSeat::WindowPartitionBy)) {
  1707. hasError = true;
  1708. }
  1709. }
  1710. if (LegacyHoppingWindowSpec) {
  1711. if (!LegacyHoppingWindowSpec->TimeExtractor->Init(ctx, src)) {
  1712. hasError = true;
  1713. }
  1714. src->SetLegacyHoppingWindowSpec(LegacyHoppingWindowSpec);
  1715. }
  1716. for (auto& term: Terms) {
  1717. if (!term->Init(ctx, src)) {
  1718. hasError = true;
  1719. continue;
  1720. }
  1721. auto column = term->GetColumnName();
  1722. TString label(term->GetLabel());
  1723. bool hasName = true;
  1724. if (label.empty()) {
  1725. auto source = term->GetSourceName();
  1726. if (term->IsAsterisk() && !source->empty()) {
  1727. Columns.QualifiedAll = true;
  1728. label = DotJoin(*source, "*");
  1729. } else if (column) {
  1730. label = isJoin && source && *source ? DotJoin(*source, *column) : *column;
  1731. } else {
  1732. label = Columns.AddUnnamed();
  1733. hasName = false;
  1734. if (ctx.WarnUnnamedColumns) {
  1735. ctx.Warning(term->GetPos(), TIssuesIds::YQL_UNNAMED_COLUMN)
  1736. << "Autogenerated column name " << label << " will be used for expression";
  1737. }
  1738. }
  1739. }
  1740. if (hasName && !Columns.Add(&label, false, false, true)) {
  1741. ctx.Error(Pos) << "Duplicate column: " << label;
  1742. hasError = true;
  1743. }
  1744. }
  1745. CompositeTerms = Y();
  1746. if (!hasError && Source->IsCompositeSource() && !Columns.All && !Columns.QualifiedAll && !Columns.List.empty()) {
  1747. auto compositeSrcPtr = static_cast<TCompositeSelect*>(Source->GetCompositeSource());
  1748. if (compositeSrcPtr) {
  1749. const auto& groupings = compositeSrcPtr->GetGroupingCols();
  1750. for (const auto& column: groupings) {
  1751. if (Source->IsGroupByColumn(column)) {
  1752. continue;
  1753. }
  1754. const TString tableName = (GroupByExpr || DistinctAggrExpr) ? "preaggregated" : "origcore";
  1755. CompositeTerms = L(CompositeTerms, Y("let", "row", Y("AddMember", "row", BuildQuotedAtom(Pos, column), Y("Nothing", Y("MatchType",
  1756. Y("StructMemberType", Y("ListItemType", Y("TypeOf", tableName)), Q(column)),
  1757. Q("Optional"), Y("lambda", Q(Y("item")), "item"), Y("lambda", Q(Y("item")), Y("OptionalType", "item")))))));
  1758. }
  1759. }
  1760. }
  1761. for (auto iter: WinSpecs) {
  1762. auto winSpec = *iter.second;
  1763. for (auto orderSpec: winSpec.OrderBy) {
  1764. if (!orderSpec->OrderExpr->Init(ctx, src)) {
  1765. hasError = true;
  1766. }
  1767. }
  1768. }
  1769. if (Columns.All || Columns.QualifiedAll) {
  1770. Source->AllColumns();
  1771. }
  1772. for (const auto& without: Without) {
  1773. auto namePtr = without->GetColumnName();
  1774. auto sourcePtr = without->GetSourceName();
  1775. YQL_ENSURE(namePtr && *namePtr);
  1776. if (isJoin && !(sourcePtr && *sourcePtr)) {
  1777. ctx.Error(without->GetPos()) << "Expected correlation name for WITHOUT in JOIN";
  1778. hasError = true;
  1779. continue;
  1780. }
  1781. }
  1782. if (Having && !Having->Init(ctx, src)) {
  1783. hasError = true;
  1784. }
  1785. if (!src->IsCompositeSource() && !Columns.All && src->HasAggregations()) {
  1786. WarnIfAliasFromSelectIsUsedInGroupBy(ctx, Terms, GroupBy, GroupByExpr);
  1787. /// verify select aggregation compatibility
  1788. TVector<TNodePtr> exprs(Terms);
  1789. if (Having) {
  1790. exprs.push_back(Having);
  1791. }
  1792. for (const auto& iter: WinSpecs) {
  1793. for (const auto& sortSpec: iter.second->OrderBy) {
  1794. exprs.push_back(sortSpec->OrderExpr);
  1795. }
  1796. }
  1797. if (!ValidateAllNodesForAggregation(ctx, exprs)) {
  1798. hasError = true;
  1799. }
  1800. }
  1801. const auto label = GetLabel();
  1802. for (const auto& sortSpec: OrderBy) {
  1803. auto& expr = sortSpec->OrderExpr;
  1804. SetLabel(Source->GetLabel());
  1805. OrderByInit = true;
  1806. if (!expr->Init(ctx, this)) {
  1807. hasError = true;
  1808. continue;
  1809. }
  1810. OrderByInit = false;
  1811. if (!IsComparableExpression(ctx, expr, AssumeSorted, AssumeSorted ? "ASSUME ORDER BY" : "ORDER BY")) {
  1812. hasError = true;
  1813. continue;
  1814. }
  1815. }
  1816. SetLabel(label);
  1817. return !hasError;
  1818. }
  1819. TNodePtr PrepareJoinCoalesce(TContext& ctx, const TNodePtr& base, bool multipleQualifiedAll, const TVector<TString>& coalesceLabels) {
  1820. const bool isJoin = Source->GetJoin();
  1821. const bool needCoalesce = isJoin && ctx.SimpleColumns &&
  1822. (Columns.All || multipleQualifiedAll || ctx.CoalesceJoinKeysOnQualifiedAll);
  1823. if (!needCoalesce) {
  1824. return base;
  1825. }
  1826. auto terms = base;
  1827. const auto& sameKeyMap = Source->GetJoin()->GetSameKeysMap();
  1828. if (sameKeyMap) {
  1829. terms = L(terms, Y("let", "flatSameKeys", "row"));
  1830. for (const auto& [key, sources]: sameKeyMap) {
  1831. auto coalesceKeys = Y();
  1832. for (const auto& label : coalesceLabels) {
  1833. if (sources.contains(label)) {
  1834. coalesceKeys = L(coalesceKeys, Q(DotJoin(label, key)));
  1835. }
  1836. }
  1837. terms = L(terms, Y("let", "flatSameKeys", Y("CoalesceMembers", "flatSameKeys", Q(coalesceKeys))));
  1838. }
  1839. terms = L(terms, Y("let", "row", "flatSameKeys"));
  1840. }
  1841. return terms;
  1842. }
  1843. TNodePtr BuildSqlProject(TContext& ctx, bool ordered) {
  1844. auto sqlProjectArgs = Y();
  1845. const bool isJoin = Source->GetJoin();
  1846. if (Columns.All) {
  1847. YQL_ENSURE(Columns.List.empty());
  1848. auto terms = PrepareWithout(Y());
  1849. auto options = Y();
  1850. if (isJoin && ctx.SimpleColumns) {
  1851. terms = PrepareJoinCoalesce(ctx, terms, false, Source->GetJoin()->GetJoinLabels());
  1852. auto members = Y();
  1853. for (auto& source : Source->GetJoin()->GetJoinLabels()) {
  1854. YQL_ENSURE(!source.empty());
  1855. members = L(members, BuildQuotedAtom(Pos, source + "."));
  1856. }
  1857. if (GroupByExpr.empty() || ctx.BogousStarInGroupByOverJoin) {
  1858. terms = L(terms, Y("let", "res", Y("DivePrefixMembers", "row", Q(members))));
  1859. } else {
  1860. auto groupExprStruct = Y("AsStruct");
  1861. for (auto node : GroupByExpr) {
  1862. auto label = node->GetLabel();
  1863. YQL_ENSURE(label);
  1864. if (Source->IsGroupByColumn(label)) {
  1865. auto name = BuildQuotedAtom(Pos, label);
  1866. groupExprStruct = L(groupExprStruct, Q(Y(name, Y("Member", "row", name))));
  1867. }
  1868. }
  1869. auto groupColumnsStruct = Y("DivePrefixMembers", "row", Q(members));
  1870. terms = L(terms, Y("let", "res", Y("FlattenMembers", Q(Y(BuildQuotedAtom(Pos, ""), groupExprStruct)),
  1871. Q(Y(BuildQuotedAtom(Pos, ""), groupColumnsStruct)))));
  1872. }
  1873. options = L(options, Q(Y(Q("divePrefix"), Q(members))));
  1874. } else {
  1875. terms = L(terms, Y("let", "res", "row"));
  1876. }
  1877. sqlProjectArgs = L(sqlProjectArgs, Y("SqlProjectStarItem", "projectCoreType", BuildQuotedAtom(Pos, ""), BuildLambda(Pos, Y("row"), terms, "res"), Q(options)));
  1878. } else {
  1879. YQL_ENSURE(!Columns.List.empty());
  1880. YQL_ENSURE(Columns.List.size() == Terms.size());
  1881. TVector<TString> coalesceLabels;
  1882. bool multipleQualifiedAll = false;
  1883. if (isJoin && ctx.SimpleColumns) {
  1884. THashSet<TString> starTerms;
  1885. for (auto& term: Terms) {
  1886. if (term->IsAsterisk()) {
  1887. auto sourceName = term->GetSourceName();
  1888. YQL_ENSURE(*sourceName && !sourceName->empty());
  1889. YQL_ENSURE(Columns.QualifiedAll);
  1890. starTerms.insert(*sourceName);
  1891. }
  1892. }
  1893. TVector<TString> matched;
  1894. TVector<TString> unmatched;
  1895. for (auto& label : Source->GetJoin()->GetJoinLabels()) {
  1896. if (starTerms.contains(label)) {
  1897. matched.push_back(label);
  1898. } else {
  1899. unmatched.push_back(label);
  1900. }
  1901. }
  1902. coalesceLabels.insert(coalesceLabels.end(), matched.begin(), matched.end());
  1903. coalesceLabels.insert(coalesceLabels.end(), unmatched.begin(), unmatched.end());
  1904. multipleQualifiedAll = starTerms.size() > 1;
  1905. }
  1906. auto column = Columns.List.begin();
  1907. auto isNamedColumn = Columns.NamedColumns.begin();
  1908. for (auto& term: Terms) {
  1909. auto sourceName = term->GetSourceName();
  1910. if (!term->IsAsterisk()) {
  1911. auto body = Y();
  1912. body = L(body, Y("let", "res", term));
  1913. TPosition lambdaPos = Pos;
  1914. TPosition aliasPos = Pos;
  1915. if (term->IsImplicitLabel() && ctx.WarnOnAnsiAliasShadowing) {
  1916. // TODO: recanonize for positions below
  1917. lambdaPos = term->GetPos();
  1918. aliasPos = term->GetLabelPos() ? *term->GetLabelPos() : lambdaPos;
  1919. }
  1920. auto projectItem = Y("SqlProjectItem", "projectCoreType", BuildQuotedAtom(aliasPos, *isNamedColumn ? *column : ""), BuildLambda(lambdaPos, Y("row"), body, "res"));
  1921. if (term->IsImplicitLabel() && ctx.WarnOnAnsiAliasShadowing) {
  1922. projectItem = L(projectItem, Q(Y(Q(Y(Q("warnShadow"))))));
  1923. }
  1924. if (!*isNamedColumn) {
  1925. projectItem = L(projectItem, Q(Y(Q(Y(Q("autoName"))))));
  1926. }
  1927. sqlProjectArgs = L(sqlProjectArgs, projectItem);
  1928. } else {
  1929. auto terms = PrepareWithout(Y());
  1930. auto options = Y();
  1931. if (ctx.SimpleColumns && !isJoin) {
  1932. terms = L(terms, Y("let", "res", "row"));
  1933. } else {
  1934. terms = PrepareJoinCoalesce(ctx, terms, multipleQualifiedAll, coalesceLabels);
  1935. auto members = isJoin ? Y() : Y("FlattenMembers");
  1936. if (isJoin) {
  1937. members = L(members, BuildQuotedAtom(Pos, *sourceName + "."));
  1938. if (ctx.SimpleColumns) {
  1939. options = L(options, Q(Y(Q("divePrefix"), Q(members))));
  1940. }
  1941. members = Y(ctx.SimpleColumns ? "DivePrefixMembers" : "SelectMembers", "row", Q(members));
  1942. } else {
  1943. auto prefix = BuildQuotedAtom(Pos, ctx.SimpleColumns ? "" : *sourceName + ".");
  1944. members = L(members, Q(Y(prefix, "row")));
  1945. if (!ctx.SimpleColumns) {
  1946. options = L(options, Q(Y(Q("addPrefix"), prefix)));
  1947. }
  1948. }
  1949. terms = L(terms, Y("let", "res", members));
  1950. }
  1951. sqlProjectArgs = L(sqlProjectArgs, Y("SqlProjectStarItem", "projectCoreType", BuildQuotedAtom(Pos, *sourceName), BuildLambda(Pos, Y("row"), terms, "res"), Q(options)));
  1952. }
  1953. ++column;
  1954. ++isNamedColumn;
  1955. }
  1956. }
  1957. for (const auto& [columnName, column]: ExtraSortColumns) {
  1958. auto body = Y();
  1959. body = L(body, Y("let", "res", column));
  1960. TPosition pos = column->GetPos();
  1961. auto projectItem = Y("SqlProjectItem", "projectCoreType", BuildQuotedAtom(pos, columnName), BuildLambda(pos, Y("row"), body, "res"));
  1962. sqlProjectArgs = L(sqlProjectArgs, projectItem);
  1963. }
  1964. auto block(Y(Y("let", "projectCoreType", Y("TypeOf", "core"))));
  1965. block = L(block, Y("let", "core", Y(ordered ? "OrderedSqlProject" : "SqlProject", "core", Q(sqlProjectArgs))));
  1966. if (!(UniqueSets.empty() && DistinctSets.empty())) {
  1967. block = L(block, Y("let", "core", Y("RemoveSystemMembers", "core")));
  1968. const auto MakeUniqueHint = [this](INode::TPtr& block, const TColumnsSets& sets, bool distinct) {
  1969. if (!sets.empty()) {
  1970. auto assume = Y(distinct ? "AssumeDistinctHint" : "AssumeUniqueHint", "core");
  1971. if (!sets.front().empty()) {
  1972. for (const auto& columns : sets) {
  1973. auto set = Y();
  1974. for (const auto& column : columns) {
  1975. set = L(set, Q(column));
  1976. }
  1977. assume = L(assume, Q(set));
  1978. }
  1979. }
  1980. block = L(block, Y("let", "core", assume));
  1981. }
  1982. };
  1983. MakeUniqueHint(block, DistinctSets, true);
  1984. MakeUniqueHint(block, UniqueSets, false);
  1985. }
  1986. return Y("block", Q(L(block, Y("return", "core"))));
  1987. }
  1988. private:
  1989. TSourcePtr Source;
  1990. TVector<TNodePtr> GroupByExpr;
  1991. TVector<TNodePtr> DistinctAggrExpr;
  1992. TVector<TNodePtr> GroupBy;
  1993. bool AssumeSorted = false;
  1994. bool CompactGroupBy = false;
  1995. TString GroupBySuffix;
  1996. TVector<TSortSpecificationPtr> OrderBy;
  1997. TNodePtr Having;
  1998. TWinSpecs WinSpecs;
  1999. TNodePtr Flatten;
  2000. TNodePtr PreFlattenMap;
  2001. TNodePtr PreaggregatedMap;
  2002. TNodePtr PrewindowMap;
  2003. TNodePtr Aggregate;
  2004. TNodePtr CalcOverWindow;
  2005. TNodePtr CompositeTerms;
  2006. TVector<TNodePtr> Terms;
  2007. TVector<TNodePtr> Without;
  2008. const bool Distinct;
  2009. bool OrderByInit = false;
  2010. TLegacyHoppingWindowSpecPtr LegacyHoppingWindowSpec;
  2011. const bool SelectStream;
  2012. const TWriteSettings Settings;
  2013. const TColumnsSets UniqueSets, DistinctSets;
  2014. TMap<TString, TNodePtr> ExtraSortColumns;
  2015. };
  2016. class TProcessSource: public IRealSource {
  2017. public:
  2018. TProcessSource(
  2019. TPosition pos,
  2020. TSourcePtr source,
  2021. TNodePtr with,
  2022. bool withExtFunction,
  2023. TVector<TNodePtr>&& terms,
  2024. bool listCall,
  2025. bool processStream,
  2026. const TWriteSettings& settings,
  2027. const TVector<TSortSpecificationPtr>& assumeOrderBy
  2028. )
  2029. : IRealSource(pos)
  2030. , Source(std::move(source))
  2031. , With(with)
  2032. , WithExtFunction(withExtFunction)
  2033. , Terms(std::move(terms))
  2034. , ListCall(listCall)
  2035. , ProcessStream(processStream)
  2036. , Settings(settings)
  2037. , AssumeOrderBy(assumeOrderBy)
  2038. {
  2039. }
  2040. void GetInputTables(TTableList& tableList) const override {
  2041. Source->GetInputTables(tableList);
  2042. ISource::GetInputTables(tableList);
  2043. }
  2044. bool DoInit(TContext& ctx, ISource* initSrc) override {
  2045. if (AsInner) {
  2046. Source->UseAsInner();
  2047. }
  2048. if (!Source->Init(ctx, initSrc)) {
  2049. return false;
  2050. }
  2051. if (ProcessStream && !Source->IsStream()) {
  2052. ctx.Error(Pos) << "PROCESS STREAM is unsupported for non-streaming sources";
  2053. return false;
  2054. }
  2055. auto src = Source.Get();
  2056. if (!With) {
  2057. src->AllColumns();
  2058. Columns.SetAll();
  2059. src->FinishColumns();
  2060. return true;
  2061. }
  2062. /// grouped expressions are available in filters
  2063. if (!Source->InitFilters(ctx)) {
  2064. return false;
  2065. }
  2066. TSourcePtr fakeSource = nullptr;
  2067. if (ListCall && !WithExtFunction) {
  2068. fakeSource = BuildFakeSource(src->GetPos());
  2069. src->AllColumns();
  2070. }
  2071. auto processSource = fakeSource != nullptr ? fakeSource.Get() : src;
  2072. Y_DEBUG_ABORT_UNLESS(processSource != nullptr);
  2073. if (!With->Init(ctx, processSource)) {
  2074. return false;
  2075. }
  2076. if (With->GetLabel().empty()) {
  2077. Columns.SetAll();
  2078. } else {
  2079. if (ListCall) {
  2080. ctx.Error(With->GetPos()) << "Label is not allowed to use with TableRows()";
  2081. return false;
  2082. }
  2083. Columns.Add(&With->GetLabel(), false);
  2084. }
  2085. bool hasError = false;
  2086. TNodePtr produce;
  2087. if (WithExtFunction) {
  2088. produce = Y();
  2089. } else {
  2090. TString processCall = (ListCall ? "SqlProcess" : "Apply");
  2091. produce = Y(processCall, With);
  2092. }
  2093. TMaybe<ui32> listPosIndex;
  2094. ui32 termIndex = 0;
  2095. for (auto& term: Terms) {
  2096. if (!term->GetLabel().empty()) {
  2097. ctx.Error(term->GetPos()) << "Labels are not allowed for PROCESS terms";
  2098. hasError = true;
  2099. continue;
  2100. }
  2101. if (!term->Init(ctx, processSource)) {
  2102. hasError = true;
  2103. continue;
  2104. }
  2105. if (ListCall) {
  2106. if (auto atom = dynamic_cast<TTableRows*>(term.Get())) {
  2107. listPosIndex = termIndex;
  2108. }
  2109. }
  2110. ++termIndex;
  2111. produce = L(produce, term);
  2112. }
  2113. if (hasError) {
  2114. return false;
  2115. }
  2116. if (ListCall && !WithExtFunction) {
  2117. YQL_ENSURE(listPosIndex.Defined());
  2118. produce = L(produce, Q(ToString(*listPosIndex)));
  2119. }
  2120. if (!produce->Init(ctx, src)) {
  2121. hasError = true;
  2122. }
  2123. if (!(WithExtFunction && Terms.empty())) {
  2124. TVector<TNodePtr>(1, produce).swap(Terms);
  2125. }
  2126. src->FinishColumns();
  2127. const auto label = GetLabel();
  2128. for (const auto& sortSpec: AssumeOrderBy) {
  2129. auto& expr = sortSpec->OrderExpr;
  2130. SetLabel(Source->GetLabel());
  2131. if (!expr->Init(ctx, this)) {
  2132. hasError = true;
  2133. continue;
  2134. }
  2135. if (!IsComparableExpression(ctx, expr, true, "ASSUME ORDER BY")) {
  2136. hasError = true;
  2137. continue;
  2138. }
  2139. }
  2140. SetLabel(label);
  2141. return !hasError;
  2142. }
  2143. TNodePtr Build(TContext& ctx) override {
  2144. auto input = Source->Build(ctx);
  2145. if (!input) {
  2146. return nullptr;
  2147. }
  2148. if (!With) {
  2149. auto res = input;
  2150. if (ctx.EnableSystemColumns) {
  2151. res = Y("RemoveSystemMembers", res);
  2152. }
  2153. return res;
  2154. }
  2155. TString inputLabel = ListCall ? "inputRowsList" : "core";
  2156. auto block(Y(Y("let", inputLabel, input)));
  2157. auto filter = Source->BuildFilter(ctx, inputLabel);
  2158. if (filter) {
  2159. block = L(block, Y("let", inputLabel, filter));
  2160. }
  2161. if (WithExtFunction) {
  2162. auto preTransform = Y("RemoveSystemMembers", inputLabel);
  2163. if (Terms.size() > 0) {
  2164. preTransform = Y("Map", preTransform, BuildLambda(Pos, Y("row"), Q(Terms[0])));
  2165. }
  2166. block = L(block, Y("let", inputLabel, preTransform));
  2167. block = L(block, Y("let", "transform", With));
  2168. block = L(block, Y("let", "core", Y("Apply", "transform", inputLabel)));
  2169. } else if (ListCall) {
  2170. block = L(block, Y("let", "core", Terms[0]));
  2171. } else {
  2172. auto terms = BuildColumnsTerms(ctx);
  2173. block = L(block, Y("let", "core", Y(ctx.UseUnordered(*this) ? "OrderedFlatMap" : "FlatMap", "core", BuildLambda(Pos, Y("row"), terms, "res"))));
  2174. }
  2175. block = L(block, Y("let", "core", Y("AutoDemux", Y("PersistableRepr", "core"))));
  2176. return Y("block", Q(L(block, Y("return", "core"))));
  2177. }
  2178. TNodePtr BuildSort(TContext& ctx, const TString& label) override {
  2179. Y_UNUSED(ctx);
  2180. if (AssumeOrderBy.empty()) {
  2181. return nullptr;
  2182. }
  2183. return Y("let", label, BuildSortSpec(AssumeOrderBy, label, false, true));
  2184. }
  2185. EOrderKind GetOrderKind() const override {
  2186. if (!With) {
  2187. return EOrderKind::Passthrough;
  2188. }
  2189. return AssumeOrderBy.empty() ? EOrderKind::None : EOrderKind::Assume;
  2190. }
  2191. bool IsSelect() const override {
  2192. return false;
  2193. }
  2194. bool HasSelectResult() const override {
  2195. return !Settings.Discard;
  2196. }
  2197. bool IsStream() const override {
  2198. return Source->IsStream();
  2199. }
  2200. TWriteSettings GetWriteSettings() const override {
  2201. return Settings;
  2202. }
  2203. TNodePtr DoClone() const final {
  2204. return new TProcessSource(Pos, Source->CloneSource(), SafeClone(With), WithExtFunction,
  2205. CloneContainer(Terms), ListCall, ProcessStream, Settings, CloneContainer(AssumeOrderBy));
  2206. }
  2207. private:
  2208. TNodePtr BuildColumnsTerms(TContext& ctx) {
  2209. Y_UNUSED(ctx);
  2210. TNodePtr terms;
  2211. Y_DEBUG_ABORT_UNLESS(Terms.size() == 1);
  2212. if (Columns.All) {
  2213. terms = Y(Y("let", "res", Y("ToSequence", Terms.front())));
  2214. } else {
  2215. Y_DEBUG_ABORT_UNLESS(Columns.List.size() == Terms.size());
  2216. terms = L(Y(), Y("let", "res",
  2217. L(Y("AsStructUnordered"), Q(Y(BuildQuotedAtom(Pos, Columns.List.front()), Terms.front())))));
  2218. terms = L(terms, Y("let", "res", Y("Just", "res")));
  2219. }
  2220. return terms;
  2221. }
  2222. private:
  2223. TSourcePtr Source;
  2224. TNodePtr With;
  2225. const bool WithExtFunction;
  2226. TVector<TNodePtr> Terms;
  2227. const bool ListCall;
  2228. const bool ProcessStream;
  2229. const TWriteSettings Settings;
  2230. TVector<TSortSpecificationPtr> AssumeOrderBy;
  2231. };
  2232. TSourcePtr BuildProcess(
  2233. TPosition pos,
  2234. TSourcePtr source,
  2235. TNodePtr with,
  2236. bool withExtFunction,
  2237. TVector<TNodePtr>&& terms,
  2238. bool listCall,
  2239. bool processStream,
  2240. const TWriteSettings& settings,
  2241. const TVector<TSortSpecificationPtr>& assumeOrderBy
  2242. ) {
  2243. return new TProcessSource(pos, std::move(source), with, withExtFunction, std::move(terms), listCall, processStream, settings, assumeOrderBy);
  2244. }
  2245. class TNestedProxySource: public IProxySource {
  2246. public:
  2247. TNestedProxySource(TPosition pos, const TVector<TNodePtr>& groupBy, TSourcePtr source)
  2248. : IProxySource(pos, source.Get())
  2249. , CompositeSelect(nullptr)
  2250. , Holder(std::move(source))
  2251. , GroupBy(groupBy)
  2252. {}
  2253. TNestedProxySource(TCompositeSelect* compositeSelect, const TVector<TNodePtr>& groupBy)
  2254. : IProxySource(compositeSelect->GetPos(), compositeSelect->RealSource())
  2255. , CompositeSelect(compositeSelect)
  2256. , GroupBy(groupBy)
  2257. {}
  2258. bool DoInit(TContext& ctx, ISource* src) override {
  2259. return Source->Init(ctx, src);
  2260. }
  2261. TNodePtr Build(TContext& ctx) override {
  2262. return CompositeSelect ? BuildAtom(Pos, "composite", TNodeFlags::Default) : Source->Build(ctx);
  2263. }
  2264. bool InitFilters(TContext& ctx) override {
  2265. return CompositeSelect ? true : Source->InitFilters(ctx);
  2266. }
  2267. TNodePtr BuildFilter(TContext& ctx, const TString& label) override {
  2268. return CompositeSelect ? nullptr : Source->BuildFilter(ctx, label);
  2269. }
  2270. IJoin* GetJoin() override {
  2271. return Source->GetJoin();
  2272. }
  2273. bool IsCompositeSource() const override {
  2274. return true;
  2275. }
  2276. ISource* GetCompositeSource() override {
  2277. return CompositeSelect;
  2278. }
  2279. bool AddGrouping(TContext& ctx, const TVector<TString>& columns, TString& hintColumn) override {
  2280. Y_UNUSED(ctx);
  2281. hintColumn = TStringBuilder() << "GroupingHint" << Hints.size();
  2282. ui64 hint = 0;
  2283. if (GroupByColumns.empty()) {
  2284. const bool isJoin = GetJoin();
  2285. for (const auto& groupByNode: GroupBy) {
  2286. auto namePtr = groupByNode->GetColumnName();
  2287. YQL_ENSURE(namePtr);
  2288. TString column = *namePtr;
  2289. if (isJoin) {
  2290. auto sourceNamePtr = groupByNode->GetSourceName();
  2291. if (sourceNamePtr && !sourceNamePtr->empty()) {
  2292. column = DotJoin(*sourceNamePtr, column);
  2293. }
  2294. }
  2295. GroupByColumns.insert(column);
  2296. }
  2297. }
  2298. for (const auto& column: columns) {
  2299. hint <<= 1;
  2300. if (!GroupByColumns.contains(column)) {
  2301. hint += 1;
  2302. }
  2303. }
  2304. Hints.push_back(hint);
  2305. return true;
  2306. }
  2307. size_t GetGroupingColumnsCount() const override {
  2308. return Hints.size();
  2309. }
  2310. TNodePtr BuildGroupingColumns(const TString& label) override {
  2311. if (Hints.empty()) {
  2312. return nullptr;
  2313. }
  2314. auto body = Y();
  2315. for (size_t i = 0; i < Hints.size(); ++i) {
  2316. TString hintColumn = TStringBuilder() << "GroupingHint" << i;
  2317. TString hintValue = ToString(Hints[i]);
  2318. body = L(body, Y("let", "row", Y("AddMember", "row", Q(hintColumn), Y("Uint64", Q(hintValue)))));
  2319. }
  2320. return Y("Map", label, BuildLambda(Pos, Y("row"), body, "row"));
  2321. }
  2322. void FinishColumns() override {
  2323. Source->FinishColumns();
  2324. }
  2325. TMaybe<bool> AddColumn(TContext& ctx, TColumnNode& column) override {
  2326. if (const TString* columnName = column.GetColumnName()) {
  2327. if (columnName && IsExprAlias(*columnName)) {
  2328. return true;
  2329. }
  2330. }
  2331. return Source->AddColumn(ctx, column);
  2332. }
  2333. TPtr DoClone() const final {
  2334. YQL_ENSURE(Hints.empty());
  2335. return Holder.Get() ? new TNestedProxySource(Pos, CloneContainer(GroupBy), Holder->CloneSource()) :
  2336. new TNestedProxySource(CompositeSelect, CloneContainer(GroupBy));
  2337. }
  2338. private:
  2339. TCompositeSelect* CompositeSelect;
  2340. TSourcePtr Holder;
  2341. TVector<TNodePtr> GroupBy;
  2342. mutable TSet<TString> GroupByColumns;
  2343. mutable TVector<ui64> Hints;
  2344. };
  2345. namespace {
  2346. TSourcePtr DoBuildSelectCore(
  2347. TContext& ctx,
  2348. TPosition pos,
  2349. TSourcePtr originalSource,
  2350. TSourcePtr source,
  2351. const TVector<TNodePtr>& groupByExpr,
  2352. const TVector<TNodePtr>& groupBy,
  2353. bool compactGroupBy,
  2354. const TString& groupBySuffix,
  2355. bool assumeSorted,
  2356. const TVector<TSortSpecificationPtr>& orderBy,
  2357. TNodePtr having,
  2358. TWinSpecs&& winSpecs,
  2359. TLegacyHoppingWindowSpecPtr legacyHoppingWindowSpec,
  2360. TVector<TNodePtr>&& terms,
  2361. bool distinct,
  2362. TVector<TNodePtr>&& without,
  2363. bool selectStream,
  2364. const TWriteSettings& settings,
  2365. TColumnsSets&& uniqueSets,
  2366. TColumnsSets&& distinctSets
  2367. ) {
  2368. if (groupBy.empty() || !groupBy.front()->ContentListPtr()) {
  2369. return new TSelectCore(pos, std::move(source), groupByExpr, groupBy, compactGroupBy, groupBySuffix, assumeSorted,
  2370. orderBy, having, winSpecs, legacyHoppingWindowSpec, terms, distinct, without, selectStream, settings, std::move(uniqueSets), std::move(distinctSets));
  2371. }
  2372. if (groupBy.size() == 1) {
  2373. /// actualy no big idea to use grouping function in this case (result allways 0)
  2374. auto contentPtr = groupBy.front()->ContentListPtr();
  2375. source = new TNestedProxySource(pos, *contentPtr, source);
  2376. return DoBuildSelectCore(ctx, pos, originalSource, source, groupByExpr, *contentPtr, compactGroupBy, groupBySuffix,
  2377. assumeSorted, orderBy, having, std::move(winSpecs),
  2378. legacyHoppingWindowSpec, std::move(terms), distinct, std::move(without), selectStream, settings, std::move(uniqueSets), std::move(distinctSets));
  2379. }
  2380. /// \todo some smart merge logic, generalize common part of grouping (expr, flatten, etc)?
  2381. TIntrusivePtr<TCompositeSelect> compositeSelect = new TCompositeSelect(pos, std::move(source), originalSource->CloneSource(), settings);
  2382. size_t totalGroups = 0;
  2383. TVector<TSourcePtr> subselects;
  2384. TVector<TNodePtr> groupingCols;
  2385. for (auto& grouping: groupBy) {
  2386. auto contentPtr = grouping->ContentListPtr();
  2387. TVector<TNodePtr> cache(1, nullptr);
  2388. if (!contentPtr) {
  2389. cache[0] = grouping;
  2390. contentPtr = &cache;
  2391. }
  2392. groupingCols.insert(groupingCols.end(), contentPtr->cbegin(), contentPtr->cend());
  2393. TSourcePtr proxySource = new TNestedProxySource(compositeSelect.Get(), CloneContainer(*contentPtr));
  2394. if (!subselects.empty()) {
  2395. /// clone terms for others usage
  2396. TVector<TNodePtr> termsCopy;
  2397. for (const auto& term: terms) {
  2398. termsCopy.emplace_back(term->Clone());
  2399. }
  2400. std::swap(terms, termsCopy);
  2401. }
  2402. totalGroups += contentPtr->size();
  2403. TSelectCore* selectCore = new TSelectCore(pos, std::move(proxySource), CloneContainer(groupByExpr),
  2404. CloneContainer(*contentPtr), compactGroupBy, groupBySuffix, assumeSorted, orderBy, SafeClone(having), CloneContainer(winSpecs),
  2405. legacyHoppingWindowSpec, terms, distinct, without, selectStream, settings, TColumnsSets(uniqueSets), TColumnsSets(distinctSets));
  2406. subselects.emplace_back(selectCore);
  2407. }
  2408. if (totalGroups > ctx.PragmaGroupByLimit) {
  2409. ctx.Error(pos) << "Unable to GROUP BY more than " << ctx.PragmaGroupByLimit << " groups, you try use " << totalGroups << " groups";
  2410. return nullptr;
  2411. }
  2412. compositeSelect->SetSubselects(std::move(subselects), std::move(groupingCols), CloneContainer(groupByExpr));
  2413. return compositeSelect;
  2414. }
  2415. }
  2416. TSourcePtr BuildSelectCore(
  2417. TContext& ctx,
  2418. TPosition pos,
  2419. TSourcePtr source,
  2420. const TVector<TNodePtr>& groupByExpr,
  2421. const TVector<TNodePtr>& groupBy,
  2422. bool compactGroupBy,
  2423. const TString& groupBySuffix,
  2424. bool assumeSorted,
  2425. const TVector<TSortSpecificationPtr>& orderBy,
  2426. TNodePtr having,
  2427. TWinSpecs&& winSpecs,
  2428. TLegacyHoppingWindowSpecPtr legacyHoppingWindowSpec,
  2429. TVector<TNodePtr>&& terms,
  2430. bool distinct,
  2431. TVector<TNodePtr>&& without,
  2432. bool selectStream,
  2433. const TWriteSettings& settings,
  2434. TColumnsSets&& uniqueSets,
  2435. TColumnsSets&& distinctSets
  2436. )
  2437. {
  2438. return DoBuildSelectCore(ctx, pos, source, source, groupByExpr, groupBy, compactGroupBy, groupBySuffix, assumeSorted, orderBy,
  2439. having, std::move(winSpecs), legacyHoppingWindowSpec, std::move(terms), distinct, std::move(without), selectStream, settings, std::move(uniqueSets), std::move(distinctSets));
  2440. }
  2441. class TUnion: public IRealSource {
  2442. public:
  2443. TUnion(TPosition pos, TVector<TSourcePtr>&& sources, bool quantifierAll, const TWriteSettings& settings)
  2444. : IRealSource(pos)
  2445. , Sources(std::move(sources))
  2446. , QuantifierAll(quantifierAll)
  2447. , Settings(settings)
  2448. {
  2449. }
  2450. const TColumns* GetColumns() const override {
  2451. return IRealSource::GetColumns();
  2452. }
  2453. void GetInputTables(TTableList& tableList) const override {
  2454. for (auto& x : Sources) {
  2455. x->GetInputTables(tableList);
  2456. }
  2457. ISource::GetInputTables(tableList);
  2458. }
  2459. bool DoInit(TContext& ctx, ISource* src) override {
  2460. bool first = true;
  2461. for (auto& s: Sources) {
  2462. s->UseAsInner();
  2463. if (!s->Init(ctx, src)) {
  2464. return false;
  2465. }
  2466. if (!ctx.PositionalUnionAll || first) {
  2467. auto c = s->GetColumns();
  2468. Y_DEBUG_ABORT_UNLESS(c);
  2469. Columns.Merge(*c);
  2470. first = false;
  2471. }
  2472. }
  2473. return true;
  2474. }
  2475. TNodePtr Build(TContext& ctx) override {
  2476. TPtr res;
  2477. if (QuantifierAll) {
  2478. if (ctx.EmitUnionMerge) {
  2479. res = ctx.PositionalUnionAll ? Y("UnionMergePositional") : Y("UnionMerge");
  2480. } else {
  2481. res = ctx.PositionalUnionAll ? Y("UnionAllPositional") : Y("UnionAll");
  2482. }
  2483. } else {
  2484. res = ctx.PositionalUnionAll ? Y("UnionPositional") : Y("Union");
  2485. }
  2486. for (auto& s: Sources) {
  2487. auto input = s->Build(ctx);
  2488. if (!input) {
  2489. return nullptr;
  2490. }
  2491. res->Add(input);
  2492. }
  2493. return res;
  2494. }
  2495. bool IsStream() const override {
  2496. for (auto& s: Sources) {
  2497. if (!s->IsStream()) {
  2498. return false;
  2499. }
  2500. }
  2501. return true;
  2502. }
  2503. TNodePtr DoClone() const final {
  2504. return MakeIntrusive<TUnion>(Pos, CloneContainer(Sources), QuantifierAll, Settings);
  2505. }
  2506. bool IsSelect() const override {
  2507. return true;
  2508. }
  2509. bool HasSelectResult() const override {
  2510. return !Settings.Discard;
  2511. }
  2512. TWriteSettings GetWriteSettings() const override {
  2513. return Settings;
  2514. }
  2515. private:
  2516. TVector<TSourcePtr> Sources;
  2517. bool QuantifierAll;
  2518. const TWriteSettings Settings;
  2519. };
  2520. TSourcePtr BuildUnion(
  2521. TPosition pos,
  2522. TVector<TSourcePtr>&& sources,
  2523. bool quantifierAll,
  2524. const TWriteSettings& settings
  2525. ) {
  2526. return new TUnion(pos, std::move(sources), quantifierAll, settings);
  2527. }
  2528. class TOverWindowSource: public IProxySource {
  2529. public:
  2530. TOverWindowSource(TPosition pos, const TString& windowName, ISource* origSource)
  2531. : IProxySource(pos, origSource)
  2532. , WindowName(windowName)
  2533. {
  2534. Source->SetLabel(origSource->GetLabel());
  2535. }
  2536. TString MakeLocalName(const TString& name) override {
  2537. return Source->MakeLocalName(name);
  2538. }
  2539. void AddTmpWindowColumn(const TString& column) override {
  2540. return Source->AddTmpWindowColumn(column);
  2541. }
  2542. bool AddAggregation(TContext& ctx, TAggregationPtr aggr) override {
  2543. if (aggr->IsOverWindow() || aggr->IsOverWindowDistinct()) {
  2544. return Source->AddAggregationOverWindow(ctx, WindowName, aggr);
  2545. }
  2546. return Source->AddAggregation(ctx, aggr);
  2547. }
  2548. bool AddFuncOverWindow(TContext& ctx, TNodePtr expr) override {
  2549. return Source->AddFuncOverWindow(ctx, WindowName, expr);
  2550. }
  2551. bool IsOverWindowSource() const override {
  2552. return true;
  2553. }
  2554. TMaybe<bool> AddColumn(TContext& ctx, TColumnNode& column) override {
  2555. return Source->AddColumn(ctx, column);
  2556. }
  2557. TNodePtr Build(TContext& ctx) override {
  2558. Y_UNUSED(ctx);
  2559. Y_ABORT("Unexpected call");
  2560. }
  2561. const TString* GetWindowName() const override {
  2562. return &WindowName;
  2563. }
  2564. TWindowSpecificationPtr FindWindowSpecification(TContext& ctx, const TString& windowName) const override {
  2565. return Source->FindWindowSpecification(ctx, windowName);
  2566. }
  2567. TNodePtr GetSessionWindowSpec() const override {
  2568. return Source->GetSessionWindowSpec();
  2569. }
  2570. TNodePtr DoClone() const final {
  2571. return {};
  2572. }
  2573. private:
  2574. const TString WindowName;
  2575. };
  2576. TSourcePtr BuildOverWindowSource(TPosition pos, const TString& windowName, ISource* origSource) {
  2577. return new TOverWindowSource(pos, windowName, origSource);
  2578. }
  2579. class TSkipTakeNode final: public TAstListNode {
  2580. public:
  2581. TSkipTakeNode(TPosition pos, const TNodePtr& skip, const TNodePtr& take)
  2582. : TAstListNode(pos), IsSkipProvided_(!!skip)
  2583. {
  2584. TNodePtr select(AstNode("select"));
  2585. if (skip) {
  2586. select = Y("Skip", select, Y("Coalesce", skip, Y("Uint64", Q("0"))));
  2587. }
  2588. static const TString uiMax = ::ToString(std::numeric_limits<ui64>::max());
  2589. Add("let", "select", Y("Take", select, Y("Coalesce", take, Y("Uint64", Q(uiMax)))));
  2590. }
  2591. TPtr DoClone() const final {
  2592. return {};
  2593. }
  2594. bool HasSkip() const {
  2595. return IsSkipProvided_;
  2596. }
  2597. private:
  2598. const bool IsSkipProvided_;
  2599. };
  2600. TNodePtr BuildSkipTake(TPosition pos, const TNodePtr& skip, const TNodePtr& take) {
  2601. return new TSkipTakeNode(pos, skip, take);
  2602. }
  2603. class TSelect: public IProxySource {
  2604. public:
  2605. TSelect(TPosition pos, TSourcePtr source, TNodePtr skipTake)
  2606. : IProxySource(pos, source.Get())
  2607. , Source(std::move(source))
  2608. , SkipTake(skipTake)
  2609. {}
  2610. bool DoInit(TContext& ctx, ISource* src) override {
  2611. Source->SetLabel(Label);
  2612. if (AsInner) {
  2613. Source->UseAsInner();
  2614. }
  2615. if (IgnoreSort()) {
  2616. Source->DisableSort();
  2617. ctx.Warning(Source->GetPos(), TIssuesIds::YQL_ORDER_BY_WITHOUT_LIMIT_IN_SUBQUERY) << "ORDER BY without LIMIT in subquery will be ignored";
  2618. }
  2619. if (!Source->Init(ctx, src)) {
  2620. return false;
  2621. }
  2622. src = Source.Get();
  2623. if (SkipTake) {
  2624. FakeSource = BuildFakeSource(SkipTake->GetPos());
  2625. if (!SkipTake->Init(ctx, FakeSource.Get())) {
  2626. return false;
  2627. }
  2628. if (SkipTake->HasSkip() && EOrderKind::Sort != Source->GetOrderKind()) {
  2629. ctx.Warning(Source->GetPos(), TIssuesIds::YQL_OFFSET_WITHOUT_SORT) << "LIMIT with OFFSET without ORDER BY may provide different results from run to run";
  2630. }
  2631. }
  2632. return true;
  2633. }
  2634. TNodePtr Build(TContext& ctx) override {
  2635. auto input = Source->Build(ctx);
  2636. if (!input) {
  2637. return nullptr;
  2638. }
  2639. const auto label = "select";
  2640. auto block(Y(Y("let", label, input)));
  2641. auto sortNode = Source->BuildSort(ctx, label);
  2642. if (sortNode && !IgnoreSort()) {
  2643. block = L(block, sortNode);
  2644. }
  2645. if (SkipTake) {
  2646. block = L(block, SkipTake);
  2647. }
  2648. TNodePtr sample;
  2649. if (!BuildSamplingLambda(sample)) {
  2650. return nullptr;
  2651. } else if (sample) {
  2652. block = L(block, Y("let", "select", Y("OrderedFlatMap", "select", sample)));
  2653. }
  2654. if (auto removeNode = Source->BuildCleanupColumns(ctx, label)) {
  2655. block = L(block, removeNode);
  2656. }
  2657. block = L(block, Y("return", label));
  2658. return Y("block", Q(block));
  2659. }
  2660. bool SetSamplingOptions(
  2661. TContext& ctx,
  2662. TPosition pos,
  2663. ESampleClause sampleClause,
  2664. ESampleMode mode,
  2665. TNodePtr samplingRate,
  2666. TNodePtr samplingSeed) override {
  2667. if (mode == ESampleMode::System) {
  2668. ctx.Error(pos) << "only Bernoulli sampling mode is supported for subqueries";
  2669. return false;
  2670. }
  2671. if (samplingSeed) {
  2672. ctx.Error(pos) << "'Repeatable' keyword is not supported for subqueries";
  2673. return false;
  2674. }
  2675. return SetSamplingRate(ctx, sampleClause, samplingRate);
  2676. }
  2677. bool IsSelect() const override {
  2678. return Source->IsSelect();
  2679. }
  2680. bool HasSelectResult() const override {
  2681. return Source->HasSelectResult();
  2682. }
  2683. TPtr DoClone() const final {
  2684. return MakeIntrusive<TSelect>(Pos, Source->CloneSource(), SafeClone(SkipTake));
  2685. }
  2686. protected:
  2687. bool IgnoreSort() const {
  2688. return AsInner && !SkipTake && EOrderKind::Sort == Source->GetOrderKind();
  2689. }
  2690. TSourcePtr Source;
  2691. TNodePtr SkipTake;
  2692. TSourcePtr FakeSource;
  2693. };
  2694. TSourcePtr BuildSelect(TPosition pos, TSourcePtr source, TNodePtr skipTake) {
  2695. return new TSelect(pos, std::move(source), skipTake);
  2696. }
  2697. class TSelectResultNode final: public TAstListNode {
  2698. public:
  2699. TSelectResultNode(TPosition pos, TSourcePtr source, bool writeResult, bool inSubquery,
  2700. TScopedStatePtr scoped)
  2701. : TAstListNode(pos)
  2702. , Source(std::move(source))
  2703. , WriteResult(writeResult)
  2704. , InSubquery(inSubquery)
  2705. , Scoped(scoped)
  2706. {
  2707. YQL_ENSURE(Source, "Invalid source node");
  2708. FakeSource = BuildFakeSource(pos);
  2709. }
  2710. bool IsSelect() const override {
  2711. return true;
  2712. }
  2713. bool HasSelectResult() const override {
  2714. return Source->HasSelectResult();
  2715. }
  2716. bool DoInit(TContext& ctx, ISource* src) override {
  2717. if (!Source->Init(ctx, src)) {
  2718. return false;
  2719. }
  2720. src = Source.Get();
  2721. TTableList tableList;
  2722. Source->GetInputTables(tableList);
  2723. TNodePtr node(BuildInputTables(Pos, tableList, InSubquery, Scoped));
  2724. if (!node->Init(ctx, src)) {
  2725. return false;
  2726. }
  2727. auto writeSettings = src->GetWriteSettings();
  2728. bool asRef = ctx.PragmaRefSelect;
  2729. bool asAutoRef = true;
  2730. if (ctx.PragmaSampleSelect) {
  2731. asRef = false;
  2732. asAutoRef = false;
  2733. }
  2734. auto settings = Y(Q(Y(Q("type"))));
  2735. if (writeSettings.Discard) {
  2736. settings = L(settings, Q(Y(Q("discard"))));
  2737. }
  2738. if (!writeSettings.Label.Empty()) {
  2739. auto labelNode = writeSettings.Label.Build();
  2740. if (!writeSettings.Label.GetLiteral()) {
  2741. labelNode = Y("EvaluateAtom", labelNode);
  2742. }
  2743. if (!labelNode->Init(ctx, FakeSource.Get())) {
  2744. return false;
  2745. }
  2746. settings = L(settings, Q(Y(Q("label"), labelNode)));
  2747. }
  2748. if (asRef) {
  2749. settings = L(settings, Q(Y(Q("ref"))));
  2750. } else if (asAutoRef) {
  2751. settings = L(settings, Q(Y(Q("autoref"))));
  2752. }
  2753. auto columns = Source->GetColumns();
  2754. if (columns && !columns->All && !(columns->QualifiedAll && ctx.SimpleColumns)) {
  2755. auto list = Y();
  2756. YQL_ENSURE(columns->List.size() == columns->NamedColumns.size());
  2757. for (size_t i = 0; i < columns->List.size(); ++i) {
  2758. auto& c = columns->List[i];
  2759. if (c.EndsWith('*')) {
  2760. list = L(list, Q(Y(Q("prefix"), BuildQuotedAtom(Pos, c.substr(0, c.size() - 1)))));
  2761. } else if (columns->NamedColumns[i]) {
  2762. list = L(list, BuildQuotedAtom(Pos, c));
  2763. } else {
  2764. list = L(list, Q(Y(Q("auto"))));
  2765. }
  2766. }
  2767. settings = L(settings, Q(Y(Q("columns"), Q(list))));
  2768. }
  2769. if (ctx.ResultRowsLimit > 0) {
  2770. settings = L(settings, Q(Y(Q("take"), Q(ToString(ctx.ResultRowsLimit)))));
  2771. }
  2772. auto output = Source->Build(ctx);
  2773. if (!output) {
  2774. return false;
  2775. }
  2776. node = L(node, Y("let", "output", output));
  2777. if (WriteResult || writeSettings.Discard) {
  2778. if (EOrderKind::None == Source->GetOrderKind() && ctx.UseUnordered(*Source)) {
  2779. node = L(node, Y("let", "output", Y("Unordered", "output")));
  2780. if (ctx.UnorderedResult) {
  2781. settings = L(settings, Q(Y(Q("unordered"))));
  2782. }
  2783. }
  2784. auto writeResult(BuildWriteResult(Pos, "output", settings));
  2785. if (!writeResult->Init(ctx, src)) {
  2786. return false;
  2787. }
  2788. node = L(node, Y("let", "world", writeResult));
  2789. node = L(node, Y("return", "world"));
  2790. } else {
  2791. node = L(node, Y("return", "output"));
  2792. }
  2793. Add("block", Q(node));
  2794. return true;
  2795. }
  2796. TPtr DoClone() const final {
  2797. return {};
  2798. }
  2799. protected:
  2800. TSourcePtr Source;
  2801. const bool WriteResult;
  2802. const bool InSubquery;
  2803. TScopedStatePtr Scoped;
  2804. TSourcePtr FakeSource;
  2805. };
  2806. TNodePtr BuildSelectResult(TPosition pos, TSourcePtr source, bool writeResult, bool inSubquery,
  2807. TScopedStatePtr scoped) {
  2808. return new TSelectResultNode(pos, std::move(source), writeResult, inSubquery, scoped);
  2809. }
  2810. } // namespace NSQLTranslationV1