select.cpp 104 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179118011811182118311841185118611871188118911901191119211931194119511961197119811991200120112021203120412051206120712081209121012111212121312141215121612171218121912201221122212231224122512261227122812291230123112321233123412351236123712381239124012411242124312441245124612471248124912501251125212531254125512561257125812591260126112621263126412651266126712681269127012711272127312741275127612771278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177717781779178017811782178317841785178617871788178917901791179217931794179517961797179817991800180118021803180418051806180718081809181018111812181318141815181618171818181918201821182218231824182518261827182818291830183118321833183418351836183718381839184018411842184318441845184618471848184918501851185218531854185518561857185818591860186118621863186418651866186718681869187018711872187318741875187618771878187918801881188218831884188518861887188818891890189118921893189418951896189718981899190019011902190319041905190619071908190919101911191219131914191519161917191819191920192119221923192419251926192719281929193019311932193319341935193619371938193919401941194219431944194519461947194819491950195119521953195419551956195719581959196019611962196319641965196619671968196919701971197219731974197519761977197819791980198119821983198419851986198719881989199019911992199319941995199619971998199920002001200220032004200520062007200820092010201120122013201420152016201720182019202020212022202320242025202620272028202920302031203220332034203520362037203820392040204120422043204420452046204720482049205020512052205320542055205620572058205920602061206220632064206520662067206820692070207120722073207420752076207720782079208020812082208320842085208620872088208920902091209220932094209520962097209820992100210121022103210421052106210721082109211021112112211321142115211621172118211921202121212221232124212521262127212821292130213121322133213421352136213721382139214021412142214321442145214621472148214921502151215221532154215521562157215821592160216121622163216421652166216721682169217021712172217321742175217621772178217921802181218221832184218521862187218821892190219121922193219421952196219721982199220022012202220322042205220622072208220922102211221222132214221522162217221822192220222122222223222422252226222722282229223022312232223322342235223622372238223922402241224222432244224522462247224822492250225122522253225422552256225722582259226022612262226322642265226622672268226922702271227222732274227522762277227822792280228122822283228422852286228722882289229022912292229322942295229622972298229923002301230223032304230523062307230823092310231123122313231423152316231723182319232023212322232323242325232623272328232923302331233223332334233523362337233823392340234123422343234423452346234723482349235023512352235323542355235623572358235923602361236223632364236523662367236823692370237123722373237423752376237723782379238023812382238323842385238623872388238923902391239223932394239523962397239823992400240124022403240424052406240724082409241024112412241324142415241624172418241924202421242224232424242524262427242824292430243124322433243424352436243724382439244024412442244324442445244624472448244924502451245224532454245524562457245824592460246124622463246424652466246724682469247024712472247324742475247624772478247924802481248224832484248524862487248824892490249124922493249424952496249724982499250025012502250325042505250625072508250925102511251225132514251525162517251825192520252125222523252425252526252725282529253025312532253325342535253625372538253925402541254225432544254525462547254825492550255125522553255425552556255725582559256025612562256325642565256625672568256925702571257225732574257525762577257825792580258125822583258425852586258725882589259025912592259325942595259625972598259926002601260226032604260526062607260826092610261126122613261426152616261726182619262026212622262326242625262626272628262926302631263226332634263526362637263826392640264126422643264426452646264726482649265026512652265326542655265626572658265926602661266226632664266526662667266826692670267126722673267426752676267726782679268026812682268326842685268626872688268926902691269226932694269526962697269826992700270127022703270427052706270727082709271027112712271327142715271627172718271927202721272227232724272527262727272827292730273127322733273427352736273727382739274027412742274327442745274627472748274927502751275227532754275527562757275827592760276127622763276427652766276727682769277027712772277327742775277627772778277927802781278227832784278527862787278827892790279127922793279427952796279727982799280028012802280328042805280628072808280928102811281228132814281528162817281828192820282128222823282428252826282728282829283028312832283328342835283628372838283928402841284228432844284528462847284828492850285128522853285428552856285728582859286028612862286328642865286628672868286928702871287228732874287528762877287828792880288128822883288428852886288728882889289028912892289328942895289628972898289929002901290229032904290529062907290829092910291129122913291429152916291729182919292029212922292329242925292629272928292929302931293229332934293529362937293829392940294129422943294429452946294729482949295029512952295329542955295629572958295929602961296229632964296529662967296829692970297129722973297429752976297729782979298029812982298329842985298629872988298929902991299229932994299529962997299829993000300130023003300430053006300730083009301030113012301330143015301630173018301930203021302230233024302530263027302830293030303130323033303430353036303730383039304030413042304330443045304630473048304930503051305230533054305530563057305830593060306130623063306430653066306730683069307030713072307330743075307630773078307930803081308230833084308530863087308830893090309130923093309430953096309730983099310031013102310331043105310631073108310931103111311231133114311531163117311831193120312131223123312431253126312731283129313031313132313331343135313631373138313931403141314231433144314531463147314831493150315131523153315431553156315731583159316031613162316331643165316631673168316931703171317231733174317531763177317831793180318131823183318431853186318731883189319031913192319331943195
  1. #include "sql.h"
  2. #include "source.h"
  3. #include "context.h"
  4. #include "match_recognize.h"
  5. #include <yql/essentials/providers/common/provider/yql_provider_names.h>
  6. #include <yql/essentials/utils/yql_panic.h>
  7. #include <library/cpp/charset/ci_string.h>
  8. using namespace NYql;
  9. namespace NSQLTranslationV1 {
  10. class TSubqueryNode: public INode {
  11. public:
  12. TSubqueryNode(TSourcePtr&& source, const TString& alias, bool inSubquery, int ensureTupleSize, TScopedStatePtr scoped)
  13. : INode(source->GetPos())
  14. , Source(std::move(source))
  15. , Alias(alias)
  16. , InSubquery(inSubquery)
  17. , EnsureTupleSize(ensureTupleSize)
  18. , Scoped(scoped)
  19. {
  20. YQL_ENSURE(!Alias.empty());
  21. }
  22. ISource* GetSource() override {
  23. return Source.Get();
  24. }
  25. bool DoInit(TContext& ctx, ISource* src) override {
  26. YQL_ENSURE(!src, "Source not expected for subquery node");
  27. Source->UseAsInner();
  28. if (!Source->Init(ctx, nullptr)) {
  29. return false;
  30. }
  31. TTableList tableList;
  32. Source->GetInputTables(tableList);
  33. auto tables = BuildInputTables(Pos, tableList, InSubquery, Scoped);
  34. if (!tables->Init(ctx, Source.Get())) {
  35. return false;
  36. }
  37. auto source = Source->Build(ctx);
  38. if (!source) {
  39. return false;
  40. }
  41. if (EnsureTupleSize != -1) {
  42. source = Y("EnsureTupleSize", source, Q(ToString(EnsureTupleSize)));
  43. }
  44. Node = Y("let", Alias, Y("block", Q(L(tables, Y("return", Q(Y("world", source)))))));
  45. IsUsed = true;
  46. return true;
  47. }
  48. void DoUpdateState() const override {
  49. State.Set(ENodeState::Const, true);
  50. }
  51. bool UsedSubquery() const override {
  52. return IsUsed;
  53. }
  54. TAstNode* Translate(TContext& ctx) const override {
  55. Y_DEBUG_ABORT_UNLESS(Node);
  56. return Node->Translate(ctx);
  57. }
  58. const TString* SubqueryAlias() const override {
  59. return &Alias;
  60. }
  61. TPtr DoClone() const final {
  62. return new TSubqueryNode(Source->CloneSource(), Alias, InSubquery, EnsureTupleSize, Scoped);
  63. }
  64. protected:
  65. TSourcePtr Source;
  66. TNodePtr Node;
  67. const TString Alias;
  68. const bool InSubquery;
  69. const int EnsureTupleSize;
  70. bool IsUsed = false;
  71. TScopedStatePtr Scoped;
  72. };
  73. TNodePtr BuildSubquery(TSourcePtr source, const TString& alias, bool inSubquery, int ensureTupleSize, TScopedStatePtr scoped) {
  74. return new TSubqueryNode(std::move(source), alias, inSubquery, ensureTupleSize, scoped);
  75. }
  76. class TSourceNode: public INode {
  77. public:
  78. TSourceNode(TPosition pos, TSourcePtr&& source, bool checkExist)
  79. : INode(pos)
  80. , Source(std::move(source))
  81. , CheckExist(checkExist)
  82. {}
  83. ISource* GetSource() override {
  84. return Source.Get();
  85. }
  86. bool DoInit(TContext& ctx, ISource* src) override {
  87. if (AsInner) {
  88. Source->UseAsInner();
  89. }
  90. if (!Source->Init(ctx, src)) {
  91. return false;
  92. }
  93. Node = Source->Build(ctx);
  94. if (!Node) {
  95. return false;
  96. }
  97. if (src) {
  98. if (IsSubquery()) {
  99. /// should be not used?
  100. auto columnsPtr = Source->GetColumns();
  101. if (columnsPtr && (columnsPtr->All || columnsPtr->QualifiedAll || columnsPtr->List.size() == 1)) {
  102. Node = Y("SingleMember", Y("SqlAccess", Q("dict"), Y("Take", Node, Y("Uint64", Q("1"))), Y("Uint64", Q("0"))));
  103. } else {
  104. ctx.Error(Pos) << "Source used in expression should contain one concrete column";
  105. return false;
  106. }
  107. }
  108. src->AddDependentSource(Source.Get());
  109. }
  110. return true;
  111. }
  112. bool IsSubquery() const {
  113. return !AsInner && Source->IsSelect() && !CheckExist;
  114. }
  115. void DoUpdateState() const override {
  116. State.Set(ENodeState::Const, IsSubquery());
  117. }
  118. TAstNode* Translate(TContext& ctx) const override {
  119. Y_DEBUG_ABORT_UNLESS(Node);
  120. return Node->Translate(ctx);
  121. }
  122. TPtr DoClone() const final {
  123. return new TSourceNode(Pos, Source->CloneSource(), CheckExist);
  124. }
  125. protected:
  126. TSourcePtr Source;
  127. TNodePtr Node;
  128. bool CheckExist;
  129. };
  130. TNodePtr BuildSourceNode(TPosition pos, TSourcePtr source, bool checkExist) {
  131. return new TSourceNode(pos, std::move(source), checkExist);
  132. }
  133. class TFakeSource: public ISource {
  134. public:
  135. TFakeSource(TPosition pos, bool missingFrom, bool inSubquery)
  136. : ISource(pos)
  137. , MissingFrom(missingFrom)
  138. , InSubquery(inSubquery)
  139. {}
  140. bool IsFake() const override {
  141. return true;
  142. }
  143. TMaybe<bool> AddColumn(TContext& ctx, TColumnNode& column) override {
  144. // TODO: fix column reference scope - with proper scopes error below should happen earlier
  145. if (column.CanBeType()) {
  146. return true;
  147. }
  148. ctx.Error(Pos) << (MissingFrom ? "Column references are not allowed without FROM" : "Source does not allow column references");
  149. ctx.Error(column.GetPos()) << "Column reference "
  150. << (column.GetColumnName() ? "'" + *column.GetColumnName() + "'" : "(expr)");
  151. return {};
  152. }
  153. bool AddFilter(TContext& ctx, TNodePtr filter) override {
  154. Y_UNUSED(filter);
  155. auto pos = filter ? filter->GetPos() : Pos;
  156. ctx.Error(pos) << (MissingFrom ? "Filtering is not allowed without FROM" : "Source does not allow filtering");
  157. return false;
  158. }
  159. TNodePtr Build(TContext& ctx) override {
  160. Y_UNUSED(ctx);
  161. auto ret = Y("AsList", Y("AsStruct"));
  162. if (InSubquery) {
  163. return Y("WithWorld", ret, "world");
  164. } else {
  165. return ret;
  166. }
  167. }
  168. bool AddGroupKey(TContext& ctx, const TString& column) override {
  169. Y_UNUSED(column);
  170. ctx.Error(Pos) << "Grouping is not allowed " << (MissingFrom ? "without FROM" : "in this context");
  171. return false;
  172. }
  173. bool AddAggregation(TContext& ctx, TAggregationPtr aggr) override {
  174. YQL_ENSURE(aggr);
  175. ctx.Error(aggr->GetPos()) << "Aggregation is not allowed " << (MissingFrom ? "without FROM" : "in this context");
  176. return false;
  177. }
  178. bool AddAggregationOverWindow(TContext& ctx, const TString& windowName, TAggregationPtr func) override {
  179. Y_UNUSED(windowName);
  180. YQL_ENSURE(func);
  181. ctx.Error(func->GetPos()) << "Aggregation is not allowed " << (MissingFrom ? "without FROM" : "in this context");
  182. return false;
  183. }
  184. bool AddFuncOverWindow(TContext& ctx, const TString& windowName, TNodePtr func) override {
  185. Y_UNUSED(windowName);
  186. YQL_ENSURE(func);
  187. ctx.Error(func->GetPos()) << "Window functions are not allowed " << (MissingFrom ? "without FROM" : "in this context");
  188. return false;
  189. }
  190. TWindowSpecificationPtr FindWindowSpecification(TContext& ctx, const TString& windowName) const override {
  191. Y_UNUSED(windowName);
  192. ctx.Error(Pos) << "Window and aggregation functions are not allowed " << (MissingFrom ? "without FROM" : "in this context");
  193. return {};
  194. }
  195. bool IsGroupByColumn(const TString& column) const override {
  196. Y_UNUSED(column);
  197. return false;
  198. }
  199. TNodePtr BuildFilter(TContext& ctx, const TString& label) override {
  200. Y_UNUSED(ctx);
  201. Y_UNUSED(label);
  202. return nullptr;
  203. }
  204. std::pair<TNodePtr, bool> BuildAggregation(const TString& label, TContext& ctx) override {
  205. Y_UNUSED(label);
  206. Y_UNUSED(ctx);
  207. return { nullptr, true };
  208. }
  209. TPtr DoClone() const final {
  210. return new TFakeSource(Pos, MissingFrom, InSubquery);
  211. }
  212. private:
  213. const bool MissingFrom;
  214. const bool InSubquery;
  215. };
  216. TSourcePtr BuildFakeSource(TPosition pos, bool missingFrom, bool inSubquery) {
  217. return new TFakeSource(pos, missingFrom, inSubquery);
  218. }
  219. class TNodeSource: public ISource {
  220. public:
  221. TNodeSource(TPosition pos, const TNodePtr& node, bool wrapToList)
  222. : ISource(pos)
  223. , Node(node)
  224. , WrapToList(wrapToList)
  225. {
  226. YQL_ENSURE(Node);
  227. FakeSource = BuildFakeSource(pos);
  228. }
  229. bool ShouldUseSourceAsColumn(const TString& source) const final {
  230. return source && source != GetLabel();
  231. }
  232. TMaybe<bool> AddColumn(TContext& ctx, TColumnNode& column) final {
  233. Y_UNUSED(ctx);
  234. Y_UNUSED(column);
  235. return true;
  236. }
  237. bool DoInit(TContext& ctx, ISource* src) final {
  238. if (!Node->Init(ctx, FakeSource.Get())) {
  239. return false;
  240. }
  241. return ISource::DoInit(ctx, src);
  242. }
  243. TNodePtr Build(TContext& /*ctx*/) final {
  244. auto nodeAst = AstNode(Node);
  245. if (WrapToList) {
  246. nodeAst = Y("ToList", nodeAst);
  247. }
  248. return nodeAst;
  249. }
  250. TPtr DoClone() const final {
  251. return new TNodeSource(Pos, SafeClone(Node), WrapToList);
  252. }
  253. private:
  254. TNodePtr Node;
  255. bool WrapToList;
  256. TSourcePtr FakeSource;
  257. };
  258. TSourcePtr BuildNodeSource(TPosition pos, const TNodePtr& node, bool wrapToList) {
  259. return new TNodeSource(pos, node, wrapToList);
  260. }
  261. class IProxySource: public ISource {
  262. protected:
  263. IProxySource(TPosition pos, ISource* src)
  264. : ISource(pos)
  265. , Source(src)
  266. {}
  267. void AllColumns() override {
  268. Y_DEBUG_ABORT_UNLESS(Source);
  269. return Source->AllColumns();
  270. }
  271. const TColumns* GetColumns() const override {
  272. Y_DEBUG_ABORT_UNLESS(Source);
  273. return Source->GetColumns();
  274. }
  275. void GetInputTables(TTableList& tableList) const override {
  276. Source->GetInputTables(tableList);
  277. ISource::GetInputTables(tableList);
  278. }
  279. TMaybe<bool> AddColumn(TContext& ctx, TColumnNode& column) override {
  280. Y_DEBUG_ABORT_UNLESS(Source);
  281. const TString label(Source->GetLabel());
  282. Source->SetLabel(Label);
  283. const auto ret = Source->AddColumn(ctx, column);
  284. Source->SetLabel(label);
  285. return ret;
  286. }
  287. bool ShouldUseSourceAsColumn(const TString& source) const override {
  288. return Source->ShouldUseSourceAsColumn(source);
  289. }
  290. bool IsStream() const override {
  291. Y_DEBUG_ABORT_UNLESS(Source);
  292. return Source->IsStream();
  293. }
  294. EOrderKind GetOrderKind() const override {
  295. Y_DEBUG_ABORT_UNLESS(Source);
  296. return Source->GetOrderKind();
  297. }
  298. TWriteSettings GetWriteSettings() const override {
  299. Y_DEBUG_ABORT_UNLESS(Source);
  300. return Source->GetWriteSettings();
  301. }
  302. protected:
  303. void SetSource(ISource* source) {
  304. Source = source;
  305. }
  306. ISource* Source;
  307. };
  308. class IRealSource: public ISource {
  309. protected:
  310. IRealSource(TPosition pos)
  311. : ISource(pos)
  312. {
  313. }
  314. void AllColumns() override {
  315. Columns.SetAll();
  316. }
  317. const TColumns* GetColumns() const override {
  318. return &Columns;
  319. }
  320. TMaybe<bool> AddColumn(TContext& ctx, TColumnNode& column) override {
  321. const auto& label = *column.GetSourceName();
  322. const auto& source = GetLabel();
  323. if (!label.empty() && label != source && !(source.StartsWith(label) && source[label.size()] == ':')) {
  324. if (column.IsReliable()) {
  325. ctx.Error(column.GetPos()) << "Unknown correlation name: " << label;
  326. }
  327. return {};
  328. }
  329. if (column.IsAsterisk()) {
  330. return true;
  331. }
  332. const auto* name = column.GetColumnName();
  333. if (name && !column.CanBeType() && !Columns.IsColumnPossible(ctx, *name) && !IsAlias(EExprSeat::GroupBy, *name) && !IsAlias(EExprSeat::DistinctAggr, *name)) {
  334. if (column.IsReliable()) {
  335. TStringBuilder sb;
  336. sb << "Column " << *name << " is not in source column set";
  337. if (const auto mistype = FindColumnMistype(*name)) {
  338. sb << ". Did you mean " << mistype.GetRef() << "?";
  339. }
  340. ctx.Error(column.GetPos()) << sb;
  341. }
  342. return {};
  343. }
  344. return true;
  345. }
  346. TMaybe<TString> FindColumnMistype(const TString& name) const override {
  347. auto result = FindMistypeIn(Columns.Real, name);
  348. if (!result) {
  349. auto result = FindMistypeIn(Columns.Artificial, name);
  350. }
  351. return result ? result : ISource::FindColumnMistype(name);
  352. }
  353. protected:
  354. TColumns Columns;
  355. };
  356. class IComposableSource : private TNonCopyable {
  357. public:
  358. virtual ~IComposableSource() = default;
  359. virtual void BuildProjectWindowDistinct(TNodePtr& blocks, TContext& ctx, bool ordered) = 0;
  360. };
  361. using TComposableSourcePtr = TIntrusivePtr<IComposableSource>;
  362. class TMuxSource: public ISource {
  363. public:
  364. TMuxSource(TPosition pos, TVector<TSourcePtr>&& sources)
  365. : ISource(pos)
  366. , Sources(std::move(sources))
  367. {
  368. YQL_ENSURE(Sources.size() > 1);
  369. }
  370. void AllColumns() final {
  371. for (auto& source: Sources) {
  372. source->AllColumns();
  373. }
  374. }
  375. const TColumns* GetColumns() const final {
  376. // Columns are equal in all sources. Return from the first one
  377. return Sources.front()->GetColumns();
  378. }
  379. void GetInputTables(TTableList& tableList) const final {
  380. for (auto& source: Sources) {
  381. source->GetInputTables(tableList);
  382. }
  383. ISource::GetInputTables(tableList);
  384. }
  385. bool IsStream() const final {
  386. return AnyOf(Sources, [] (const TSourcePtr& s) { return s->IsStream(); });
  387. }
  388. bool DoInit(TContext& ctx, ISource* src) final {
  389. for (auto& source: Sources) {
  390. if (AsInner) {
  391. source->UseAsInner();
  392. }
  393. if (src) {
  394. src->AddDependentSource(source.Get());
  395. }
  396. if (!source->Init(ctx, src)) {
  397. return false;
  398. }
  399. if (!source->InitFilters(ctx)) {
  400. return false;
  401. }
  402. }
  403. return true;
  404. }
  405. TMaybe<bool> AddColumn(TContext& ctx, TColumnNode& column) final {
  406. for (auto& source: Sources) {
  407. if (!source->AddColumn(ctx, column)) {
  408. return {};
  409. }
  410. }
  411. return true;
  412. }
  413. TNodePtr Build(TContext& ctx) final {
  414. TNodePtr block;
  415. auto muxArgs = Y();
  416. for (size_t i = 0; i < Sources.size(); ++i) {
  417. auto& source = Sources[i];
  418. auto input = source->Build(ctx);
  419. auto ref = ctx.MakeName("src");
  420. muxArgs->Add(ref);
  421. if (block) {
  422. block = L(block, Y("let", ref, input));
  423. } else {
  424. block = Y(Y("let", ref, input));
  425. }
  426. auto filter = source->BuildFilter(ctx, ref);
  427. if (filter) {
  428. block = L(block, Y("let", ref, filter));
  429. }
  430. if (ctx.EnableSystemColumns) {
  431. block = L(block, Y("let", ref, Y("RemoveSystemMembers", ref)));
  432. }
  433. }
  434. return GroundWithExpr(block, Y("Mux", Q(muxArgs)));
  435. }
  436. bool AddFilter(TContext& ctx, TNodePtr filter) final {
  437. Y_UNUSED(filter);
  438. ctx.Error() << "Filter is not allowed for multiple sources";
  439. return false;
  440. }
  441. TPtr DoClone() const final {
  442. return new TMuxSource(Pos, CloneContainer(Sources));
  443. }
  444. protected:
  445. TVector<TSourcePtr> Sources;
  446. };
  447. TSourcePtr BuildMuxSource(TPosition pos, TVector<TSourcePtr>&& sources) {
  448. return new TMuxSource(pos, std::move(sources));
  449. }
  450. class TSubqueryRefNode: public IRealSource {
  451. public:
  452. TSubqueryRefNode(const TNodePtr& subquery, const TString& alias, int tupleIndex)
  453. : IRealSource(subquery->GetPos())
  454. , Subquery(subquery)
  455. , Alias(alias)
  456. , TupleIndex(tupleIndex)
  457. {
  458. YQL_ENSURE(subquery->GetSource());
  459. }
  460. ISource* GetSource() override {
  461. return this;
  462. }
  463. bool DoInit(TContext& ctx, ISource* src) override {
  464. // independent subquery should not connect source
  465. Subquery->UseAsInner();
  466. if (!Subquery->Init(ctx, nullptr)) {
  467. return false;
  468. }
  469. Columns = *Subquery->GetSource()->GetColumns();
  470. Node = BuildAtom(Pos, Alias, TNodeFlags::Default);
  471. if (TupleIndex != -1) {
  472. Node = Y("Nth", Node, Q(ToString(TupleIndex)));
  473. }
  474. if (!Node->Init(ctx, src)) {
  475. return false;
  476. }
  477. if (src && Subquery->GetSource()->IsSelect()) {
  478. auto columnsPtr = &Columns;
  479. if (columnsPtr && (columnsPtr->All || columnsPtr->QualifiedAll || columnsPtr->List.size() == 1)) {
  480. Node = Y("SingleMember", Y("SqlAccess", Q("dict"), Y("Take", Node, Y("Uint64", Q("1"))), Y("Uint64", Q("0"))));
  481. } else {
  482. ctx.Error(Pos) << "Source used in expression should contain one concrete column";
  483. return false;
  484. }
  485. }
  486. TNodePtr sample;
  487. if (!BuildSamplingLambda(sample)) {
  488. return false;
  489. } else if (sample) {
  490. Node = Y("block", Q(Y(Y("let", Node, Y("OrderedFlatMap", Node, sample)), Y("return", Node))));
  491. }
  492. return true;
  493. }
  494. TNodePtr Build(TContext& ctx) override {
  495. Y_UNUSED(ctx);
  496. return Node;
  497. }
  498. bool SetSamplingOptions(
  499. TContext& ctx,
  500. TPosition pos,
  501. ESampleClause sampleClause,
  502. ESampleMode mode,
  503. TNodePtr samplingRate,
  504. TNodePtr samplingSeed) override {
  505. if (mode == ESampleMode::System) {
  506. ctx.Error(pos) << "only Bernoulli sampling mode is supported for subqueries";
  507. return false;
  508. }
  509. if (samplingSeed) {
  510. ctx.Error(pos) << "'Repeatable' keyword is not supported for subqueries";
  511. return false;
  512. }
  513. return SetSamplingRate(ctx, sampleClause, samplingRate);
  514. }
  515. bool IsStream() const override {
  516. return Subquery->GetSource()->IsStream();
  517. }
  518. void DoUpdateState() const override {
  519. State.Set(ENodeState::Const, true);
  520. }
  521. TAstNode* Translate(TContext& ctx) const override {
  522. Y_DEBUG_ABORT_UNLESS(Node);
  523. return Node->Translate(ctx);
  524. }
  525. TPtr DoClone() const final {
  526. return new TSubqueryRefNode(Subquery, Alias, TupleIndex);
  527. }
  528. protected:
  529. TNodePtr Subquery;
  530. const TString Alias;
  531. const int TupleIndex;
  532. TNodePtr Node;
  533. };
  534. TNodePtr BuildSubqueryRef(TNodePtr subquery, const TString& alias, int tupleIndex) {
  535. return new TSubqueryRefNode(std::move(subquery), alias, tupleIndex);
  536. }
  537. class TInvalidSubqueryRefNode: public ISource {
  538. public:
  539. TInvalidSubqueryRefNode(TPosition pos)
  540. : ISource(pos)
  541. , Pos(pos)
  542. {
  543. }
  544. bool DoInit(TContext& ctx, ISource* src) override {
  545. Y_UNUSED(src);
  546. ctx.Error(Pos) << "Named subquery can not be used as a top level statement in libraries";
  547. return false;
  548. }
  549. TNodePtr Build(TContext& ctx) override {
  550. Y_UNUSED(ctx);
  551. return {};
  552. }
  553. TPtr DoClone() const final {
  554. return new TInvalidSubqueryRefNode(Pos);
  555. }
  556. protected:
  557. const TPosition Pos;
  558. };
  559. TNodePtr BuildInvalidSubqueryRef(TPosition subqueryPos) {
  560. return new TInvalidSubqueryRefNode(subqueryPos);
  561. }
  562. class TTableSource: public IRealSource {
  563. public:
  564. TTableSource(TPosition pos, const TTableRef& table, const TString& label)
  565. : IRealSource(pos)
  566. , Table(table)
  567. , FakeSource(BuildFakeSource(pos))
  568. {
  569. SetLabel(label.empty() ? Table.ShortName() : label);
  570. }
  571. void GetInputTables(TTableList& tableList) const override {
  572. tableList.push_back(Table);
  573. ISource::GetInputTables(tableList);
  574. }
  575. bool ShouldUseSourceAsColumn(const TString& source) const override {
  576. const auto& label = GetLabel();
  577. return source && source != label && !(label.StartsWith(source) && label[source.size()] == ':');
  578. }
  579. TMaybe<bool> AddColumn(TContext& ctx, TColumnNode& column) override {
  580. Columns.Add(column.GetColumnName(), column.GetCountHint(), column.IsArtificial(), column.IsReliable());
  581. if (!IRealSource::AddColumn(ctx, column)) {
  582. return {};
  583. }
  584. return false;
  585. }
  586. bool SetSamplingOptions(
  587. TContext& ctx,
  588. TPosition pos,
  589. ESampleClause sampleClause,
  590. ESampleMode mode,
  591. TNodePtr samplingRate,
  592. TNodePtr samplingSeed) override
  593. {
  594. Y_UNUSED(pos);
  595. TString modeName;
  596. if (!samplingSeed) {
  597. samplingSeed = Y("Int32", Q("0"));
  598. }
  599. if (ESampleClause::Sample == sampleClause) {
  600. YQL_ENSURE(ESampleMode::Bernoulli == mode, "Internal logic error");
  601. }
  602. switch (mode) {
  603. case ESampleMode::Bernoulli:
  604. modeName = "bernoulli";
  605. break;
  606. case ESampleMode::System:
  607. modeName = "system";
  608. break;
  609. }
  610. if (!samplingRate->Init(ctx, FakeSource.Get())) {
  611. return false;
  612. }
  613. samplingRate = PrepareSamplingRate(pos, sampleClause, samplingRate);
  614. auto sampleSettings = Q(Y(Q(modeName), Y("EvaluateAtom", Y("ToString", samplingRate)), Y("EvaluateAtom", Y("ToString", samplingSeed))));
  615. auto sampleOption = Q(Y(Q("sample"), sampleSettings));
  616. if (Table.Options) {
  617. if (!Table.Options->Init(ctx, this)) {
  618. return false;
  619. }
  620. Table.Options = L(Table.Options, sampleOption);
  621. } else {
  622. Table.Options = Y(sampleOption);
  623. }
  624. return true;
  625. }
  626. bool SetTableHints(TContext& ctx, TPosition pos, const TTableHints& hints, const TTableHints& contextHints) override {
  627. Y_UNUSED(ctx);
  628. TTableHints merged = contextHints;
  629. MergeHints(merged, hints);
  630. Table.Options = BuildInputOptions(pos, merged);
  631. return true;
  632. }
  633. bool SetViewName(TContext& ctx, TPosition pos, const TString& view) override {
  634. return Table.Keys->SetViewName(ctx, pos, view);
  635. }
  636. TNodePtr Build(TContext& ctx) override {
  637. if (!Table.Keys->Init(ctx, nullptr)) {
  638. return nullptr;
  639. }
  640. return AstNode(Table.RefName);
  641. }
  642. bool IsStream() const override {
  643. return IsStreamingService(Table.Service);
  644. }
  645. TPtr DoClone() const final {
  646. return new TTableSource(Pos, Table, GetLabel());
  647. }
  648. bool IsTableSource() const override {
  649. return true;
  650. }
  651. protected:
  652. TTableRef Table;
  653. private:
  654. const TSourcePtr FakeSource;
  655. };
  656. TSourcePtr BuildTableSource(TPosition pos, const TTableRef& table, const TString& label) {
  657. return new TTableSource(pos, table, label);
  658. }
  659. class TInnerSource: public IProxySource {
  660. public:
  661. TInnerSource(TPosition pos, TNodePtr node, const TString& service, const TDeferredAtom& cluster, const TString& label)
  662. : IProxySource(pos, nullptr)
  663. , Node(node)
  664. , Service(service)
  665. , Cluster(cluster)
  666. {
  667. SetLabel(label);
  668. }
  669. bool SetSamplingOptions(TContext& ctx, TPosition pos, ESampleClause sampleClause, ESampleMode mode, TNodePtr samplingRate, TNodePtr samplingSeed) override {
  670. Y_UNUSED(ctx);
  671. SamplingPos = pos;
  672. SamplingClause = sampleClause;
  673. SamplingMode = mode;
  674. SamplingRate = samplingRate;
  675. SamplingSeed = samplingSeed;
  676. return true;
  677. }
  678. bool SetTableHints(TContext& ctx, TPosition pos, const TTableHints& hints, const TTableHints& contextHints) override {
  679. Y_UNUSED(ctx);
  680. HintsPos = pos;
  681. Hints = hints;
  682. ContextHints = contextHints;
  683. return true;
  684. }
  685. bool SetViewName(TContext& ctx, TPosition pos, const TString& view) override {
  686. Y_UNUSED(ctx);
  687. ViewPos = pos;
  688. View = view;
  689. return true;
  690. }
  691. bool ShouldUseSourceAsColumn(const TString& source) const override {
  692. return source && source != GetLabel();
  693. }
  694. TMaybe<bool> AddColumn(TContext& ctx, TColumnNode& column) override {
  695. if (const TString* columnName = column.GetColumnName()) {
  696. if (columnName && IsExprAlias(*columnName)) {
  697. return true;
  698. }
  699. }
  700. return IProxySource::AddColumn(ctx, column);
  701. }
  702. bool DoInit(TContext& ctx, ISource* initSrc) override {
  703. Y_UNUSED(initSrc);
  704. auto source = Node->GetSource();
  705. if (!source) {
  706. NewSource = TryMakeSourceFromExpression(Pos, ctx, Service, Cluster, Node);
  707. source = NewSource.Get();
  708. }
  709. if (!source) {
  710. ctx.Error(Pos) << "Invalid inner source node";
  711. return false;
  712. }
  713. if (SamplingPos) {
  714. if (!source->SetSamplingOptions(ctx, *SamplingPos, SamplingClause, SamplingMode, SamplingRate, SamplingSeed)) {
  715. return false;
  716. }
  717. }
  718. if (ViewPos) {
  719. if (!source->SetViewName(ctx, *ViewPos, View)) {
  720. return false;
  721. }
  722. }
  723. if (HintsPos) {
  724. if (!source->SetTableHints(ctx, *HintsPos, Hints, ContextHints)) {
  725. return false;
  726. }
  727. }
  728. source->SetLabel(Label);
  729. if (!NewSource) {
  730. Node->UseAsInner();
  731. if (!Node->Init(ctx, nullptr)) {
  732. return false;
  733. }
  734. }
  735. SetSource(source);
  736. if (NewSource && !NewSource->Init(ctx, nullptr)) {
  737. return false;
  738. }
  739. return ISource::DoInit(ctx, source);
  740. }
  741. TNodePtr Build(TContext& ctx) override {
  742. Y_UNUSED(ctx);
  743. return NewSource ? NewSource->Build(ctx) : Node;
  744. }
  745. bool IsStream() const override {
  746. auto source = Node->GetSource();
  747. if (source) {
  748. return source->IsStream();
  749. }
  750. // NewSource will be built later in DoInit->TryMakeSourceFromExpression
  751. // where Service will be used in all situations
  752. // let's detect IsStream by Service value
  753. return IsStreamingService(Service);
  754. }
  755. TPtr DoClone() const final {
  756. return new TInnerSource(Pos, SafeClone(Node), Service, Cluster, GetLabel());
  757. }
  758. protected:
  759. TNodePtr Node;
  760. TString Service;
  761. TDeferredAtom Cluster;
  762. TSourcePtr NewSource;
  763. private:
  764. TMaybe<TPosition> SamplingPos;
  765. ESampleClause SamplingClause;
  766. ESampleMode SamplingMode;
  767. TNodePtr SamplingRate;
  768. TNodePtr SamplingSeed;
  769. TMaybe<TPosition> ViewPos;
  770. TString View;
  771. TMaybe<TPosition> HintsPos;
  772. TTableHints Hints;
  773. TTableHints ContextHints;
  774. };
  775. TSourcePtr BuildInnerSource(TPosition pos, TNodePtr node, const TString& service, const TDeferredAtom& cluster, const TString& label) {
  776. return new TInnerSource(pos, node, service, cluster, label);
  777. }
  778. static bool IsComparableExpression(TContext& ctx, const TNodePtr& expr, bool assume, const char* sqlConstruction) {
  779. if (assume && !expr->IsPlainColumn()) {
  780. ctx.Error(expr->GetPos()) << "Only column names can be used in " << sqlConstruction;
  781. return false;
  782. }
  783. if (expr->IsConstant()) {
  784. ctx.Error(expr->GetPos()) << "Unable to " << sqlConstruction << " constant expression";
  785. return false;
  786. }
  787. if (expr->IsAggregated() && !expr->HasState(ENodeState::AggregationKey)) {
  788. ctx.Error(expr->GetPos()) << "Unable to " << sqlConstruction << " aggregated values";
  789. return false;
  790. }
  791. if (expr->IsPlainColumn()) {
  792. return true;
  793. }
  794. if (expr->GetOpName().empty()) {
  795. ctx.Error(expr->GetPos()) << "You should use in " << sqlConstruction << " column name, qualified field, callable function or expression";
  796. return false;
  797. }
  798. return true;
  799. }
  800. /// \todo move to reduce.cpp? or mapreduce.cpp?
  801. class TReduceSource: public IRealSource {
  802. public:
  803. TReduceSource(TPosition pos,
  804. ReduceMode mode,
  805. TSourcePtr source,
  806. TVector<TSortSpecificationPtr>&& orderBy,
  807. TVector<TNodePtr>&& keys,
  808. TVector<TNodePtr>&& args,
  809. TNodePtr udf,
  810. TNodePtr having,
  811. const TWriteSettings& settings,
  812. const TVector<TSortSpecificationPtr>& assumeOrderBy,
  813. bool listCall)
  814. : IRealSource(pos)
  815. , Mode(mode)
  816. , Source(std::move(source))
  817. , OrderBy(std::move(orderBy))
  818. , Keys(std::move(keys))
  819. , Args(std::move(args))
  820. , Udf(udf)
  821. , Having(having)
  822. , Settings(settings)
  823. , AssumeOrderBy(assumeOrderBy)
  824. , ListCall(listCall)
  825. {
  826. YQL_ENSURE(!Keys.empty());
  827. YQL_ENSURE(Source);
  828. }
  829. void GetInputTables(TTableList& tableList) const override {
  830. Source->GetInputTables(tableList);
  831. ISource::GetInputTables(tableList);
  832. }
  833. bool DoInit(TContext& ctx, ISource* src) final {
  834. if (AsInner) {
  835. Source->UseAsInner();
  836. }
  837. YQL_ENSURE(!src);
  838. if (!Source->Init(ctx, src)) {
  839. return false;
  840. }
  841. if (!Source->InitFilters(ctx)) {
  842. return false;
  843. }
  844. src = Source.Get();
  845. for (auto& key: Keys) {
  846. if (!key->Init(ctx, src)) {
  847. return false;
  848. }
  849. auto keyNamePtr = key->GetColumnName();
  850. YQL_ENSURE(keyNamePtr);
  851. if (!src->AddGroupKey(ctx, *keyNamePtr)) {
  852. return false;
  853. }
  854. }
  855. if (Having && !Having->Init(ctx, nullptr)) {
  856. return false;
  857. }
  858. /// SIN: verify reduce one argument
  859. if (Args.size() != 1) {
  860. ctx.Error(Pos) << "REDUCE requires exactly one UDF argument";
  861. return false;
  862. }
  863. if (!Args[0]->Init(ctx, src)) {
  864. return false;
  865. }
  866. for (auto orderSpec: OrderBy) {
  867. if (!orderSpec->OrderExpr->Init(ctx, src)) {
  868. return false;
  869. }
  870. }
  871. if (!Udf->Init(ctx, src)) {
  872. return false;
  873. }
  874. if (Udf->GetLabel().empty()) {
  875. Columns.SetAll();
  876. } else {
  877. Columns.Add(&Udf->GetLabel(), false);
  878. }
  879. const auto label = GetLabel();
  880. for (const auto& sortSpec: AssumeOrderBy) {
  881. auto& expr = sortSpec->OrderExpr;
  882. SetLabel(Source->GetLabel());
  883. if (!expr->Init(ctx, this)) {
  884. return false;
  885. }
  886. if (!IsComparableExpression(ctx, expr, true, "ASSUME ORDER BY")) {
  887. return false;
  888. }
  889. }
  890. SetLabel(label);
  891. return true;
  892. }
  893. TNodePtr Build(TContext& ctx) final {
  894. auto input = Source->Build(ctx);
  895. if (!input) {
  896. return nullptr;
  897. }
  898. auto keysTuple = Y();
  899. if (Keys.size() == 1) {
  900. keysTuple = Y("Member", "row", BuildQuotedAtom(Pos, *Keys.back()->GetColumnName()));
  901. }
  902. else {
  903. for (const auto& key: Keys) {
  904. keysTuple = L(keysTuple, Y("Member", "row", BuildQuotedAtom(Pos, *key->GetColumnName())));
  905. }
  906. keysTuple = Q(keysTuple);
  907. }
  908. auto extractKey = Y("SqlExtractKey", "row", BuildLambda(Pos, Y("row"), keysTuple));
  909. auto extractKeyLambda = BuildLambda(Pos, Y("row"), extractKey);
  910. TNodePtr processPartitions;
  911. if (ListCall) {
  912. if (Mode != ReduceMode::ByAll) {
  913. ctx.Error(Pos) << "TableRows() must be used only with USING ALL";
  914. return nullptr;
  915. }
  916. TNodePtr expr = BuildAtom(Pos, "partitionStream");
  917. processPartitions = Y("SqlReduce", "partitionStream", BuildQuotedAtom(Pos, "byAllList", TNodeFlags::Default), Udf, expr);
  918. } else {
  919. switch (Mode) {
  920. case ReduceMode::ByAll: {
  921. auto columnPtr = Args[0]->GetColumnName();
  922. TNodePtr expr = BuildAtom(Pos, "partitionStream");
  923. if (!columnPtr || *columnPtr != "*") {
  924. expr = Y("Map", "partitionStream", BuildLambda(Pos, Y("keyPair"), Q(L(Y(),\
  925. Y("Nth", "keyPair", Q(ToString("0"))),\
  926. Y("Map", Y("Nth", "keyPair", Q(ToString("1"))), BuildLambda(Pos, Y("row"), Args[0]))))));
  927. }
  928. processPartitions = Y("SqlReduce", "partitionStream", BuildQuotedAtom(Pos, "byAll", TNodeFlags::Default), Udf, expr);
  929. break;
  930. }
  931. case ReduceMode::ByPartition: {
  932. processPartitions = Y("SqlReduce", "partitionStream", extractKeyLambda, Udf,
  933. BuildLambda(Pos, Y("row"), Args[0]));
  934. break;
  935. }
  936. default:
  937. YQL_ENSURE(false, "Unexpected REDUCE mode");
  938. }
  939. }
  940. TNodePtr sortDirection;
  941. TNodePtr sortKeySelector;
  942. FillSortParts(OrderBy, sortDirection, sortKeySelector);
  943. if (!OrderBy.empty()) {
  944. sortKeySelector = BuildLambda(Pos, Y("row"), Y("SqlExtractKey", "row", sortKeySelector));
  945. }
  946. auto partitionByKey = Y(!ListCall && Mode == ReduceMode::ByAll ? "PartitionByKey" : "PartitionsByKeys", "core", extractKeyLambda,
  947. sortDirection, sortKeySelector, BuildLambda(Pos, Y("partitionStream"), processPartitions));
  948. auto inputLabel = ListCall ? "inputRowsList" : "core";
  949. auto block(Y(Y("let", inputLabel, input)));
  950. auto filter = Source->BuildFilter(ctx, inputLabel);
  951. if (filter) {
  952. block = L(block, Y("let", inputLabel, filter));
  953. }
  954. if (ListCall) {
  955. block = L(block, Y("let", "core", "inputRowsList"));
  956. }
  957. if (ctx.EnableSystemColumns) {
  958. block = L(block, Y("let", "core", Y("RemoveSystemMembers", "core")));
  959. }
  960. block = L(block, Y("let", "core", Y("AutoDemux", partitionByKey)));
  961. if (Having) {
  962. block = L(block, Y("let", "core",
  963. Y("Filter", "core", BuildLambda(Pos, Y("row"), Y("Coalesce", Having, Y("Bool", Q("false")))))
  964. ));
  965. }
  966. return Y("block", Q(L(block, Y("return", "core"))));
  967. }
  968. TNodePtr BuildSort(TContext& ctx, const TString& label) override {
  969. Y_UNUSED(ctx);
  970. if (AssumeOrderBy.empty()) {
  971. return nullptr;
  972. }
  973. return Y("let", label, BuildSortSpec(AssumeOrderBy, label, false, true));
  974. }
  975. EOrderKind GetOrderKind() const override {
  976. return AssumeOrderBy.empty() ? EOrderKind::None : EOrderKind::Assume;
  977. }
  978. TWriteSettings GetWriteSettings() const final {
  979. return Settings;
  980. }
  981. bool HasSelectResult() const final {
  982. return !Settings.Discard;
  983. }
  984. TPtr DoClone() const final {
  985. return new TReduceSource(Pos, Mode, Source->CloneSource(), CloneContainer(OrderBy),
  986. CloneContainer(Keys), CloneContainer(Args), SafeClone(Udf), SafeClone(Having), Settings,
  987. CloneContainer(AssumeOrderBy), ListCall);
  988. }
  989. private:
  990. ReduceMode Mode;
  991. TSourcePtr Source;
  992. TVector<TSortSpecificationPtr> OrderBy;
  993. TVector<TNodePtr> Keys;
  994. TVector<TNodePtr> Args;
  995. TNodePtr Udf;
  996. TNodePtr Having;
  997. const TWriteSettings Settings;
  998. TVector<TSortSpecificationPtr> AssumeOrderBy;
  999. const bool ListCall;
  1000. };
  1001. TSourcePtr BuildReduce(TPosition pos,
  1002. ReduceMode mode,
  1003. TSourcePtr source,
  1004. TVector<TSortSpecificationPtr>&& orderBy,
  1005. TVector<TNodePtr>&& keys,
  1006. TVector<TNodePtr>&& args,
  1007. TNodePtr udf,
  1008. TNodePtr having,
  1009. const TWriteSettings& settings,
  1010. const TVector<TSortSpecificationPtr>& assumeOrderBy,
  1011. bool listCall) {
  1012. return new TReduceSource(pos, mode, std::move(source), std::move(orderBy), std::move(keys),
  1013. std::move(args), udf, having, settings, assumeOrderBy, listCall);
  1014. }
  1015. namespace {
  1016. bool InitAndGetGroupKey(TContext& ctx, const TNodePtr& expr, ISource* src, TStringBuf where, TString& keyColumn) {
  1017. keyColumn.clear();
  1018. YQL_ENSURE(src);
  1019. const bool isJoin = src->GetJoin();
  1020. if (!expr->Init(ctx, src)) {
  1021. return false;
  1022. }
  1023. auto keyNamePtr = expr->GetColumnName();
  1024. if (keyNamePtr && expr->GetLabel().empty()) {
  1025. keyColumn = *keyNamePtr;
  1026. auto sourceNamePtr = expr->GetSourceName();
  1027. auto columnNode = expr->GetColumnNode();
  1028. if (isJoin && (!columnNode || !columnNode->IsArtificial())) {
  1029. if (!sourceNamePtr || sourceNamePtr->empty()) {
  1030. if (!src->IsAlias(EExprSeat::GroupBy, keyColumn)) {
  1031. ctx.Error(expr->GetPos()) << "Columns in " << where << " should have correlation name, error in key: " << keyColumn;
  1032. return false;
  1033. }
  1034. } else {
  1035. keyColumn = DotJoin(*sourceNamePtr, keyColumn);
  1036. }
  1037. }
  1038. }
  1039. return true;
  1040. }
  1041. }
  1042. class TCompositeSelect: public IRealSource {
  1043. public:
  1044. TCompositeSelect(TPosition pos, TSourcePtr source, TSourcePtr originalSource, const TWriteSettings& settings)
  1045. : IRealSource(pos)
  1046. , Source(std::move(source))
  1047. , OriginalSource(std::move(originalSource))
  1048. , Settings(settings)
  1049. {
  1050. YQL_ENSURE(Source);
  1051. }
  1052. void SetSubselects(TVector<TSourcePtr>&& subselects, TVector<TNodePtr>&& grouping, TVector<TNodePtr>&& groupByExpr) {
  1053. Subselects = std::move(subselects);
  1054. Grouping = std::move(grouping);
  1055. GroupByExpr = std::move(groupByExpr);
  1056. Y_DEBUG_ABORT_UNLESS(Subselects.size() > 1);
  1057. }
  1058. void GetInputTables(TTableList& tableList) const override {
  1059. for (const auto& select: Subselects) {
  1060. select->GetInputTables(tableList);
  1061. }
  1062. ISource::GetInputTables(tableList);
  1063. }
  1064. bool DoInit(TContext& ctx, ISource* src) override {
  1065. if (AsInner) {
  1066. Source->UseAsInner();
  1067. }
  1068. if (src) {
  1069. src->AddDependentSource(Source.Get());
  1070. }
  1071. if (!Source->Init(ctx, src)) {
  1072. return false;
  1073. }
  1074. if (!Source->InitFilters(ctx)) {
  1075. return false;
  1076. }
  1077. if (!CalculateGroupingCols(ctx, src)) {
  1078. return false;
  1079. }
  1080. auto origSrc = OriginalSource.Get();
  1081. if (!origSrc->Init(ctx, src)) {
  1082. return false;
  1083. }
  1084. if (origSrc->IsFlattenByColumns() || origSrc->IsFlattenColumns()) {
  1085. Flatten = origSrc->IsFlattenByColumns() ?
  1086. origSrc->BuildFlattenByColumns("row") :
  1087. origSrc->BuildFlattenColumns("row");
  1088. if (!Flatten || !Flatten->Init(ctx, src)) {
  1089. return false;
  1090. }
  1091. }
  1092. if (origSrc->IsFlattenByExprs()) {
  1093. for (auto& expr : static_cast<ISource const*>(origSrc)->Expressions(EExprSeat::FlattenByExpr)) {
  1094. if (!expr->Init(ctx, origSrc)) {
  1095. return false;
  1096. }
  1097. }
  1098. PreFlattenMap = origSrc->BuildPreFlattenMap(ctx);
  1099. if (!PreFlattenMap) {
  1100. return false;
  1101. }
  1102. }
  1103. for (const auto& select: Subselects) {
  1104. select->SetLabel(Label);
  1105. if (AsInner) {
  1106. select->UseAsInner();
  1107. }
  1108. if (!select->Init(ctx, Source.Get())) {
  1109. return false;
  1110. }
  1111. }
  1112. TMaybe<size_t> groupingColumnsCount;
  1113. size_t idx = 0;
  1114. for (const auto& select : Subselects) {
  1115. size_t count = select->GetGroupingColumnsCount();
  1116. if (!groupingColumnsCount.Defined()) {
  1117. groupingColumnsCount = count;
  1118. } else if (*groupingColumnsCount != count) {
  1119. ctx.Error(select->GetPos()) << TStringBuilder() << "Mismatch GROUPING() column count in composite select input #"
  1120. << idx << ": expected " << *groupingColumnsCount << ", got: " << count << ". Please submit bug report";
  1121. return false;
  1122. }
  1123. ++idx;
  1124. }
  1125. return true;
  1126. }
  1127. TMaybe<bool> AddColumn(TContext& ctx, TColumnNode& column) override {
  1128. for (const auto& select: Subselects) {
  1129. if (!select->AddColumn(ctx, column)) {
  1130. return {};
  1131. }
  1132. }
  1133. return true;
  1134. }
  1135. TNodePtr Build(TContext& ctx) override {
  1136. auto input = Source->Build(ctx);
  1137. auto block(Y(Y("let", "composite", input)));
  1138. bool ordered = ctx.UseUnordered(*this);
  1139. if (PreFlattenMap) {
  1140. block = L(block, Y("let", "composite", Y(ordered ? "OrderedFlatMap" : "FlatMap", "composite", BuildLambda(Pos, Y("row"), PreFlattenMap))));
  1141. }
  1142. if (Flatten) {
  1143. block = L(block, Y("let", "composite", Y(ordered ? "OrderedFlatMap" : "FlatMap", "composite", BuildLambda(Pos, Y("row"), Flatten, "res"))));
  1144. }
  1145. auto filter = Source->BuildFilter(ctx, "composite");
  1146. if (filter) {
  1147. block = L(block, Y("let", "composite", filter));
  1148. }
  1149. TNodePtr compositeNode = Y("UnionAll");
  1150. for (const auto& select: Subselects) {
  1151. YQL_ENSURE(dynamic_cast<IComposableSource*>(select.Get()));
  1152. auto addNode = select->Build(ctx);
  1153. if (!addNode) {
  1154. return nullptr;
  1155. }
  1156. compositeNode->Add(addNode);
  1157. }
  1158. block = L(block, Y("let", "core", compositeNode));
  1159. YQL_ENSURE(!Subselects.empty());
  1160. dynamic_cast<IComposableSource*>(Subselects.front().Get())->BuildProjectWindowDistinct(block, ctx, false);
  1161. return Y("block", Q(L(block, Y("return", "core"))));
  1162. }
  1163. bool IsGroupByColumn(const TString& column) const override {
  1164. YQL_ENSURE(!GroupingCols.empty());
  1165. return GroupingCols.contains(column);
  1166. }
  1167. const TSet<TString>& GetGroupingCols() const {
  1168. return GroupingCols;
  1169. }
  1170. TNodePtr BuildSort(TContext& ctx, const TString& label) override {
  1171. return Subselects.front()->BuildSort(ctx, label);
  1172. }
  1173. EOrderKind GetOrderKind() const override {
  1174. return Subselects.front()->GetOrderKind();
  1175. }
  1176. const TColumns* GetColumns() const override{
  1177. return Subselects.front()->GetColumns();
  1178. }
  1179. ISource* RealSource() const {
  1180. return Source.Get();
  1181. }
  1182. TWriteSettings GetWriteSettings() const override {
  1183. return Settings;
  1184. }
  1185. bool HasSelectResult() const override {
  1186. return !Settings.Discard;
  1187. }
  1188. TNodePtr DoClone() const final {
  1189. auto newSource = MakeIntrusive<TCompositeSelect>(Pos, Source->CloneSource(), OriginalSource->CloneSource(), Settings);
  1190. newSource->SetSubselects(CloneContainer(Subselects), CloneContainer(Grouping), CloneContainer(GroupByExpr));
  1191. return newSource;
  1192. }
  1193. private:
  1194. bool CalculateGroupingCols(TContext& ctx, ISource* initSrc) {
  1195. auto origSrc = OriginalSource->CloneSource();
  1196. if (!origSrc->Init(ctx, initSrc)) {
  1197. return false;
  1198. }
  1199. bool hasError = false;
  1200. for (auto& expr: GroupByExpr) {
  1201. if (!expr->Init(ctx, origSrc.Get()) || !IsComparableExpression(ctx, expr, false, "GROUP BY")) {
  1202. hasError = true;
  1203. }
  1204. }
  1205. if (!origSrc->AddExpressions(ctx, GroupByExpr, EExprSeat::GroupBy)) {
  1206. hasError = true;
  1207. }
  1208. YQL_ENSURE(!Grouping.empty());
  1209. for (auto& grouping : Grouping) {
  1210. TString keyColumn;
  1211. if (!InitAndGetGroupKey(ctx, grouping, origSrc.Get(), "grouping sets", keyColumn)) {
  1212. hasError = true;
  1213. } else if (!keyColumn.empty()) {
  1214. GroupingCols.insert(keyColumn);
  1215. }
  1216. }
  1217. return !hasError;
  1218. }
  1219. TSourcePtr Source;
  1220. TSourcePtr OriginalSource;
  1221. TNodePtr Flatten;
  1222. TNodePtr PreFlattenMap;
  1223. const TWriteSettings Settings;
  1224. TVector<TSourcePtr> Subselects;
  1225. TVector<TNodePtr> Grouping;
  1226. TVector<TNodePtr> GroupByExpr;
  1227. TSet<TString> GroupingCols;
  1228. };
  1229. namespace {
  1230. TString FullColumnName(const TColumnNode& column) {
  1231. YQL_ENSURE(column.GetColumnName());
  1232. TString columnName = *column.GetColumnName();
  1233. if (column.IsUseSource()) {
  1234. columnName = DotJoin(*column.GetSourceName(), columnName);
  1235. }
  1236. return columnName;
  1237. }
  1238. }
  1239. /// \todo simplify class
  1240. class TSelectCore: public IRealSource, public IComposableSource {
  1241. public:
  1242. TSelectCore(
  1243. TPosition pos,
  1244. TSourcePtr source,
  1245. const TVector<TNodePtr>& groupByExpr,
  1246. const TVector<TNodePtr>& groupBy,
  1247. bool compactGroupBy,
  1248. const TString& groupBySuffix,
  1249. bool assumeSorted,
  1250. const TVector<TSortSpecificationPtr>& orderBy,
  1251. TNodePtr having,
  1252. const TWinSpecs& winSpecs,
  1253. TLegacyHoppingWindowSpecPtr legacyHoppingWindowSpec,
  1254. const TVector<TNodePtr>& terms,
  1255. bool distinct,
  1256. const TVector<TNodePtr>& without,
  1257. bool selectStream,
  1258. const TWriteSettings& settings,
  1259. TColumnsSets&& uniqueSets,
  1260. TColumnsSets&& distinctSets
  1261. )
  1262. : IRealSource(pos)
  1263. , Source(std::move(source))
  1264. , GroupByExpr(groupByExpr)
  1265. , GroupBy(groupBy)
  1266. , AssumeSorted(assumeSorted)
  1267. , CompactGroupBy(compactGroupBy)
  1268. , GroupBySuffix(groupBySuffix)
  1269. , OrderBy(orderBy)
  1270. , Having(having)
  1271. , WinSpecs(winSpecs)
  1272. , Terms(terms)
  1273. , Without(without)
  1274. , Distinct(distinct)
  1275. , LegacyHoppingWindowSpec(legacyHoppingWindowSpec)
  1276. , SelectStream(selectStream)
  1277. , Settings(settings)
  1278. , UniqueSets(std::move(uniqueSets))
  1279. , DistinctSets(std::move(distinctSets))
  1280. {
  1281. }
  1282. void AllColumns() override {
  1283. if (!OrderByInit) {
  1284. Columns.SetAll();
  1285. }
  1286. }
  1287. void GetInputTables(TTableList& tableList) const override {
  1288. Source->GetInputTables(tableList);
  1289. ISource::GetInputTables(tableList);
  1290. }
  1291. size_t GetGroupingColumnsCount() const override {
  1292. return Source->GetGroupingColumnsCount();
  1293. }
  1294. bool DoInit(TContext& ctx, ISource* initSrc) override {
  1295. if (AsInner) {
  1296. Source->UseAsInner();
  1297. }
  1298. if (!Source->Init(ctx, initSrc)) {
  1299. return false;
  1300. }
  1301. if (SelectStream && !Source->IsStream()) {
  1302. ctx.Error(Pos) << "SELECT STREAM is unsupported for non-streaming sources";
  1303. return false;
  1304. }
  1305. auto src = Source.Get();
  1306. bool hasError = false;
  1307. if (src->IsFlattenByExprs()) {
  1308. for (auto& expr : static_cast<ISource const*>(src)->Expressions(EExprSeat::FlattenByExpr)) {
  1309. if (!expr->Init(ctx, src)) {
  1310. hasError = true;
  1311. continue;
  1312. }
  1313. }
  1314. }
  1315. if (hasError) {
  1316. return false;
  1317. }
  1318. src->SetCompactGroupBy(CompactGroupBy);
  1319. src->SetGroupBySuffix(GroupBySuffix);
  1320. for (auto& term: Terms) {
  1321. term->CollectPreaggregateExprs(ctx, *src, DistinctAggrExpr);
  1322. }
  1323. if (Having) {
  1324. Having->CollectPreaggregateExprs(ctx, *src, DistinctAggrExpr);
  1325. }
  1326. for (auto& expr: GroupByExpr) {
  1327. if (auto sessionWindow = dynamic_cast<TSessionWindow*>(expr.Get())) {
  1328. if (Source->IsStream()) {
  1329. ctx.Error(Pos) << "SessionWindow is unsupported for streaming sources";
  1330. return false;
  1331. }
  1332. sessionWindow->MarkValid();
  1333. }
  1334. if (auto hoppingWindow = dynamic_cast<THoppingWindow*>(expr.Get())) {
  1335. hoppingWindow->MarkValid();
  1336. }
  1337. // need to collect and Init() preaggregated exprs before calling Init() on GROUP BY expression
  1338. TVector<TNodePtr> distinctAggrsInGroupBy;
  1339. expr->CollectPreaggregateExprs(ctx, *src, distinctAggrsInGroupBy);
  1340. for (auto& distinct : distinctAggrsInGroupBy) {
  1341. if (!distinct->Init(ctx, src)) {
  1342. return false;
  1343. }
  1344. }
  1345. DistinctAggrExpr.insert(DistinctAggrExpr.end(), distinctAggrsInGroupBy.begin(), distinctAggrsInGroupBy.end());
  1346. if (!expr->Init(ctx, src) || !IsComparableExpression(ctx, expr, false, "GROUP BY")) {
  1347. hasError = true;
  1348. }
  1349. }
  1350. if (hasError || !src->AddExpressions(ctx, GroupByExpr, EExprSeat::GroupBy)) {
  1351. return false;
  1352. }
  1353. for (auto& expr: DistinctAggrExpr) {
  1354. if (!expr->Init(ctx, src)) {
  1355. hasError = true;
  1356. }
  1357. }
  1358. if (hasError || !src->AddExpressions(ctx, DistinctAggrExpr, EExprSeat::DistinctAggr)) {
  1359. return false;
  1360. }
  1361. /// grouped expressions are available in filters
  1362. if (!Source->InitFilters(ctx)) {
  1363. return false;
  1364. }
  1365. for (auto& expr: GroupBy) {
  1366. TString usedColumn;
  1367. if (!InitAndGetGroupKey(ctx, expr, src, "GROUP BY", usedColumn)) {
  1368. hasError = true;
  1369. } else if (usedColumn) {
  1370. if (!src->AddGroupKey(ctx, usedColumn)) {
  1371. hasError = true;
  1372. }
  1373. }
  1374. }
  1375. if (hasError) {
  1376. return false;
  1377. }
  1378. if (Having && !Having->Init(ctx, src)) {
  1379. return false;
  1380. }
  1381. src->AddWindowSpecs(WinSpecs);
  1382. const bool isJoin = Source->GetJoin();
  1383. if (!InitSelect(ctx, src, isJoin, hasError)) {
  1384. return false;
  1385. }
  1386. src->FinishColumns();
  1387. auto aggRes = src->BuildAggregation("core", ctx);
  1388. if (!aggRes.second) {
  1389. return false;
  1390. }
  1391. Aggregate = aggRes.first;
  1392. if (src->IsFlattenByColumns() || src->IsFlattenColumns()) {
  1393. Flatten = src->IsFlattenByColumns() ?
  1394. src->BuildFlattenByColumns("row") :
  1395. src->BuildFlattenColumns("row");
  1396. if (!Flatten || !Flatten->Init(ctx, src)) {
  1397. return false;
  1398. }
  1399. }
  1400. if (src->IsFlattenByExprs()) {
  1401. PreFlattenMap = src->BuildPreFlattenMap(ctx);
  1402. if (!PreFlattenMap) {
  1403. return false;
  1404. }
  1405. }
  1406. if (GroupByExpr || DistinctAggrExpr) {
  1407. PreaggregatedMap = src->BuildPreaggregatedMap(ctx);
  1408. if (!PreaggregatedMap) {
  1409. return false;
  1410. }
  1411. }
  1412. if (Aggregate) {
  1413. if (!Aggregate->Init(ctx, src)) {
  1414. return false;
  1415. }
  1416. if (Having) {
  1417. Aggregate = Y(
  1418. "Filter",
  1419. Aggregate,
  1420. BuildLambda(Pos, Y("row"), Y("Coalesce", Having, Y("Bool", Q("false"))))
  1421. );
  1422. }
  1423. } else if (Having) {
  1424. if (Distinct) {
  1425. Aggregate = Y(
  1426. "Filter",
  1427. "core",
  1428. BuildLambda(Pos, Y("row"), Y("Coalesce", Having, Y("Bool", Q("false"))))
  1429. );
  1430. ctx.Warning(Having->GetPos(), TIssuesIds::YQL_HAVING_WITHOUT_AGGREGATION_IN_SELECT_DISTINCT)
  1431. << "The usage of HAVING without aggregations with SELECT DISTINCT is non-standard and will stop working soon. Please use WHERE instead.";
  1432. } else {
  1433. ctx.Error(Having->GetPos()) << "HAVING with meaning GROUP BY () should be with aggregation function.";
  1434. return false;
  1435. }
  1436. } else if (!Distinct && !GroupBy.empty()) {
  1437. ctx.Error(Pos) << "No aggregations were specified";
  1438. return false;
  1439. }
  1440. if (hasError) {
  1441. return false;
  1442. }
  1443. if (src->IsCalcOverWindow()) {
  1444. if (src->IsExprSeat(EExprSeat::WindowPartitionBy, EExprType::WithExpression)) {
  1445. PrewindowMap = src->BuildPrewindowMap(ctx);
  1446. if (!PrewindowMap) {
  1447. return false;
  1448. }
  1449. }
  1450. CalcOverWindow = src->BuildCalcOverWindow(ctx, "core");
  1451. if (!CalcOverWindow || !CalcOverWindow->Init(ctx, src)) {
  1452. return false;
  1453. }
  1454. }
  1455. return true;
  1456. }
  1457. TNodePtr Build(TContext& ctx) override {
  1458. auto input = Source->Build(ctx);
  1459. if (!input) {
  1460. return nullptr;
  1461. }
  1462. auto block(Y(Y("let", "core", input)));
  1463. if (Source->HasMatchRecognize()) {
  1464. if (auto matchRecognize = Source->BuildMatchRecognize(ctx, "core")) {
  1465. //use unique name match_recognize to find this block easily in unit tests
  1466. block = L(block, Y("let", "match_recognize", matchRecognize));
  1467. //then bind to the conventional name
  1468. block = L(block, Y("let", "core", "match_recognize"));
  1469. } else {
  1470. return nullptr;
  1471. }
  1472. }
  1473. bool ordered = ctx.UseUnordered(*this);
  1474. if (PreFlattenMap) {
  1475. block = L(block, Y("let", "core", Y(ordered ? "OrderedFlatMap" : "FlatMap", "core", BuildLambda(Pos, Y("row"), PreFlattenMap))));
  1476. }
  1477. if (Flatten) {
  1478. block = L(block, Y("let", "core", Y(ordered ? "OrderedFlatMap" : "FlatMap", "core", BuildLambda(Pos, Y("row"), Flatten, "res"))));
  1479. }
  1480. if (PreaggregatedMap) {
  1481. block = L(block, Y("let", "core", PreaggregatedMap));
  1482. if (Source->IsCompositeSource() && !Columns.QualifiedAll) {
  1483. block = L(block, Y("let", "preaggregated", "core"));
  1484. }
  1485. } else if (Source->IsCompositeSource() && !Columns.QualifiedAll) {
  1486. block = L(block, Y("let", "origcore", "core"));
  1487. }
  1488. auto filter = Source->BuildFilter(ctx, "core");
  1489. if (filter) {
  1490. block = L(block, Y("let", "core", filter));
  1491. }
  1492. if (Aggregate) {
  1493. block = L(block, Y("let", "core", Aggregate));
  1494. ordered = false;
  1495. }
  1496. const bool haveCompositeTerms = Source->IsCompositeSource() && !Columns.All && !Columns.QualifiedAll && !Columns.List.empty();
  1497. if (haveCompositeTerms) {
  1498. // column order does not matter here - it will be set in projection
  1499. YQL_ENSURE(Aggregate);
  1500. block = L(block, Y("let", "core", Y("Map", "core", BuildLambda(Pos, Y("row"), CompositeTerms, "row"))));
  1501. }
  1502. if (auto grouping = Source->BuildGroupingColumns("core")) {
  1503. block = L(block, Y("let", "core", grouping));
  1504. }
  1505. if (!Source->GetCompositeSource()) {
  1506. BuildProjectWindowDistinct(block, ctx, ordered);
  1507. }
  1508. return Y("block", Q(L(block, Y("return", "core"))));
  1509. }
  1510. void BuildProjectWindowDistinct(TNodePtr& block, TContext& ctx, bool ordered) override {
  1511. if (PrewindowMap) {
  1512. block = L(block, Y("let", "core", PrewindowMap));
  1513. }
  1514. if (CalcOverWindow) {
  1515. block = L(block, Y("let", "core", CalcOverWindow));
  1516. }
  1517. block = L(block, Y("let", "core", Y("PersistableRepr", BuildSqlProject(ctx, ordered))));
  1518. if (Distinct) {
  1519. block = L(block, Y("let", "core", Y("PersistableRepr", Y("SqlAggregateAll", Y("RemoveSystemMembers", "core")))));
  1520. }
  1521. }
  1522. TNodePtr BuildSort(TContext& ctx, const TString& label) override {
  1523. Y_UNUSED(ctx);
  1524. if (OrderBy.empty() || DisableSort_) {
  1525. return nullptr;
  1526. }
  1527. auto sorted = BuildSortSpec(OrderBy, label, false, AssumeSorted);
  1528. if (ExtraSortColumns.empty()) {
  1529. return Y("let", label, sorted);
  1530. }
  1531. auto body = Y();
  1532. for (const auto& [column, _] : ExtraSortColumns) {
  1533. body = L(body, Y("let", "row", Y("RemoveMember", "row", Q(column))));
  1534. }
  1535. body = L(body, Y("let", "res", "row"));
  1536. return Y("let", label, Y("OrderedMap", sorted, BuildLambda(Pos, Y("row"), body, "res")));
  1537. }
  1538. TNodePtr BuildCleanupColumns(TContext& ctx, const TString& label) override {
  1539. TNodePtr cleanup;
  1540. if (ctx.EnableSystemColumns && ctx.Settings.Mode != NSQLTranslation::ESqlMode::LIMITED_VIEW) {
  1541. if (Columns.All) {
  1542. cleanup = Y("let", label, Y("RemoveSystemMembers", label));
  1543. } else if (!Columns.List.empty()) {
  1544. const bool isJoin = Source->GetJoin();
  1545. if (!isJoin && Columns.QualifiedAll) {
  1546. if (ctx.SimpleColumns) {
  1547. cleanup = Y("let", label, Y("RemoveSystemMembers", label));
  1548. } else {
  1549. TNodePtr members;
  1550. for (auto& term: Terms) {
  1551. if (term->IsAsterisk()) {
  1552. auto sourceName = term->GetSourceName();
  1553. YQL_ENSURE(*sourceName && !sourceName->empty());
  1554. auto prefix = *sourceName + "._yql_";
  1555. members = members ? L(members, Q(prefix)) : Y(Q(prefix));
  1556. }
  1557. }
  1558. if (members) {
  1559. cleanup = Y("let", label, Y("RemovePrefixMembers", label, Q(members)));
  1560. }
  1561. }
  1562. }
  1563. }
  1564. }
  1565. return cleanup;
  1566. }
  1567. bool IsSelect() const override {
  1568. return true;
  1569. }
  1570. bool HasSelectResult() const override {
  1571. return !Settings.Discard;
  1572. }
  1573. bool IsStream() const override {
  1574. return Source->IsStream();
  1575. }
  1576. EOrderKind GetOrderKind() const override {
  1577. if (OrderBy.empty()) {
  1578. return EOrderKind::None;
  1579. }
  1580. return AssumeSorted ? EOrderKind::Assume : EOrderKind::Sort;
  1581. }
  1582. TWriteSettings GetWriteSettings() const override {
  1583. return Settings;
  1584. }
  1585. TMaybe<bool> AddColumn(TContext& ctx, TColumnNode& column) override {
  1586. const bool aggregated = Source->HasAggregations() || Distinct;
  1587. if (OrderByInit && (Source->GetJoin() || !aggregated)) {
  1588. // ORDER BY will try to find column not only in projection items, but also in Source.
  1589. // ```SELECT a, b FROM T ORDER BY c``` should work if c is present in T
  1590. const bool reliable = column.IsReliable();
  1591. column.SetAsNotReliable();
  1592. auto maybeExist = IRealSource::AddColumn(ctx, column);
  1593. if (reliable && !Source->GetJoin()) {
  1594. column.ResetAsReliable();
  1595. }
  1596. if (!maybeExist || !maybeExist.GetRef()) {
  1597. maybeExist = Source->AddColumn(ctx, column);
  1598. }
  1599. if (!maybeExist.Defined()) {
  1600. return maybeExist;
  1601. }
  1602. if (!DisableSort_ && !aggregated && column.GetColumnName() && IsMissingInProjection(ctx, column)) {
  1603. ExtraSortColumns[FullColumnName(column)] = &column;
  1604. }
  1605. return maybeExist;
  1606. }
  1607. return IRealSource::AddColumn(ctx, column);
  1608. }
  1609. bool IsMissingInProjection(TContext& ctx, const TColumnNode& column) const {
  1610. TString columnName = FullColumnName(column);
  1611. if (Columns.Real.contains(columnName) || Columns.Artificial.contains(columnName)) {
  1612. return false;
  1613. }
  1614. if (!Columns.IsColumnPossible(ctx, columnName)) {
  1615. return true;
  1616. }
  1617. for (auto without: Without) {
  1618. auto name = *without->GetColumnName();
  1619. if (Source && Source->GetJoin()) {
  1620. name = DotJoin(*without->GetSourceName(), name);
  1621. }
  1622. if (name == columnName) {
  1623. return true;
  1624. }
  1625. }
  1626. return false;
  1627. }
  1628. TNodePtr PrepareWithout(const TNodePtr& base) {
  1629. auto terms = base;
  1630. if (Without) {
  1631. for (auto without: Without) {
  1632. auto name = *without->GetColumnName();
  1633. if (Source && Source->GetJoin()) {
  1634. name = DotJoin(*without->GetSourceName(), name);
  1635. }
  1636. terms = L(terms, Y("let", "row", Y("RemoveMember", "row", Q(name))));
  1637. }
  1638. }
  1639. if (Source) {
  1640. for (auto column : Source->GetTmpWindowColumns()) {
  1641. terms = L(terms, Y("let", "row", Y("RemoveMember", "row", Q(column))));
  1642. }
  1643. }
  1644. return terms;
  1645. }
  1646. TNodePtr DoClone() const final {
  1647. return new TSelectCore(Pos, Source->CloneSource(), CloneContainer(GroupByExpr),
  1648. CloneContainer(GroupBy), CompactGroupBy, GroupBySuffix, AssumeSorted, CloneContainer(OrderBy),
  1649. SafeClone(Having), CloneContainer(WinSpecs), SafeClone(LegacyHoppingWindowSpec),
  1650. CloneContainer(Terms), Distinct, Without, SelectStream, Settings, TColumnsSets(UniqueSets), TColumnsSets(DistinctSets));
  1651. }
  1652. private:
  1653. bool InitSelect(TContext& ctx, ISource* src, bool isJoin, bool& hasError) {
  1654. for (auto& [name, winSpec] : WinSpecs) {
  1655. for (size_t i = 0; i < winSpec->Partitions.size(); ++i) {
  1656. auto partitionNode = winSpec->Partitions[i];
  1657. if (auto sessionWindow = dynamic_cast<TSessionWindow*>(partitionNode.Get())) {
  1658. if (winSpec->Session) {
  1659. ctx.Error(partitionNode->GetPos()) << "Duplicate session window specification:";
  1660. ctx.Error(winSpec->Session->GetPos()) << "Previous session window is declared here";
  1661. hasError = true;
  1662. continue;
  1663. }
  1664. sessionWindow->MarkValid();
  1665. winSpec->Session = partitionNode;
  1666. }
  1667. if (!partitionNode->Init(ctx, src)) {
  1668. hasError = true;
  1669. continue;
  1670. }
  1671. if (!partitionNode->GetLabel() && !partitionNode->GetColumnName()) {
  1672. TString label = TStringBuilder() << "group_" << name << "_" << i;
  1673. partitionNode->SetLabel(label);
  1674. src->AddTmpWindowColumn(label);
  1675. }
  1676. }
  1677. if (!src->AddExpressions(ctx, winSpec->Partitions, EExprSeat::WindowPartitionBy)) {
  1678. hasError = true;
  1679. }
  1680. }
  1681. if (LegacyHoppingWindowSpec) {
  1682. if (!LegacyHoppingWindowSpec->TimeExtractor->Init(ctx, src)) {
  1683. hasError = true;
  1684. }
  1685. src->SetLegacyHoppingWindowSpec(LegacyHoppingWindowSpec);
  1686. }
  1687. for (auto& term: Terms) {
  1688. if (!term->Init(ctx, src)) {
  1689. hasError = true;
  1690. continue;
  1691. }
  1692. auto column = term->GetColumnName();
  1693. TString label(term->GetLabel());
  1694. bool hasName = true;
  1695. if (label.empty()) {
  1696. auto source = term->GetSourceName();
  1697. if (term->IsAsterisk() && !source->empty()) {
  1698. Columns.QualifiedAll = true;
  1699. label = DotJoin(*source, "*");
  1700. } else if (column) {
  1701. label = isJoin && source && *source ? DotJoin(*source, *column) : *column;
  1702. } else {
  1703. label = Columns.AddUnnamed();
  1704. hasName = false;
  1705. if (ctx.WarnUnnamedColumns) {
  1706. ctx.Warning(term->GetPos(), TIssuesIds::YQL_UNNAMED_COLUMN)
  1707. << "Autogenerated column name " << label << " will be used for expression";
  1708. }
  1709. }
  1710. }
  1711. if (hasName && !Columns.Add(&label, false, false, true)) {
  1712. ctx.Error(Pos) << "Duplicate column: " << label;
  1713. hasError = true;
  1714. }
  1715. }
  1716. CompositeTerms = Y();
  1717. if (!hasError && Source->IsCompositeSource() && !Columns.All && !Columns.QualifiedAll && !Columns.List.empty()) {
  1718. auto compositeSrcPtr = static_cast<TCompositeSelect*>(Source->GetCompositeSource());
  1719. if (compositeSrcPtr) {
  1720. const auto& groupings = compositeSrcPtr->GetGroupingCols();
  1721. for (const auto& column: groupings) {
  1722. if (Source->IsGroupByColumn(column)) {
  1723. continue;
  1724. }
  1725. const TString tableName = (GroupByExpr || DistinctAggrExpr) ? "preaggregated" : "origcore";
  1726. CompositeTerms = L(CompositeTerms, Y("let", "row", Y("AddMember", "row", BuildQuotedAtom(Pos, column), Y("Nothing", Y("MatchType",
  1727. Y("StructMemberType", Y("ListItemType", Y("TypeOf", tableName)), Q(column)),
  1728. Q("Optional"), Y("lambda", Q(Y("item")), "item"), Y("lambda", Q(Y("item")), Y("OptionalType", "item")))))));
  1729. }
  1730. }
  1731. }
  1732. for (auto iter: WinSpecs) {
  1733. auto winSpec = *iter.second;
  1734. for (auto orderSpec: winSpec.OrderBy) {
  1735. if (!orderSpec->OrderExpr->Init(ctx, src)) {
  1736. hasError = true;
  1737. }
  1738. }
  1739. }
  1740. if (Columns.All || Columns.QualifiedAll) {
  1741. Source->AllColumns();
  1742. }
  1743. for (const auto& without: Without) {
  1744. auto namePtr = without->GetColumnName();
  1745. auto sourcePtr = without->GetSourceName();
  1746. YQL_ENSURE(namePtr && *namePtr);
  1747. if (isJoin && !(sourcePtr && *sourcePtr)) {
  1748. ctx.Error(without->GetPos()) << "Expected correlation name for WITHOUT in JOIN";
  1749. hasError = true;
  1750. continue;
  1751. }
  1752. }
  1753. if (Having && !Having->Init(ctx, src)) {
  1754. hasError = true;
  1755. }
  1756. if (!src->IsCompositeSource() && !Columns.All && src->HasAggregations()) {
  1757. WarnIfAliasFromSelectIsUsedInGroupBy(ctx, Terms, GroupBy, GroupByExpr);
  1758. /// verify select aggregation compatibility
  1759. TVector<TNodePtr> exprs(Terms);
  1760. if (Having) {
  1761. exprs.push_back(Having);
  1762. }
  1763. for (const auto& iter: WinSpecs) {
  1764. for (const auto& sortSpec: iter.second->OrderBy) {
  1765. exprs.push_back(sortSpec->OrderExpr);
  1766. }
  1767. }
  1768. if (!ValidateAllNodesForAggregation(ctx, exprs)) {
  1769. hasError = true;
  1770. }
  1771. }
  1772. const auto label = GetLabel();
  1773. for (const auto& sortSpec: OrderBy) {
  1774. auto& expr = sortSpec->OrderExpr;
  1775. SetLabel(Source->GetLabel());
  1776. OrderByInit = true;
  1777. if (!expr->Init(ctx, this)) {
  1778. hasError = true;
  1779. continue;
  1780. }
  1781. OrderByInit = false;
  1782. if (!IsComparableExpression(ctx, expr, AssumeSorted, AssumeSorted ? "ASSUME ORDER BY" : "ORDER BY")) {
  1783. hasError = true;
  1784. continue;
  1785. }
  1786. }
  1787. SetLabel(label);
  1788. return !hasError;
  1789. }
  1790. TNodePtr PrepareJoinCoalesce(TContext& ctx, const TNodePtr& base, bool multipleQualifiedAll, const TVector<TString>& coalesceLabels) {
  1791. const bool isJoin = Source->GetJoin();
  1792. const bool needCoalesce = isJoin && ctx.SimpleColumns &&
  1793. (Columns.All || multipleQualifiedAll || ctx.CoalesceJoinKeysOnQualifiedAll);
  1794. if (!needCoalesce) {
  1795. return base;
  1796. }
  1797. auto terms = base;
  1798. const auto& sameKeyMap = Source->GetJoin()->GetSameKeysMap();
  1799. if (sameKeyMap) {
  1800. terms = L(terms, Y("let", "flatSameKeys", "row"));
  1801. for (const auto& [key, sources]: sameKeyMap) {
  1802. auto coalesceKeys = Y();
  1803. for (const auto& label : coalesceLabels) {
  1804. if (sources.contains(label)) {
  1805. coalesceKeys = L(coalesceKeys, Q(DotJoin(label, key)));
  1806. }
  1807. }
  1808. terms = L(terms, Y("let", "flatSameKeys", Y("CoalesceMembers", "flatSameKeys", Q(coalesceKeys))));
  1809. }
  1810. terms = L(terms, Y("let", "row", "flatSameKeys"));
  1811. }
  1812. return terms;
  1813. }
  1814. TNodePtr BuildSqlProject(TContext& ctx, bool ordered) {
  1815. auto sqlProjectArgs = Y();
  1816. const bool isJoin = Source->GetJoin();
  1817. if (Columns.All) {
  1818. YQL_ENSURE(Columns.List.empty());
  1819. auto terms = PrepareWithout(Y());
  1820. auto options = Y();
  1821. if (isJoin && ctx.SimpleColumns) {
  1822. terms = PrepareJoinCoalesce(ctx, terms, false, Source->GetJoin()->GetJoinLabels());
  1823. auto members = Y();
  1824. for (auto& source : Source->GetJoin()->GetJoinLabels()) {
  1825. YQL_ENSURE(!source.empty());
  1826. members = L(members, BuildQuotedAtom(Pos, source + "."));
  1827. }
  1828. if (GroupByExpr.empty() || ctx.BogousStarInGroupByOverJoin) {
  1829. terms = L(terms, Y("let", "res", Y("DivePrefixMembers", "row", Q(members))));
  1830. } else {
  1831. auto groupExprStruct = Y("AsStruct");
  1832. for (auto node : GroupByExpr) {
  1833. auto label = node->GetLabel();
  1834. YQL_ENSURE(label);
  1835. if (Source->IsGroupByColumn(label)) {
  1836. auto name = BuildQuotedAtom(Pos, label);
  1837. groupExprStruct = L(groupExprStruct, Q(Y(name, Y("Member", "row", name))));
  1838. }
  1839. }
  1840. auto groupColumnsStruct = Y("DivePrefixMembers", "row", Q(members));
  1841. terms = L(terms, Y("let", "res", Y("FlattenMembers", Q(Y(BuildQuotedAtom(Pos, ""), groupExprStruct)),
  1842. Q(Y(BuildQuotedAtom(Pos, ""), groupColumnsStruct)))));
  1843. }
  1844. options = L(options, Q(Y(Q("divePrefix"), Q(members))));
  1845. } else {
  1846. terms = L(terms, Y("let", "res", "row"));
  1847. }
  1848. sqlProjectArgs = L(sqlProjectArgs, Y("SqlProjectStarItem", "projectCoreType", BuildQuotedAtom(Pos, ""), BuildLambda(Pos, Y("row"), terms, "res"), Q(options)));
  1849. } else {
  1850. YQL_ENSURE(!Columns.List.empty());
  1851. YQL_ENSURE(Columns.List.size() == Terms.size());
  1852. TVector<TString> coalesceLabels;
  1853. bool multipleQualifiedAll = false;
  1854. if (isJoin && ctx.SimpleColumns) {
  1855. THashSet<TString> starTerms;
  1856. for (auto& term: Terms) {
  1857. if (term->IsAsterisk()) {
  1858. auto sourceName = term->GetSourceName();
  1859. YQL_ENSURE(*sourceName && !sourceName->empty());
  1860. YQL_ENSURE(Columns.QualifiedAll);
  1861. starTerms.insert(*sourceName);
  1862. }
  1863. }
  1864. TVector<TString> matched;
  1865. TVector<TString> unmatched;
  1866. for (auto& label : Source->GetJoin()->GetJoinLabels()) {
  1867. if (starTerms.contains(label)) {
  1868. matched.push_back(label);
  1869. } else {
  1870. unmatched.push_back(label);
  1871. }
  1872. }
  1873. coalesceLabels.insert(coalesceLabels.end(), matched.begin(), matched.end());
  1874. coalesceLabels.insert(coalesceLabels.end(), unmatched.begin(), unmatched.end());
  1875. multipleQualifiedAll = starTerms.size() > 1;
  1876. }
  1877. auto column = Columns.List.begin();
  1878. auto isNamedColumn = Columns.NamedColumns.begin();
  1879. for (auto& term: Terms) {
  1880. auto sourceName = term->GetSourceName();
  1881. if (!term->IsAsterisk()) {
  1882. auto body = Y();
  1883. body = L(body, Y("let", "res", term));
  1884. TPosition lambdaPos = Pos;
  1885. TPosition aliasPos = Pos;
  1886. if (term->IsImplicitLabel() && ctx.WarnOnAnsiAliasShadowing) {
  1887. // TODO: recanonize for positions below
  1888. lambdaPos = term->GetPos();
  1889. aliasPos = term->GetLabelPos() ? *term->GetLabelPos() : lambdaPos;
  1890. }
  1891. auto projectItem = Y("SqlProjectItem", "projectCoreType", BuildQuotedAtom(aliasPos, *isNamedColumn ? *column : ""), BuildLambda(lambdaPos, Y("row"), body, "res"));
  1892. if (term->IsImplicitLabel() && ctx.WarnOnAnsiAliasShadowing) {
  1893. projectItem = L(projectItem, Q(Y(Q(Y(Q("warnShadow"))))));
  1894. }
  1895. if (!*isNamedColumn) {
  1896. projectItem = L(projectItem, Q(Y(Q(Y(Q("autoName"))))));
  1897. }
  1898. sqlProjectArgs = L(sqlProjectArgs, projectItem);
  1899. } else {
  1900. auto terms = PrepareWithout(Y());
  1901. auto options = Y();
  1902. if (ctx.SimpleColumns && !isJoin) {
  1903. terms = L(terms, Y("let", "res", "row"));
  1904. } else {
  1905. terms = PrepareJoinCoalesce(ctx, terms, multipleQualifiedAll, coalesceLabels);
  1906. auto members = isJoin ? Y() : Y("FlattenMembers");
  1907. if (isJoin) {
  1908. members = L(members, BuildQuotedAtom(Pos, *sourceName + "."));
  1909. if (ctx.SimpleColumns) {
  1910. options = L(options, Q(Y(Q("divePrefix"), Q(members))));
  1911. }
  1912. members = Y(ctx.SimpleColumns ? "DivePrefixMembers" : "SelectMembers", "row", Q(members));
  1913. } else {
  1914. auto prefix = BuildQuotedAtom(Pos, ctx.SimpleColumns ? "" : *sourceName + ".");
  1915. members = L(members, Q(Y(prefix, "row")));
  1916. if (!ctx.SimpleColumns) {
  1917. options = L(options, Q(Y(Q("addPrefix"), prefix)));
  1918. }
  1919. }
  1920. terms = L(terms, Y("let", "res", members));
  1921. }
  1922. sqlProjectArgs = L(sqlProjectArgs, Y("SqlProjectStarItem", "projectCoreType", BuildQuotedAtom(Pos, *sourceName), BuildLambda(Pos, Y("row"), terms, "res"), Q(options)));
  1923. }
  1924. ++column;
  1925. ++isNamedColumn;
  1926. }
  1927. }
  1928. for (const auto& [columnName, column]: ExtraSortColumns) {
  1929. auto body = Y();
  1930. body = L(body, Y("let", "res", column));
  1931. TPosition pos = column->GetPos();
  1932. auto projectItem = Y("SqlProjectItem", "projectCoreType", BuildQuotedAtom(pos, columnName), BuildLambda(pos, Y("row"), body, "res"));
  1933. sqlProjectArgs = L(sqlProjectArgs, projectItem);
  1934. }
  1935. auto block(Y(Y("let", "projectCoreType", Y("TypeOf", "core"))));
  1936. block = L(block, Y("let", "core", Y(ordered ? "OrderedSqlProject" : "SqlProject", "core", Q(sqlProjectArgs))));
  1937. if (!(UniqueSets.empty() && DistinctSets.empty())) {
  1938. block = L(block, Y("let", "core", Y("RemoveSystemMembers", "core")));
  1939. const auto MakeUniqueHint = [this](INode::TPtr& block, const TColumnsSets& sets, bool distinct) {
  1940. if (!sets.empty()) {
  1941. auto assume = Y(distinct ? "AssumeDistinctHint" : "AssumeUniqueHint", "core");
  1942. if (!sets.front().empty()) {
  1943. for (const auto& columns : sets) {
  1944. auto set = Y();
  1945. for (const auto& column : columns) {
  1946. set = L(set, Q(column));
  1947. }
  1948. assume = L(assume, Q(set));
  1949. }
  1950. }
  1951. block = L(block, Y("let", "core", assume));
  1952. }
  1953. };
  1954. MakeUniqueHint(block, DistinctSets, true);
  1955. MakeUniqueHint(block, UniqueSets, false);
  1956. }
  1957. return Y("block", Q(L(block, Y("return", "core"))));
  1958. }
  1959. private:
  1960. TSourcePtr Source;
  1961. TVector<TNodePtr> GroupByExpr;
  1962. TVector<TNodePtr> DistinctAggrExpr;
  1963. TVector<TNodePtr> GroupBy;
  1964. bool AssumeSorted = false;
  1965. bool CompactGroupBy = false;
  1966. TString GroupBySuffix;
  1967. TVector<TSortSpecificationPtr> OrderBy;
  1968. TNodePtr Having;
  1969. TWinSpecs WinSpecs;
  1970. TNodePtr Flatten;
  1971. TNodePtr PreFlattenMap;
  1972. TNodePtr PreaggregatedMap;
  1973. TNodePtr PrewindowMap;
  1974. TNodePtr Aggregate;
  1975. TNodePtr CalcOverWindow;
  1976. TNodePtr CompositeTerms;
  1977. TVector<TNodePtr> Terms;
  1978. TVector<TNodePtr> Without;
  1979. const bool Distinct;
  1980. bool OrderByInit = false;
  1981. TLegacyHoppingWindowSpecPtr LegacyHoppingWindowSpec;
  1982. const bool SelectStream;
  1983. const TWriteSettings Settings;
  1984. const TColumnsSets UniqueSets, DistinctSets;
  1985. TMap<TString, TNodePtr> ExtraSortColumns;
  1986. };
  1987. class TProcessSource: public IRealSource {
  1988. public:
  1989. TProcessSource(
  1990. TPosition pos,
  1991. TSourcePtr source,
  1992. TNodePtr with,
  1993. bool withExtFunction,
  1994. TVector<TNodePtr>&& terms,
  1995. bool listCall,
  1996. bool processStream,
  1997. const TWriteSettings& settings,
  1998. const TVector<TSortSpecificationPtr>& assumeOrderBy
  1999. )
  2000. : IRealSource(pos)
  2001. , Source(std::move(source))
  2002. , With(with)
  2003. , WithExtFunction(withExtFunction)
  2004. , Terms(std::move(terms))
  2005. , ListCall(listCall)
  2006. , ProcessStream(processStream)
  2007. , Settings(settings)
  2008. , AssumeOrderBy(assumeOrderBy)
  2009. {
  2010. }
  2011. void GetInputTables(TTableList& tableList) const override {
  2012. Source->GetInputTables(tableList);
  2013. ISource::GetInputTables(tableList);
  2014. }
  2015. bool DoInit(TContext& ctx, ISource* initSrc) override {
  2016. if (AsInner) {
  2017. Source->UseAsInner();
  2018. }
  2019. if (!Source->Init(ctx, initSrc)) {
  2020. return false;
  2021. }
  2022. if (ProcessStream && !Source->IsStream()) {
  2023. ctx.Error(Pos) << "PROCESS STREAM is unsupported for non-streaming sources";
  2024. return false;
  2025. }
  2026. auto src = Source.Get();
  2027. if (!With) {
  2028. src->AllColumns();
  2029. Columns.SetAll();
  2030. src->FinishColumns();
  2031. return true;
  2032. }
  2033. /// grouped expressions are available in filters
  2034. if (!Source->InitFilters(ctx)) {
  2035. return false;
  2036. }
  2037. TSourcePtr fakeSource = nullptr;
  2038. if (ListCall && !WithExtFunction) {
  2039. fakeSource = BuildFakeSource(src->GetPos());
  2040. src->AllColumns();
  2041. }
  2042. auto processSource = fakeSource != nullptr ? fakeSource.Get() : src;
  2043. Y_DEBUG_ABORT_UNLESS(processSource != nullptr);
  2044. if (!With->Init(ctx, processSource)) {
  2045. return false;
  2046. }
  2047. if (With->GetLabel().empty()) {
  2048. Columns.SetAll();
  2049. } else {
  2050. if (ListCall) {
  2051. ctx.Error(With->GetPos()) << "Label is not allowed to use with TableRows()";
  2052. return false;
  2053. }
  2054. Columns.Add(&With->GetLabel(), false);
  2055. }
  2056. bool hasError = false;
  2057. TNodePtr produce;
  2058. if (WithExtFunction) {
  2059. produce = Y();
  2060. } else {
  2061. TString processCall = (ListCall ? "SqlProcess" : "Apply");
  2062. produce = Y(processCall, With);
  2063. }
  2064. TMaybe<ui32> listPosIndex;
  2065. ui32 termIndex = 0;
  2066. for (auto& term: Terms) {
  2067. if (!term->GetLabel().empty()) {
  2068. ctx.Error(term->GetPos()) << "Labels are not allowed for PROCESS terms";
  2069. hasError = true;
  2070. continue;
  2071. }
  2072. if (!term->Init(ctx, processSource)) {
  2073. hasError = true;
  2074. continue;
  2075. }
  2076. if (ListCall) {
  2077. if (auto atom = dynamic_cast<TTableRows*>(term.Get())) {
  2078. listPosIndex = termIndex;
  2079. }
  2080. }
  2081. ++termIndex;
  2082. produce = L(produce, term);
  2083. }
  2084. if (hasError) {
  2085. return false;
  2086. }
  2087. if (ListCall && !WithExtFunction) {
  2088. YQL_ENSURE(listPosIndex.Defined());
  2089. produce = L(produce, Q(ToString(*listPosIndex)));
  2090. }
  2091. if (!produce->Init(ctx, src)) {
  2092. hasError = true;
  2093. }
  2094. if (!(WithExtFunction && Terms.empty())) {
  2095. TVector<TNodePtr>(1, produce).swap(Terms);
  2096. }
  2097. src->FinishColumns();
  2098. const auto label = GetLabel();
  2099. for (const auto& sortSpec: AssumeOrderBy) {
  2100. auto& expr = sortSpec->OrderExpr;
  2101. SetLabel(Source->GetLabel());
  2102. if (!expr->Init(ctx, this)) {
  2103. hasError = true;
  2104. continue;
  2105. }
  2106. if (!IsComparableExpression(ctx, expr, true, "ASSUME ORDER BY")) {
  2107. hasError = true;
  2108. continue;
  2109. }
  2110. }
  2111. SetLabel(label);
  2112. return !hasError;
  2113. }
  2114. TNodePtr Build(TContext& ctx) override {
  2115. auto input = Source->Build(ctx);
  2116. if (!input) {
  2117. return nullptr;
  2118. }
  2119. if (!With) {
  2120. auto res = input;
  2121. if (ctx.EnableSystemColumns) {
  2122. res = Y("RemoveSystemMembers", res);
  2123. }
  2124. return res;
  2125. }
  2126. TString inputLabel = ListCall ? "inputRowsList" : "core";
  2127. auto block(Y(Y("let", inputLabel, input)));
  2128. auto filter = Source->BuildFilter(ctx, inputLabel);
  2129. if (filter) {
  2130. block = L(block, Y("let", inputLabel, filter));
  2131. }
  2132. if (WithExtFunction) {
  2133. auto preTransform = Y("RemoveSystemMembers", inputLabel);
  2134. if (Terms.size() > 0) {
  2135. preTransform = Y("Map", preTransform, BuildLambda(Pos, Y("row"), Q(Terms[0])));
  2136. }
  2137. block = L(block, Y("let", inputLabel, preTransform));
  2138. block = L(block, Y("let", "transform", With));
  2139. block = L(block, Y("let", "core", Y("Apply", "transform", inputLabel)));
  2140. } else if (ListCall) {
  2141. block = L(block, Y("let", "core", Terms[0]));
  2142. } else {
  2143. auto terms = BuildColumnsTerms(ctx);
  2144. block = L(block, Y("let", "core", Y(ctx.UseUnordered(*this) ? "OrderedFlatMap" : "FlatMap", "core", BuildLambda(Pos, Y("row"), terms, "res"))));
  2145. }
  2146. block = L(block, Y("let", "core", Y("AutoDemux", Y("PersistableRepr", "core"))));
  2147. return Y("block", Q(L(block, Y("return", "core"))));
  2148. }
  2149. TNodePtr BuildSort(TContext& ctx, const TString& label) override {
  2150. Y_UNUSED(ctx);
  2151. if (AssumeOrderBy.empty()) {
  2152. return nullptr;
  2153. }
  2154. return Y("let", label, BuildSortSpec(AssumeOrderBy, label, false, true));
  2155. }
  2156. EOrderKind GetOrderKind() const override {
  2157. if (!With) {
  2158. return EOrderKind::Passthrough;
  2159. }
  2160. return AssumeOrderBy.empty() ? EOrderKind::None : EOrderKind::Assume;
  2161. }
  2162. bool IsSelect() const override {
  2163. return false;
  2164. }
  2165. bool HasSelectResult() const override {
  2166. return !Settings.Discard;
  2167. }
  2168. bool IsStream() const override {
  2169. return Source->IsStream();
  2170. }
  2171. TWriteSettings GetWriteSettings() const override {
  2172. return Settings;
  2173. }
  2174. TNodePtr DoClone() const final {
  2175. return new TProcessSource(Pos, Source->CloneSource(), SafeClone(With), WithExtFunction,
  2176. CloneContainer(Terms), ListCall, ProcessStream, Settings, CloneContainer(AssumeOrderBy));
  2177. }
  2178. private:
  2179. TNodePtr BuildColumnsTerms(TContext& ctx) {
  2180. Y_UNUSED(ctx);
  2181. TNodePtr terms;
  2182. Y_DEBUG_ABORT_UNLESS(Terms.size() == 1);
  2183. if (Columns.All) {
  2184. terms = Y(Y("let", "res", Y("ToSequence", Terms.front())));
  2185. } else {
  2186. Y_DEBUG_ABORT_UNLESS(Columns.List.size() == Terms.size());
  2187. terms = L(Y(), Y("let", "res",
  2188. L(Y("AsStructUnordered"), Q(Y(BuildQuotedAtom(Pos, Columns.List.front()), Terms.front())))));
  2189. terms = L(terms, Y("let", "res", Y("Just", "res")));
  2190. }
  2191. return terms;
  2192. }
  2193. private:
  2194. TSourcePtr Source;
  2195. TNodePtr With;
  2196. const bool WithExtFunction;
  2197. TVector<TNodePtr> Terms;
  2198. const bool ListCall;
  2199. const bool ProcessStream;
  2200. const TWriteSettings Settings;
  2201. TVector<TSortSpecificationPtr> AssumeOrderBy;
  2202. };
  2203. TSourcePtr BuildProcess(
  2204. TPosition pos,
  2205. TSourcePtr source,
  2206. TNodePtr with,
  2207. bool withExtFunction,
  2208. TVector<TNodePtr>&& terms,
  2209. bool listCall,
  2210. bool processStream,
  2211. const TWriteSettings& settings,
  2212. const TVector<TSortSpecificationPtr>& assumeOrderBy
  2213. ) {
  2214. return new TProcessSource(pos, std::move(source), with, withExtFunction, std::move(terms), listCall, processStream, settings, assumeOrderBy);
  2215. }
  2216. class TNestedProxySource: public IProxySource {
  2217. public:
  2218. TNestedProxySource(TPosition pos, const TVector<TNodePtr>& groupBy, TSourcePtr source)
  2219. : IProxySource(pos, source.Get())
  2220. , CompositeSelect(nullptr)
  2221. , Holder(std::move(source))
  2222. , GroupBy(groupBy)
  2223. {}
  2224. TNestedProxySource(TCompositeSelect* compositeSelect, const TVector<TNodePtr>& groupBy)
  2225. : IProxySource(compositeSelect->GetPos(), compositeSelect->RealSource())
  2226. , CompositeSelect(compositeSelect)
  2227. , GroupBy(groupBy)
  2228. {}
  2229. bool DoInit(TContext& ctx, ISource* src) override {
  2230. return Source->Init(ctx, src);
  2231. }
  2232. TNodePtr Build(TContext& ctx) override {
  2233. return CompositeSelect ? BuildAtom(Pos, "composite", TNodeFlags::Default) : Source->Build(ctx);
  2234. }
  2235. bool InitFilters(TContext& ctx) override {
  2236. return CompositeSelect ? true : Source->InitFilters(ctx);
  2237. }
  2238. TNodePtr BuildFilter(TContext& ctx, const TString& label) override {
  2239. return CompositeSelect ? nullptr : Source->BuildFilter(ctx, label);
  2240. }
  2241. IJoin* GetJoin() override {
  2242. return Source->GetJoin();
  2243. }
  2244. bool IsCompositeSource() const override {
  2245. return true;
  2246. }
  2247. ISource* GetCompositeSource() override {
  2248. return CompositeSelect;
  2249. }
  2250. bool AddGrouping(TContext& ctx, const TVector<TString>& columns, TString& hintColumn) override {
  2251. Y_UNUSED(ctx);
  2252. hintColumn = TStringBuilder() << "GroupingHint" << Hints.size();
  2253. ui64 hint = 0;
  2254. if (GroupByColumns.empty()) {
  2255. const bool isJoin = GetJoin();
  2256. for (const auto& groupByNode: GroupBy) {
  2257. auto namePtr = groupByNode->GetColumnName();
  2258. YQL_ENSURE(namePtr);
  2259. TString column = *namePtr;
  2260. if (isJoin) {
  2261. auto sourceNamePtr = groupByNode->GetSourceName();
  2262. if (sourceNamePtr && !sourceNamePtr->empty()) {
  2263. column = DotJoin(*sourceNamePtr, column);
  2264. }
  2265. }
  2266. GroupByColumns.insert(column);
  2267. }
  2268. }
  2269. for (const auto& column: columns) {
  2270. hint <<= 1;
  2271. if (!GroupByColumns.contains(column)) {
  2272. hint += 1;
  2273. }
  2274. }
  2275. Hints.push_back(hint);
  2276. return true;
  2277. }
  2278. size_t GetGroupingColumnsCount() const override {
  2279. return Hints.size();
  2280. }
  2281. TNodePtr BuildGroupingColumns(const TString& label) override {
  2282. if (Hints.empty()) {
  2283. return nullptr;
  2284. }
  2285. auto body = Y();
  2286. for (size_t i = 0; i < Hints.size(); ++i) {
  2287. TString hintColumn = TStringBuilder() << "GroupingHint" << i;
  2288. TString hintValue = ToString(Hints[i]);
  2289. body = L(body, Y("let", "row", Y("AddMember", "row", Q(hintColumn), Y("Uint64", Q(hintValue)))));
  2290. }
  2291. return Y("Map", label, BuildLambda(Pos, Y("row"), body, "row"));
  2292. }
  2293. void FinishColumns() override {
  2294. Source->FinishColumns();
  2295. }
  2296. TMaybe<bool> AddColumn(TContext& ctx, TColumnNode& column) override {
  2297. if (const TString* columnName = column.GetColumnName()) {
  2298. if (columnName && IsExprAlias(*columnName)) {
  2299. return true;
  2300. }
  2301. }
  2302. return Source->AddColumn(ctx, column);
  2303. }
  2304. TPtr DoClone() const final {
  2305. YQL_ENSURE(Hints.empty());
  2306. return Holder.Get() ? new TNestedProxySource(Pos, CloneContainer(GroupBy), Holder->CloneSource()) :
  2307. new TNestedProxySource(CompositeSelect, CloneContainer(GroupBy));
  2308. }
  2309. private:
  2310. TCompositeSelect* CompositeSelect;
  2311. TSourcePtr Holder;
  2312. TVector<TNodePtr> GroupBy;
  2313. mutable TSet<TString> GroupByColumns;
  2314. mutable TVector<ui64> Hints;
  2315. };
  2316. namespace {
  2317. TSourcePtr DoBuildSelectCore(
  2318. TContext& ctx,
  2319. TPosition pos,
  2320. TSourcePtr originalSource,
  2321. TSourcePtr source,
  2322. const TVector<TNodePtr>& groupByExpr,
  2323. const TVector<TNodePtr>& groupBy,
  2324. bool compactGroupBy,
  2325. const TString& groupBySuffix,
  2326. bool assumeSorted,
  2327. const TVector<TSortSpecificationPtr>& orderBy,
  2328. TNodePtr having,
  2329. TWinSpecs&& winSpecs,
  2330. TLegacyHoppingWindowSpecPtr legacyHoppingWindowSpec,
  2331. TVector<TNodePtr>&& terms,
  2332. bool distinct,
  2333. TVector<TNodePtr>&& without,
  2334. bool selectStream,
  2335. const TWriteSettings& settings,
  2336. TColumnsSets&& uniqueSets,
  2337. TColumnsSets&& distinctSets
  2338. ) {
  2339. if (groupBy.empty() || !groupBy.front()->ContentListPtr()) {
  2340. return new TSelectCore(pos, std::move(source), groupByExpr, groupBy, compactGroupBy, groupBySuffix, assumeSorted,
  2341. orderBy, having, winSpecs, legacyHoppingWindowSpec, terms, distinct, without, selectStream, settings, std::move(uniqueSets), std::move(distinctSets));
  2342. }
  2343. if (groupBy.size() == 1) {
  2344. /// actualy no big idea to use grouping function in this case (result allways 0)
  2345. auto contentPtr = groupBy.front()->ContentListPtr();
  2346. source = new TNestedProxySource(pos, *contentPtr, source);
  2347. return DoBuildSelectCore(ctx, pos, originalSource, source, groupByExpr, *contentPtr, compactGroupBy, groupBySuffix,
  2348. assumeSorted, orderBy, having, std::move(winSpecs),
  2349. legacyHoppingWindowSpec, std::move(terms), distinct, std::move(without), selectStream, settings, std::move(uniqueSets), std::move(distinctSets));
  2350. }
  2351. /// \todo some smart merge logic, generalize common part of grouping (expr, flatten, etc)?
  2352. TIntrusivePtr<TCompositeSelect> compositeSelect = new TCompositeSelect(pos, std::move(source), originalSource->CloneSource(), settings);
  2353. size_t totalGroups = 0;
  2354. TVector<TSourcePtr> subselects;
  2355. TVector<TNodePtr> groupingCols;
  2356. for (auto& grouping: groupBy) {
  2357. auto contentPtr = grouping->ContentListPtr();
  2358. TVector<TNodePtr> cache(1, nullptr);
  2359. if (!contentPtr) {
  2360. cache[0] = grouping;
  2361. contentPtr = &cache;
  2362. }
  2363. groupingCols.insert(groupingCols.end(), contentPtr->cbegin(), contentPtr->cend());
  2364. TSourcePtr proxySource = new TNestedProxySource(compositeSelect.Get(), CloneContainer(*contentPtr));
  2365. if (!subselects.empty()) {
  2366. /// clone terms for others usage
  2367. TVector<TNodePtr> termsCopy;
  2368. for (const auto& term: terms) {
  2369. termsCopy.emplace_back(term->Clone());
  2370. }
  2371. std::swap(terms, termsCopy);
  2372. }
  2373. totalGroups += contentPtr->size();
  2374. TSelectCore* selectCore = new TSelectCore(pos, std::move(proxySource), CloneContainer(groupByExpr),
  2375. CloneContainer(*contentPtr), compactGroupBy, groupBySuffix, assumeSorted, orderBy, SafeClone(having), CloneContainer(winSpecs),
  2376. legacyHoppingWindowSpec, terms, distinct, without, selectStream, settings, TColumnsSets(uniqueSets), TColumnsSets(distinctSets));
  2377. subselects.emplace_back(selectCore);
  2378. }
  2379. if (totalGroups > ctx.PragmaGroupByLimit) {
  2380. ctx.Error(pos) << "Unable to GROUP BY more than " << ctx.PragmaGroupByLimit << " groups, you try use " << totalGroups << " groups";
  2381. return nullptr;
  2382. }
  2383. compositeSelect->SetSubselects(std::move(subselects), std::move(groupingCols), CloneContainer(groupByExpr));
  2384. return compositeSelect;
  2385. }
  2386. }
  2387. TSourcePtr BuildSelectCore(
  2388. TContext& ctx,
  2389. TPosition pos,
  2390. TSourcePtr source,
  2391. const TVector<TNodePtr>& groupByExpr,
  2392. const TVector<TNodePtr>& groupBy,
  2393. bool compactGroupBy,
  2394. const TString& groupBySuffix,
  2395. bool assumeSorted,
  2396. const TVector<TSortSpecificationPtr>& orderBy,
  2397. TNodePtr having,
  2398. TWinSpecs&& winSpecs,
  2399. TLegacyHoppingWindowSpecPtr legacyHoppingWindowSpec,
  2400. TVector<TNodePtr>&& terms,
  2401. bool distinct,
  2402. TVector<TNodePtr>&& without,
  2403. bool selectStream,
  2404. const TWriteSettings& settings,
  2405. TColumnsSets&& uniqueSets,
  2406. TColumnsSets&& distinctSets
  2407. )
  2408. {
  2409. return DoBuildSelectCore(ctx, pos, source, source, groupByExpr, groupBy, compactGroupBy, groupBySuffix, assumeSorted, orderBy,
  2410. having, std::move(winSpecs), legacyHoppingWindowSpec, std::move(terms), distinct, std::move(without), selectStream, settings, std::move(uniqueSets), std::move(distinctSets));
  2411. }
  2412. class TUnion: public IRealSource {
  2413. public:
  2414. TUnion(TPosition pos, TVector<TSourcePtr>&& sources, bool quantifierAll, const TWriteSettings& settings)
  2415. : IRealSource(pos)
  2416. , Sources(std::move(sources))
  2417. , QuantifierAll(quantifierAll)
  2418. , Settings(settings)
  2419. {
  2420. }
  2421. const TColumns* GetColumns() const override {
  2422. return IRealSource::GetColumns();
  2423. }
  2424. void GetInputTables(TTableList& tableList) const override {
  2425. for (auto& x : Sources) {
  2426. x->GetInputTables(tableList);
  2427. }
  2428. ISource::GetInputTables(tableList);
  2429. }
  2430. bool DoInit(TContext& ctx, ISource* src) override {
  2431. bool first = true;
  2432. for (auto& s: Sources) {
  2433. s->UseAsInner();
  2434. if (!s->Init(ctx, src)) {
  2435. return false;
  2436. }
  2437. if (!ctx.PositionalUnionAll || first) {
  2438. auto c = s->GetColumns();
  2439. Y_DEBUG_ABORT_UNLESS(c);
  2440. Columns.Merge(*c);
  2441. first = false;
  2442. }
  2443. }
  2444. return true;
  2445. }
  2446. TNodePtr Build(TContext& ctx) override {
  2447. TPtr res;
  2448. if (QuantifierAll) {
  2449. res = ctx.PositionalUnionAll ? Y("UnionAllPositional") : Y("UnionAll");
  2450. } else {
  2451. res = ctx.PositionalUnionAll ? Y("UnionPositional") : Y("Union");
  2452. }
  2453. for (auto& s: Sources) {
  2454. auto input = s->Build(ctx);
  2455. if (!input) {
  2456. return nullptr;
  2457. }
  2458. res->Add(input);
  2459. }
  2460. return res;
  2461. }
  2462. bool IsStream() const override {
  2463. for (auto& s: Sources) {
  2464. if (!s->IsStream()) {
  2465. return false;
  2466. }
  2467. }
  2468. return true;
  2469. }
  2470. TNodePtr DoClone() const final {
  2471. return MakeIntrusive<TUnion>(Pos, CloneContainer(Sources), QuantifierAll, Settings);
  2472. }
  2473. bool IsSelect() const override {
  2474. return true;
  2475. }
  2476. bool HasSelectResult() const override {
  2477. return !Settings.Discard;
  2478. }
  2479. TWriteSettings GetWriteSettings() const override {
  2480. return Settings;
  2481. }
  2482. private:
  2483. TVector<TSourcePtr> Sources;
  2484. bool QuantifierAll;
  2485. const TWriteSettings Settings;
  2486. };
  2487. TSourcePtr BuildUnion(
  2488. TPosition pos,
  2489. TVector<TSourcePtr>&& sources,
  2490. bool quantifierAll,
  2491. const TWriteSettings& settings
  2492. ) {
  2493. return new TUnion(pos, std::move(sources), quantifierAll, settings);
  2494. }
  2495. class TOverWindowSource: public IProxySource {
  2496. public:
  2497. TOverWindowSource(TPosition pos, const TString& windowName, ISource* origSource)
  2498. : IProxySource(pos, origSource)
  2499. , WindowName(windowName)
  2500. {
  2501. Source->SetLabel(origSource->GetLabel());
  2502. }
  2503. TString MakeLocalName(const TString& name) override {
  2504. return Source->MakeLocalName(name);
  2505. }
  2506. void AddTmpWindowColumn(const TString& column) override {
  2507. return Source->AddTmpWindowColumn(column);
  2508. }
  2509. bool AddAggregation(TContext& ctx, TAggregationPtr aggr) override {
  2510. if (aggr->IsOverWindow() || aggr->IsOverWindowDistinct()) {
  2511. return Source->AddAggregationOverWindow(ctx, WindowName, aggr);
  2512. }
  2513. return Source->AddAggregation(ctx, aggr);
  2514. }
  2515. bool AddFuncOverWindow(TContext& ctx, TNodePtr expr) override {
  2516. return Source->AddFuncOverWindow(ctx, WindowName, expr);
  2517. }
  2518. bool IsOverWindowSource() const override {
  2519. return true;
  2520. }
  2521. TMaybe<bool> AddColumn(TContext& ctx, TColumnNode& column) override {
  2522. return Source->AddColumn(ctx, column);
  2523. }
  2524. TNodePtr Build(TContext& ctx) override {
  2525. Y_UNUSED(ctx);
  2526. Y_ABORT("Unexpected call");
  2527. }
  2528. const TString* GetWindowName() const override {
  2529. return &WindowName;
  2530. }
  2531. TWindowSpecificationPtr FindWindowSpecification(TContext& ctx, const TString& windowName) const override {
  2532. return Source->FindWindowSpecification(ctx, windowName);
  2533. }
  2534. TNodePtr GetSessionWindowSpec() const override {
  2535. return Source->GetSessionWindowSpec();
  2536. }
  2537. TNodePtr DoClone() const final {
  2538. return {};
  2539. }
  2540. private:
  2541. const TString WindowName;
  2542. };
  2543. TSourcePtr BuildOverWindowSource(TPosition pos, const TString& windowName, ISource* origSource) {
  2544. return new TOverWindowSource(pos, windowName, origSource);
  2545. }
  2546. class TSkipTakeNode final: public TAstListNode {
  2547. public:
  2548. TSkipTakeNode(TPosition pos, const TNodePtr& skip, const TNodePtr& take)
  2549. : TAstListNode(pos), IsSkipProvided_(!!skip)
  2550. {
  2551. TNodePtr select(AstNode("select"));
  2552. if (skip) {
  2553. select = Y("Skip", select, Y("Coalesce", skip, Y("Uint64", Q("0"))));
  2554. }
  2555. static const TString uiMax = ::ToString(std::numeric_limits<ui64>::max());
  2556. Add("let", "select", Y("Take", select, Y("Coalesce", take, Y("Uint64", Q(uiMax)))));
  2557. }
  2558. TPtr DoClone() const final {
  2559. return {};
  2560. }
  2561. bool HasSkip() const {
  2562. return IsSkipProvided_;
  2563. }
  2564. private:
  2565. const bool IsSkipProvided_;
  2566. };
  2567. TNodePtr BuildSkipTake(TPosition pos, const TNodePtr& skip, const TNodePtr& take) {
  2568. return new TSkipTakeNode(pos, skip, take);
  2569. }
  2570. class TSelect: public IProxySource {
  2571. public:
  2572. TSelect(TPosition pos, TSourcePtr source, TNodePtr skipTake)
  2573. : IProxySource(pos, source.Get())
  2574. , Source(std::move(source))
  2575. , SkipTake(skipTake)
  2576. {}
  2577. bool DoInit(TContext& ctx, ISource* src) override {
  2578. Source->SetLabel(Label);
  2579. if (AsInner) {
  2580. Source->UseAsInner();
  2581. }
  2582. if (IgnoreSort()) {
  2583. Source->DisableSort();
  2584. ctx.Warning(Source->GetPos(), TIssuesIds::YQL_ORDER_BY_WITHOUT_LIMIT_IN_SUBQUERY) << "ORDER BY without LIMIT in subquery will be ignored";
  2585. }
  2586. if (!Source->Init(ctx, src)) {
  2587. return false;
  2588. }
  2589. src = Source.Get();
  2590. if (SkipTake) {
  2591. FakeSource = BuildFakeSource(SkipTake->GetPos());
  2592. if (!SkipTake->Init(ctx, FakeSource.Get())) {
  2593. return false;
  2594. }
  2595. if (SkipTake->HasSkip() && EOrderKind::Sort != Source->GetOrderKind()) {
  2596. ctx.Warning(Source->GetPos(), TIssuesIds::YQL_OFFSET_WITHOUT_SORT) << "LIMIT with OFFSET without ORDER BY may provide different results from run to run";
  2597. }
  2598. }
  2599. return true;
  2600. }
  2601. TNodePtr Build(TContext& ctx) override {
  2602. auto input = Source->Build(ctx);
  2603. if (!input) {
  2604. return nullptr;
  2605. }
  2606. const auto label = "select";
  2607. auto block(Y(Y("let", label, input)));
  2608. auto sortNode = Source->BuildSort(ctx, label);
  2609. if (sortNode && !IgnoreSort()) {
  2610. block = L(block, sortNode);
  2611. }
  2612. if (SkipTake) {
  2613. block = L(block, SkipTake);
  2614. }
  2615. TNodePtr sample;
  2616. if (!BuildSamplingLambda(sample)) {
  2617. return nullptr;
  2618. } else if (sample) {
  2619. block = L(block, Y("let", "select", Y("OrderedFlatMap", "select", sample)));
  2620. }
  2621. if (auto removeNode = Source->BuildCleanupColumns(ctx, label)) {
  2622. block = L(block, removeNode);
  2623. }
  2624. block = L(block, Y("return", label));
  2625. return Y("block", Q(block));
  2626. }
  2627. bool SetSamplingOptions(
  2628. TContext& ctx,
  2629. TPosition pos,
  2630. ESampleClause sampleClause,
  2631. ESampleMode mode,
  2632. TNodePtr samplingRate,
  2633. TNodePtr samplingSeed) override {
  2634. if (mode == ESampleMode::System) {
  2635. ctx.Error(pos) << "only Bernoulli sampling mode is supported for subqueries";
  2636. return false;
  2637. }
  2638. if (samplingSeed) {
  2639. ctx.Error(pos) << "'Repeatable' keyword is not supported for subqueries";
  2640. return false;
  2641. }
  2642. return SetSamplingRate(ctx, sampleClause, samplingRate);
  2643. }
  2644. bool IsSelect() const override {
  2645. return Source->IsSelect();
  2646. }
  2647. bool HasSelectResult() const override {
  2648. return Source->HasSelectResult();
  2649. }
  2650. TPtr DoClone() const final {
  2651. return MakeIntrusive<TSelect>(Pos, Source->CloneSource(), SafeClone(SkipTake));
  2652. }
  2653. protected:
  2654. bool IgnoreSort() const {
  2655. return AsInner && !SkipTake && EOrderKind::Sort == Source->GetOrderKind();
  2656. }
  2657. TSourcePtr Source;
  2658. TNodePtr SkipTake;
  2659. TSourcePtr FakeSource;
  2660. };
  2661. TSourcePtr BuildSelect(TPosition pos, TSourcePtr source, TNodePtr skipTake) {
  2662. return new TSelect(pos, std::move(source), skipTake);
  2663. }
  2664. class TSelectResultNode final: public TAstListNode {
  2665. public:
  2666. TSelectResultNode(TPosition pos, TSourcePtr source, bool writeResult, bool inSubquery,
  2667. TScopedStatePtr scoped)
  2668. : TAstListNode(pos)
  2669. , Source(std::move(source))
  2670. , WriteResult(writeResult)
  2671. , InSubquery(inSubquery)
  2672. , Scoped(scoped)
  2673. {
  2674. YQL_ENSURE(Source, "Invalid source node");
  2675. FakeSource = BuildFakeSource(pos);
  2676. }
  2677. bool IsSelect() const override {
  2678. return true;
  2679. }
  2680. bool HasSelectResult() const override {
  2681. return Source->HasSelectResult();
  2682. }
  2683. bool DoInit(TContext& ctx, ISource* src) override {
  2684. if (!Source->Init(ctx, src)) {
  2685. return false;
  2686. }
  2687. src = Source.Get();
  2688. TTableList tableList;
  2689. Source->GetInputTables(tableList);
  2690. TNodePtr node(BuildInputTables(Pos, tableList, InSubquery, Scoped));
  2691. if (!node->Init(ctx, src)) {
  2692. return false;
  2693. }
  2694. auto writeSettings = src->GetWriteSettings();
  2695. bool asRef = ctx.PragmaRefSelect;
  2696. bool asAutoRef = true;
  2697. if (ctx.PragmaSampleSelect) {
  2698. asRef = false;
  2699. asAutoRef = false;
  2700. }
  2701. auto settings = Y(Q(Y(Q("type"))));
  2702. if (writeSettings.Discard) {
  2703. settings = L(settings, Q(Y(Q("discard"))));
  2704. }
  2705. if (!writeSettings.Label.Empty()) {
  2706. auto labelNode = writeSettings.Label.Build();
  2707. if (!writeSettings.Label.GetLiteral()) {
  2708. labelNode = Y("EvaluateAtom", labelNode);
  2709. }
  2710. if (!labelNode->Init(ctx, FakeSource.Get())) {
  2711. return false;
  2712. }
  2713. settings = L(settings, Q(Y(Q("label"), labelNode)));
  2714. }
  2715. if (asRef) {
  2716. settings = L(settings, Q(Y(Q("ref"))));
  2717. } else if (asAutoRef) {
  2718. settings = L(settings, Q(Y(Q("autoref"))));
  2719. }
  2720. auto columns = Source->GetColumns();
  2721. if (columns && !columns->All && !(columns->QualifiedAll && ctx.SimpleColumns)) {
  2722. auto list = Y();
  2723. YQL_ENSURE(columns->List.size() == columns->NamedColumns.size());
  2724. for (size_t i = 0; i < columns->List.size(); ++i) {
  2725. auto& c = columns->List[i];
  2726. if (c.EndsWith('*')) {
  2727. list = L(list, Q(Y(Q("prefix"), BuildQuotedAtom(Pos, c.substr(0, c.size() - 1)))));
  2728. } else if (columns->NamedColumns[i]) {
  2729. list = L(list, BuildQuotedAtom(Pos, c));
  2730. } else {
  2731. list = L(list, Q(Y(Q("auto"))));
  2732. }
  2733. }
  2734. settings = L(settings, Q(Y(Q("columns"), Q(list))));
  2735. }
  2736. if (ctx.ResultRowsLimit > 0) {
  2737. settings = L(settings, Q(Y(Q("take"), Q(ToString(ctx.ResultRowsLimit)))));
  2738. }
  2739. auto output = Source->Build(ctx);
  2740. if (!output) {
  2741. return false;
  2742. }
  2743. node = L(node, Y("let", "output", output));
  2744. if (WriteResult || writeSettings.Discard) {
  2745. if (EOrderKind::None == Source->GetOrderKind() && ctx.UseUnordered(*Source)) {
  2746. node = L(node, Y("let", "output", Y("Unordered", "output")));
  2747. if (ctx.UnorderedResult) {
  2748. settings = L(settings, Q(Y(Q("unordered"))));
  2749. }
  2750. }
  2751. auto writeResult(BuildWriteResult(Pos, "output", settings));
  2752. if (!writeResult->Init(ctx, src)) {
  2753. return false;
  2754. }
  2755. node = L(node, Y("let", "world", writeResult));
  2756. node = L(node, Y("return", "world"));
  2757. } else {
  2758. node = L(node, Y("return", "output"));
  2759. }
  2760. Add("block", Q(node));
  2761. return true;
  2762. }
  2763. TPtr DoClone() const final {
  2764. return {};
  2765. }
  2766. protected:
  2767. TSourcePtr Source;
  2768. const bool WriteResult;
  2769. const bool InSubquery;
  2770. TScopedStatePtr Scoped;
  2771. TSourcePtr FakeSource;
  2772. };
  2773. TNodePtr BuildSelectResult(TPosition pos, TSourcePtr source, bool writeResult, bool inSubquery,
  2774. TScopedStatePtr scoped) {
  2775. return new TSelectResultNode(pos, std::move(source), writeResult, inSubquery, scoped);
  2776. }
  2777. } // namespace NSQLTranslationV1