select.cpp 106 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172117311741175117611771178117911801181118211831184118511861187118811891190119111921193119411951196119711981199120012011202120312041205120612071208120912101211121212131214121512161217121812191220122112221223122412251226122712281229123012311232123312341235123612371238123912401241124212431244124512461247124812491250125112521253125412551256125712581259126012611262126312641265126612671268126912701271127212731274127512761277127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316131713181319132013211322132313241325132613271328132913301331133213331334133513361337133813391340134113421343134413451346134713481349135013511352135313541355135613571358135913601361136213631364136513661367136813691370137113721373137413751376137713781379138013811382138313841385138613871388138913901391139213931394139513961397139813991400140114021403140414051406140714081409141014111412141314141415141614171418141914201421142214231424142514261427142814291430143114321433143414351436143714381439144014411442144314441445144614471448144914501451145214531454145514561457145814591460146114621463146414651466146714681469147014711472147314741475147614771478147914801481148214831484148514861487148814891490149114921493149414951496149714981499150015011502150315041505150615071508150915101511151215131514151515161517151815191520152115221523152415251526152715281529153015311532153315341535153615371538153915401541154215431544154515461547154815491550155115521553155415551556155715581559156015611562156315641565156615671568156915701571157215731574157515761577157815791580158115821583158415851586158715881589159015911592159315941595159615971598159916001601160216031604160516061607160816091610161116121613161416151616161716181619162016211622162316241625162616271628162916301631163216331634163516361637163816391640164116421643164416451646164716481649165016511652165316541655165616571658165916601661166216631664166516661667166816691670167116721673167416751676167716781679168016811682168316841685168616871688168916901691169216931694169516961697169816991700170117021703170417051706170717081709171017111712171317141715171617171718171917201721172217231724172517261727172817291730173117321733173417351736173717381739174017411742174317441745174617471748174917501751175217531754175517561757175817591760176117621763176417651766176717681769177017711772177317741775177617771778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227722782279228022812282228322842285228622872288228922902291229222932294229522962297229822992300230123022303230423052306230723082309231023112312231323142315231623172318231923202321232223232324232523262327232823292330233123322333233423352336233723382339234023412342234323442345234623472348234923502351235223532354235523562357235823592360236123622363236423652366236723682369237023712372237323742375237623772378237923802381238223832384238523862387238823892390239123922393239423952396239723982399240024012402240324042405240624072408240924102411241224132414241524162417241824192420242124222423242424252426242724282429243024312432243324342435243624372438243924402441244224432444244524462447244824492450245124522453245424552456245724582459246024612462246324642465246624672468246924702471247224732474247524762477247824792480248124822483248424852486248724882489249024912492249324942495249624972498249925002501250225032504250525062507250825092510251125122513251425152516251725182519252025212522252325242525252625272528252925302531253225332534253525362537253825392540254125422543254425452546254725482549255025512552255325542555255625572558255925602561256225632564256525662567256825692570257125722573257425752576257725782579258025812582258325842585258625872588258925902591259225932594259525962597259825992600260126022603260426052606260726082609261026112612261326142615261626172618261926202621262226232624262526262627262826292630263126322633263426352636263726382639264026412642264326442645264626472648264926502651265226532654265526562657265826592660266126622663266426652666266726682669267026712672267326742675267626772678267926802681268226832684268526862687268826892690269126922693269426952696269726982699270027012702270327042705270627072708270927102711271227132714271527162717271827192720272127222723272427252726272727282729273027312732273327342735273627372738273927402741274227432744274527462747274827492750275127522753275427552756275727582759276027612762276327642765276627672768276927702771277227732774277527762777277827792780278127822783278427852786278727882789279027912792279327942795279627972798279928002801280228032804280528062807280828092810281128122813281428152816281728182819282028212822282328242825282628272828282928302831283228332834283528362837283828392840284128422843284428452846284728482849285028512852285328542855285628572858285928602861286228632864286528662867286828692870287128722873287428752876287728782879288028812882288328842885288628872888288928902891289228932894289528962897289828992900290129022903290429052906290729082909291029112912291329142915291629172918291929202921292229232924292529262927292829292930293129322933293429352936293729382939294029412942294329442945294629472948294929502951295229532954295529562957295829592960296129622963296429652966296729682969297029712972297329742975297629772978297929802981298229832984298529862987298829892990299129922993299429952996299729982999300030013002300330043005300630073008300930103011301230133014301530163017301830193020302130223023302430253026302730283029303030313032303330343035303630373038303930403041304230433044304530463047304830493050305130523053305430553056305730583059306030613062306330643065306630673068306930703071307230733074307530763077307830793080308130823083308430853086308730883089309030913092309330943095309630973098309931003101310231033104310531063107310831093110311131123113311431153116311731183119312031213122312331243125312631273128312931303131313231333134313531363137313831393140314131423143314431453146314731483149315031513152315331543155315631573158315931603161316231633164316531663167316831693170317131723173317431753176317731783179318031813182318331843185318631873188318931903191319231933194319531963197319831993200320132023203320432053206320732083209321032113212321332143215321632173218321932203221322232233224322532263227322832293230323132323233323432353236323732383239324032413242
  1. #include "sql.h"
  2. #include "source.h"
  3. #include "context.h"
  4. #include "match_recognize.h"
  5. #include <yql/essentials/providers/common/provider/yql_provider_names.h>
  6. #include <yql/essentials/utils/yql_panic.h>
  7. #include <library/cpp/charset/ci_string.h>
  8. using namespace NYql;
  9. namespace NSQLTranslationV1 {
  10. class TSubqueryNode: public INode {
  11. public:
  12. TSubqueryNode(TSourcePtr&& source, const TString& alias, bool inSubquery, int ensureTupleSize, TScopedStatePtr scoped)
  13. : INode(source->GetPos())
  14. , Source(std::move(source))
  15. , Alias(alias)
  16. , InSubquery(inSubquery)
  17. , EnsureTupleSize(ensureTupleSize)
  18. , Scoped(scoped)
  19. {
  20. YQL_ENSURE(!Alias.empty());
  21. }
  22. ISource* GetSource() override {
  23. return Source.Get();
  24. }
  25. bool DoInit(TContext& ctx, ISource* src) override {
  26. YQL_ENSURE(!src, "Source not expected for subquery node");
  27. Source->UseAsInner();
  28. if (!Source->Init(ctx, nullptr)) {
  29. return false;
  30. }
  31. TTableList tableList;
  32. Source->GetInputTables(tableList);
  33. auto tables = BuildInputTables(Pos, tableList, InSubquery, Scoped);
  34. if (!tables->Init(ctx, Source.Get())) {
  35. return false;
  36. }
  37. auto source = Source->Build(ctx);
  38. if (!source) {
  39. return false;
  40. }
  41. if (EnsureTupleSize != -1) {
  42. source = Y("EnsureTupleSize", source, Q(ToString(EnsureTupleSize)));
  43. }
  44. Node = Y("let", Alias, Y("block", Q(L(tables, Y("return", Q(Y("world", source)))))));
  45. IsUsed = true;
  46. return true;
  47. }
  48. void DoUpdateState() const override {
  49. State.Set(ENodeState::Const, true);
  50. }
  51. bool UsedSubquery() const override {
  52. return IsUsed;
  53. }
  54. TAstNode* Translate(TContext& ctx) const override {
  55. Y_DEBUG_ABORT_UNLESS(Node);
  56. return Node->Translate(ctx);
  57. }
  58. const TString* SubqueryAlias() const override {
  59. return &Alias;
  60. }
  61. TPtr DoClone() const final {
  62. return new TSubqueryNode(Source->CloneSource(), Alias, InSubquery, EnsureTupleSize, Scoped);
  63. }
  64. protected:
  65. TSourcePtr Source;
  66. TNodePtr Node;
  67. const TString Alias;
  68. const bool InSubquery;
  69. const int EnsureTupleSize;
  70. bool IsUsed = false;
  71. TScopedStatePtr Scoped;
  72. };
  73. TNodePtr BuildSubquery(TSourcePtr source, const TString& alias, bool inSubquery, int ensureTupleSize, TScopedStatePtr scoped) {
  74. return new TSubqueryNode(std::move(source), alias, inSubquery, ensureTupleSize, scoped);
  75. }
  76. class TSourceNode: public INode {
  77. public:
  78. TSourceNode(TPosition pos, TSourcePtr&& source, bool checkExist, bool withTables)
  79. : INode(pos)
  80. , Source(std::move(source))
  81. , CheckExist(checkExist)
  82. , WithTables(withTables)
  83. {}
  84. ISource* GetSource() override {
  85. return Source.Get();
  86. }
  87. bool DoInit(TContext& ctx, ISource* src) override {
  88. if (AsInner) {
  89. Source->UseAsInner();
  90. }
  91. if (!Source->Init(ctx, src)) {
  92. return false;
  93. }
  94. Node = Source->Build(ctx);
  95. if (!Node) {
  96. return false;
  97. }
  98. if (src) {
  99. if (IsSubquery()) {
  100. /// should be not used?
  101. auto columnsPtr = Source->GetColumns();
  102. if (columnsPtr && (columnsPtr->All || columnsPtr->QualifiedAll || columnsPtr->List.size() == 1)) {
  103. Node = Y("SingleMember", Y("SqlAccess", Q("dict"), Y("Take", Node, Y("Uint64", Q("1"))), Y("Uint64", Q("0"))));
  104. } else {
  105. ctx.Error(Pos) << "Source used in expression should contain one concrete column";
  106. if (RefPos) {
  107. ctx.Error(*RefPos) << "Source is used here";
  108. }
  109. return false;
  110. }
  111. }
  112. src->AddDependentSource(Source.Get());
  113. }
  114. if (Node && WithTables) {
  115. TTableList tableList;
  116. Source->GetInputTables(tableList);
  117. TNodePtr inputTables(BuildInputTables(ctx.Pos(), tableList, IsSubquery(), ctx.Scoped));
  118. if (!inputTables->Init(ctx, Source.Get())) {
  119. return false;
  120. }
  121. auto blockContent = inputTables;
  122. blockContent = L(blockContent, Y("return", Node));
  123. Node = Y("block", Q(blockContent));
  124. }
  125. return true;
  126. }
  127. bool IsSubquery() const {
  128. return !AsInner && Source->IsSelect() && !CheckExist;
  129. }
  130. void DoUpdateState() const override {
  131. State.Set(ENodeState::Const, IsSubquery());
  132. }
  133. TAstNode* Translate(TContext& ctx) const override {
  134. Y_DEBUG_ABORT_UNLESS(Node);
  135. return Node->Translate(ctx);
  136. }
  137. TPtr DoClone() const final {
  138. return new TSourceNode(Pos, Source->CloneSource(), CheckExist, WithTables);
  139. }
  140. protected:
  141. TSourcePtr Source;
  142. TNodePtr Node;
  143. bool CheckExist;
  144. bool WithTables;
  145. };
  146. TNodePtr BuildSourceNode(TPosition pos, TSourcePtr source, bool checkExist, bool withTables) {
  147. return new TSourceNode(pos, std::move(source), checkExist, withTables);
  148. }
  149. class TFakeSource: public ISource {
  150. public:
  151. TFakeSource(TPosition pos, bool missingFrom, bool inSubquery)
  152. : ISource(pos)
  153. , MissingFrom(missingFrom)
  154. , InSubquery(inSubquery)
  155. {}
  156. bool IsFake() const override {
  157. return true;
  158. }
  159. TMaybe<bool> AddColumn(TContext& ctx, TColumnNode& column) override {
  160. // TODO: fix column reference scope - with proper scopes error below should happen earlier
  161. if (column.CanBeType()) {
  162. return true;
  163. }
  164. ctx.Error(Pos) << (MissingFrom ? "Column references are not allowed without FROM" : "Source does not allow column references");
  165. ctx.Error(column.GetPos()) << "Column reference "
  166. << (column.GetColumnName() ? "'" + *column.GetColumnName() + "'" : "(expr)");
  167. return {};
  168. }
  169. bool AddFilter(TContext& ctx, TNodePtr filter) override {
  170. Y_UNUSED(filter);
  171. auto pos = filter ? filter->GetPos() : Pos;
  172. ctx.Error(pos) << (MissingFrom ? "Filtering is not allowed without FROM" : "Source does not allow filtering");
  173. return false;
  174. }
  175. TNodePtr Build(TContext& ctx) override {
  176. Y_UNUSED(ctx);
  177. auto ret = Y("AsList", Y("AsStruct"));
  178. if (InSubquery) {
  179. return Y("WithWorld", ret, "world");
  180. } else {
  181. return ret;
  182. }
  183. }
  184. bool AddGroupKey(TContext& ctx, const TString& column) override {
  185. Y_UNUSED(column);
  186. ctx.Error(Pos) << "Grouping is not allowed " << (MissingFrom ? "without FROM" : "in this context");
  187. return false;
  188. }
  189. bool AddAggregation(TContext& ctx, TAggregationPtr aggr) override {
  190. YQL_ENSURE(aggr);
  191. ctx.Error(aggr->GetPos()) << "Aggregation is not allowed " << (MissingFrom ? "without FROM" : "in this context");
  192. return false;
  193. }
  194. bool AddAggregationOverWindow(TContext& ctx, const TString& windowName, TAggregationPtr func) override {
  195. Y_UNUSED(windowName);
  196. YQL_ENSURE(func);
  197. ctx.Error(func->GetPos()) << "Aggregation is not allowed " << (MissingFrom ? "without FROM" : "in this context");
  198. return false;
  199. }
  200. bool AddFuncOverWindow(TContext& ctx, const TString& windowName, TNodePtr func) override {
  201. Y_UNUSED(windowName);
  202. YQL_ENSURE(func);
  203. ctx.Error(func->GetPos()) << "Window functions are not allowed " << (MissingFrom ? "without FROM" : "in this context");
  204. return false;
  205. }
  206. TWindowSpecificationPtr FindWindowSpecification(TContext& ctx, const TString& windowName) const override {
  207. Y_UNUSED(windowName);
  208. ctx.Error(Pos) << "Window and aggregation functions are not allowed " << (MissingFrom ? "without FROM" : "in this context");
  209. return {};
  210. }
  211. bool IsGroupByColumn(const TString& column) const override {
  212. Y_UNUSED(column);
  213. return false;
  214. }
  215. TNodePtr BuildFilter(TContext& ctx, const TString& label) override {
  216. Y_UNUSED(ctx);
  217. Y_UNUSED(label);
  218. return nullptr;
  219. }
  220. std::pair<TNodePtr, bool> BuildAggregation(const TString& label, TContext& ctx) override {
  221. Y_UNUSED(label);
  222. Y_UNUSED(ctx);
  223. return { nullptr, true };
  224. }
  225. TPtr DoClone() const final {
  226. return new TFakeSource(Pos, MissingFrom, InSubquery);
  227. }
  228. private:
  229. const bool MissingFrom;
  230. const bool InSubquery;
  231. };
  232. TSourcePtr BuildFakeSource(TPosition pos, bool missingFrom, bool inSubquery) {
  233. return new TFakeSource(pos, missingFrom, inSubquery);
  234. }
  235. class TNodeSource: public ISource {
  236. public:
  237. TNodeSource(TPosition pos, const TNodePtr& node, bool wrapToList, bool wrapByTableSource)
  238. : ISource(pos)
  239. , Node(node)
  240. , WrapToList(wrapToList)
  241. , WrapByTableSource(wrapByTableSource)
  242. {
  243. YQL_ENSURE(Node);
  244. FakeSource = BuildFakeSource(pos);
  245. }
  246. bool ShouldUseSourceAsColumn(const TString& source) const final {
  247. return source && source != GetLabel();
  248. }
  249. TMaybe<bool> AddColumn(TContext& ctx, TColumnNode& column) final {
  250. Y_UNUSED(ctx);
  251. Y_UNUSED(column);
  252. return true;
  253. }
  254. bool DoInit(TContext& ctx, ISource* src) final {
  255. if (!Node->Init(ctx, FakeSource.Get())) {
  256. return false;
  257. }
  258. return ISource::DoInit(ctx, src);
  259. }
  260. TNodePtr Build(TContext& /*ctx*/) final {
  261. auto nodeAst = AstNode(Node);
  262. if (WrapToList) {
  263. nodeAst = Y("ToList", nodeAst);
  264. }
  265. if (WrapByTableSource) {
  266. nodeAst = Y("TableSource", nodeAst);
  267. }
  268. return nodeAst;
  269. }
  270. TPtr DoClone() const final {
  271. return new TNodeSource(Pos, SafeClone(Node), WrapToList, WrapByTableSource);
  272. }
  273. private:
  274. TNodePtr Node;
  275. const bool WrapToList;
  276. const bool WrapByTableSource;
  277. TSourcePtr FakeSource;
  278. };
  279. TSourcePtr BuildNodeSource(TPosition pos, const TNodePtr& node, bool wrapToList, bool wrapByTableSource) {
  280. return new TNodeSource(pos, node, wrapToList, wrapByTableSource);
  281. }
  282. class IProxySource: public ISource {
  283. protected:
  284. IProxySource(TPosition pos, ISource* src)
  285. : ISource(pos)
  286. , Source(src)
  287. {}
  288. void AllColumns() override {
  289. Y_DEBUG_ABORT_UNLESS(Source);
  290. return Source->AllColumns();
  291. }
  292. const TColumns* GetColumns() const override {
  293. Y_DEBUG_ABORT_UNLESS(Source);
  294. return Source->GetColumns();
  295. }
  296. void GetInputTables(TTableList& tableList) const override {
  297. if (Source) {
  298. Source->GetInputTables(tableList);
  299. }
  300. ISource::GetInputTables(tableList);
  301. }
  302. TMaybe<bool> AddColumn(TContext& ctx, TColumnNode& column) override {
  303. Y_DEBUG_ABORT_UNLESS(Source);
  304. const TString label(Source->GetLabel());
  305. Source->SetLabel(Label);
  306. const auto ret = Source->AddColumn(ctx, column);
  307. Source->SetLabel(label);
  308. return ret;
  309. }
  310. bool ShouldUseSourceAsColumn(const TString& source) const override {
  311. return Source->ShouldUseSourceAsColumn(source);
  312. }
  313. bool IsStream() const override {
  314. Y_DEBUG_ABORT_UNLESS(Source);
  315. return Source->IsStream();
  316. }
  317. EOrderKind GetOrderKind() const override {
  318. Y_DEBUG_ABORT_UNLESS(Source);
  319. return Source->GetOrderKind();
  320. }
  321. TWriteSettings GetWriteSettings() const override {
  322. Y_DEBUG_ABORT_UNLESS(Source);
  323. return Source->GetWriteSettings();
  324. }
  325. protected:
  326. void SetSource(ISource* source) {
  327. Source = source;
  328. }
  329. ISource* Source;
  330. };
  331. class IRealSource: public ISource {
  332. protected:
  333. IRealSource(TPosition pos)
  334. : ISource(pos)
  335. {
  336. }
  337. void AllColumns() override {
  338. Columns.SetAll();
  339. }
  340. const TColumns* GetColumns() const override {
  341. return &Columns;
  342. }
  343. TMaybe<bool> AddColumn(TContext& ctx, TColumnNode& column) override {
  344. const auto& label = *column.GetSourceName();
  345. const auto& source = GetLabel();
  346. if (!label.empty() && label != source && !(source.StartsWith(label) && source[label.size()] == ':')) {
  347. if (column.IsReliable()) {
  348. ctx.Error(column.GetPos()) << "Unknown correlation name: " << label;
  349. }
  350. return {};
  351. }
  352. if (column.IsAsterisk()) {
  353. return true;
  354. }
  355. const auto* name = column.GetColumnName();
  356. if (name && !column.CanBeType() && !Columns.IsColumnPossible(ctx, *name) && !IsAlias(EExprSeat::GroupBy, *name) && !IsAlias(EExprSeat::DistinctAggr, *name)) {
  357. if (column.IsReliable()) {
  358. TStringBuilder sb;
  359. sb << "Column " << *name << " is not in source column set";
  360. if (const auto mistype = FindColumnMistype(*name)) {
  361. sb << ". Did you mean " << mistype.GetRef() << "?";
  362. }
  363. ctx.Error(column.GetPos()) << sb;
  364. }
  365. return {};
  366. }
  367. return true;
  368. }
  369. TMaybe<TString> FindColumnMistype(const TString& name) const override {
  370. auto result = FindMistypeIn(Columns.Real, name);
  371. if (!result) {
  372. auto result = FindMistypeIn(Columns.Artificial, name);
  373. }
  374. return result ? result : ISource::FindColumnMistype(name);
  375. }
  376. protected:
  377. TColumns Columns;
  378. };
  379. class IComposableSource : private TNonCopyable {
  380. public:
  381. virtual ~IComposableSource() = default;
  382. virtual void BuildProjectWindowDistinct(TNodePtr& blocks, TContext& ctx, bool ordered) = 0;
  383. };
  384. using TComposableSourcePtr = TIntrusivePtr<IComposableSource>;
  385. class TMuxSource: public ISource {
  386. public:
  387. TMuxSource(TPosition pos, TVector<TSourcePtr>&& sources)
  388. : ISource(pos)
  389. , Sources(std::move(sources))
  390. {
  391. YQL_ENSURE(Sources.size() > 1);
  392. }
  393. void AllColumns() final {
  394. for (auto& source: Sources) {
  395. source->AllColumns();
  396. }
  397. }
  398. const TColumns* GetColumns() const final {
  399. // Columns are equal in all sources. Return from the first one
  400. return Sources.front()->GetColumns();
  401. }
  402. void GetInputTables(TTableList& tableList) const final {
  403. for (auto& source: Sources) {
  404. source->GetInputTables(tableList);
  405. }
  406. ISource::GetInputTables(tableList);
  407. }
  408. bool IsStream() const final {
  409. return AnyOf(Sources, [] (const TSourcePtr& s) { return s->IsStream(); });
  410. }
  411. bool DoInit(TContext& ctx, ISource* src) final {
  412. for (auto& source: Sources) {
  413. if (AsInner) {
  414. source->UseAsInner();
  415. }
  416. if (src) {
  417. src->AddDependentSource(source.Get());
  418. }
  419. if (!source->Init(ctx, src)) {
  420. return false;
  421. }
  422. if (!source->InitFilters(ctx)) {
  423. return false;
  424. }
  425. }
  426. return true;
  427. }
  428. TMaybe<bool> AddColumn(TContext& ctx, TColumnNode& column) final {
  429. for (auto& source: Sources) {
  430. if (!source->AddColumn(ctx, column)) {
  431. return {};
  432. }
  433. }
  434. return true;
  435. }
  436. TNodePtr Build(TContext& ctx) final {
  437. TNodePtr block;
  438. auto muxArgs = Y();
  439. for (size_t i = 0; i < Sources.size(); ++i) {
  440. auto& source = Sources[i];
  441. auto input = source->Build(ctx);
  442. auto ref = ctx.MakeName("src");
  443. muxArgs->Add(ref);
  444. if (block) {
  445. block = L(block, Y("let", ref, input));
  446. } else {
  447. block = Y(Y("let", ref, input));
  448. }
  449. auto filter = source->BuildFilter(ctx, ref);
  450. if (filter) {
  451. block = L(block, Y("let", ref, filter));
  452. }
  453. if (ctx.EnableSystemColumns) {
  454. block = L(block, Y("let", ref, Y("RemoveSystemMembers", ref)));
  455. }
  456. }
  457. return GroundWithExpr(block, Y("Mux", Q(muxArgs)));
  458. }
  459. bool AddFilter(TContext& ctx, TNodePtr filter) final {
  460. Y_UNUSED(filter);
  461. ctx.Error() << "Filter is not allowed for multiple sources";
  462. return false;
  463. }
  464. TPtr DoClone() const final {
  465. return new TMuxSource(Pos, CloneContainer(Sources));
  466. }
  467. protected:
  468. TVector<TSourcePtr> Sources;
  469. };
  470. TSourcePtr BuildMuxSource(TPosition pos, TVector<TSourcePtr>&& sources) {
  471. return new TMuxSource(pos, std::move(sources));
  472. }
  473. class TSubqueryRefNode: public IRealSource {
  474. public:
  475. TSubqueryRefNode(const TNodePtr& subquery, const TString& alias, int tupleIndex)
  476. : IRealSource(subquery->GetPos())
  477. , Subquery(subquery)
  478. , Alias(alias)
  479. , TupleIndex(tupleIndex)
  480. {
  481. YQL_ENSURE(subquery->GetSource());
  482. }
  483. ISource* GetSource() override {
  484. return this;
  485. }
  486. bool DoInit(TContext& ctx, ISource* src) override {
  487. // independent subquery should not connect source
  488. Subquery->UseAsInner();
  489. if (!Subquery->Init(ctx, nullptr)) {
  490. return false;
  491. }
  492. Columns = *Subquery->GetSource()->GetColumns();
  493. Node = BuildAtom(Pos, Alias, TNodeFlags::Default);
  494. if (TupleIndex != -1) {
  495. Node = Y("Nth", Node, Q(ToString(TupleIndex)));
  496. }
  497. if (!Node->Init(ctx, src)) {
  498. return false;
  499. }
  500. if (src && Subquery->GetSource()->IsSelect()) {
  501. auto columnsPtr = &Columns;
  502. if (columnsPtr && (columnsPtr->All || columnsPtr->QualifiedAll || columnsPtr->List.size() == 1)) {
  503. Node = Y("SingleMember", Y("SqlAccess", Q("dict"), Y("Take", Node, Y("Uint64", Q("1"))), Y("Uint64", Q("0"))));
  504. } else {
  505. ctx.Error(Pos) << "Source used in expression should contain one concrete column";
  506. if (RefPos) {
  507. ctx.Error(*RefPos) << "Source is used here";
  508. }
  509. return false;
  510. }
  511. }
  512. TNodePtr sample;
  513. if (!BuildSamplingLambda(sample)) {
  514. return false;
  515. } else if (sample) {
  516. Node = Y("block", Q(Y(Y("let", Node, Y("OrderedFlatMap", Node, sample)), Y("return", Node))));
  517. }
  518. return true;
  519. }
  520. TNodePtr Build(TContext& ctx) override {
  521. Y_UNUSED(ctx);
  522. return Node;
  523. }
  524. bool SetSamplingOptions(
  525. TContext& ctx,
  526. TPosition pos,
  527. ESampleClause sampleClause,
  528. ESampleMode mode,
  529. TNodePtr samplingRate,
  530. TNodePtr samplingSeed) override {
  531. if (mode == ESampleMode::System) {
  532. ctx.Error(pos) << "only Bernoulli sampling mode is supported for subqueries";
  533. return false;
  534. }
  535. if (samplingSeed) {
  536. ctx.Error(pos) << "'Repeatable' keyword is not supported for subqueries";
  537. return false;
  538. }
  539. return SetSamplingRate(ctx, sampleClause, samplingRate);
  540. }
  541. bool IsStream() const override {
  542. return Subquery->GetSource()->IsStream();
  543. }
  544. void DoUpdateState() const override {
  545. State.Set(ENodeState::Const, true);
  546. }
  547. TAstNode* Translate(TContext& ctx) const override {
  548. Y_DEBUG_ABORT_UNLESS(Node);
  549. return Node->Translate(ctx);
  550. }
  551. TPtr DoClone() const final {
  552. return new TSubqueryRefNode(Subquery, Alias, TupleIndex);
  553. }
  554. protected:
  555. TNodePtr Subquery;
  556. const TString Alias;
  557. const int TupleIndex;
  558. TNodePtr Node;
  559. };
  560. TNodePtr BuildSubqueryRef(TNodePtr subquery, const TString& alias, int tupleIndex) {
  561. return new TSubqueryRefNode(std::move(subquery), alias, tupleIndex);
  562. }
  563. class TInvalidSubqueryRefNode: public ISource {
  564. public:
  565. TInvalidSubqueryRefNode(TPosition pos)
  566. : ISource(pos)
  567. , Pos(pos)
  568. {
  569. }
  570. bool DoInit(TContext& ctx, ISource* src) override {
  571. Y_UNUSED(src);
  572. ctx.Error(Pos) << "Named subquery can not be used as a top level statement in libraries";
  573. return false;
  574. }
  575. TNodePtr Build(TContext& ctx) override {
  576. Y_UNUSED(ctx);
  577. return {};
  578. }
  579. TPtr DoClone() const final {
  580. return new TInvalidSubqueryRefNode(Pos);
  581. }
  582. protected:
  583. const TPosition Pos;
  584. };
  585. TNodePtr BuildInvalidSubqueryRef(TPosition subqueryPos) {
  586. return new TInvalidSubqueryRefNode(subqueryPos);
  587. }
  588. class TTableSource: public IRealSource {
  589. public:
  590. TTableSource(TPosition pos, const TTableRef& table, const TString& label)
  591. : IRealSource(pos)
  592. , Table(table)
  593. , FakeSource(BuildFakeSource(pos))
  594. {
  595. SetLabel(label.empty() ? Table.ShortName() : label);
  596. }
  597. void GetInputTables(TTableList& tableList) const override {
  598. tableList.push_back(Table);
  599. ISource::GetInputTables(tableList);
  600. }
  601. bool ShouldUseSourceAsColumn(const TString& source) const override {
  602. const auto& label = GetLabel();
  603. return source && source != label && !(label.StartsWith(source) && label[source.size()] == ':');
  604. }
  605. TMaybe<bool> AddColumn(TContext& ctx, TColumnNode& column) override {
  606. Columns.Add(column.GetColumnName(), column.GetCountHint(), column.IsArtificial(), column.IsReliable());
  607. if (!IRealSource::AddColumn(ctx, column)) {
  608. return {};
  609. }
  610. return false;
  611. }
  612. bool SetSamplingOptions(
  613. TContext& ctx,
  614. TPosition pos,
  615. ESampleClause sampleClause,
  616. ESampleMode mode,
  617. TNodePtr samplingRate,
  618. TNodePtr samplingSeed) override
  619. {
  620. Y_UNUSED(pos);
  621. TString modeName;
  622. if (!samplingSeed) {
  623. samplingSeed = Y("Int32", Q("0"));
  624. }
  625. if (ESampleClause::Sample == sampleClause) {
  626. YQL_ENSURE(ESampleMode::Bernoulli == mode, "Internal logic error");
  627. }
  628. switch (mode) {
  629. case ESampleMode::Bernoulli:
  630. modeName = "bernoulli";
  631. break;
  632. case ESampleMode::System:
  633. modeName = "system";
  634. break;
  635. }
  636. if (!samplingRate->Init(ctx, FakeSource.Get())) {
  637. return false;
  638. }
  639. samplingRate = PrepareSamplingRate(pos, sampleClause, samplingRate);
  640. auto sampleSettings = Q(Y(Q(modeName), Y("EvaluateAtom", Y("ToString", samplingRate)), Y("EvaluateAtom", Y("ToString", samplingSeed))));
  641. auto sampleOption = Q(Y(Q("sample"), sampleSettings));
  642. if (Table.Options) {
  643. if (!Table.Options->Init(ctx, this)) {
  644. return false;
  645. }
  646. Table.Options = L(Table.Options, sampleOption);
  647. } else {
  648. Table.Options = Y(sampleOption);
  649. }
  650. return true;
  651. }
  652. bool SetTableHints(TContext& ctx, TPosition pos, const TTableHints& hints, const TTableHints& contextHints) override {
  653. Y_UNUSED(ctx);
  654. TTableHints merged = contextHints;
  655. MergeHints(merged, hints);
  656. Table.Options = BuildInputOptions(pos, merged);
  657. return true;
  658. }
  659. bool SetViewName(TContext& ctx, TPosition pos, const TString& view) override {
  660. return Table.Keys->SetViewName(ctx, pos, view);
  661. }
  662. TNodePtr Build(TContext& ctx) override {
  663. if (!Table.Keys->Init(ctx, nullptr)) {
  664. return nullptr;
  665. }
  666. return AstNode(Table.RefName);
  667. }
  668. bool IsStream() const override {
  669. return IsStreamingService(Table.Service);
  670. }
  671. TPtr DoClone() const final {
  672. return new TTableSource(Pos, Table, GetLabel());
  673. }
  674. bool IsTableSource() const override {
  675. return true;
  676. }
  677. protected:
  678. TTableRef Table;
  679. private:
  680. const TSourcePtr FakeSource;
  681. };
  682. TSourcePtr BuildTableSource(TPosition pos, const TTableRef& table, const TString& label) {
  683. return new TTableSource(pos, table, label);
  684. }
  685. class TInnerSource: public IProxySource {
  686. public:
  687. TInnerSource(TPosition pos, TNodePtr node, const TString& service, const TDeferredAtom& cluster, const TString& label)
  688. : IProxySource(pos, nullptr)
  689. , Node(node)
  690. , Service(service)
  691. , Cluster(cluster)
  692. {
  693. SetLabel(label);
  694. }
  695. bool SetSamplingOptions(TContext& ctx, TPosition pos, ESampleClause sampleClause, ESampleMode mode, TNodePtr samplingRate, TNodePtr samplingSeed) override {
  696. Y_UNUSED(ctx);
  697. SamplingPos = pos;
  698. SamplingClause = sampleClause;
  699. SamplingMode = mode;
  700. SamplingRate = samplingRate;
  701. SamplingSeed = samplingSeed;
  702. return true;
  703. }
  704. bool SetTableHints(TContext& ctx, TPosition pos, const TTableHints& hints, const TTableHints& contextHints) override {
  705. Y_UNUSED(ctx);
  706. HintsPos = pos;
  707. Hints = hints;
  708. ContextHints = contextHints;
  709. return true;
  710. }
  711. bool SetViewName(TContext& ctx, TPosition pos, const TString& view) override {
  712. Y_UNUSED(ctx);
  713. ViewPos = pos;
  714. View = view;
  715. return true;
  716. }
  717. bool ShouldUseSourceAsColumn(const TString& source) const override {
  718. return source && source != GetLabel();
  719. }
  720. TMaybe<bool> AddColumn(TContext& ctx, TColumnNode& column) override {
  721. if (const TString* columnName = column.GetColumnName()) {
  722. if (columnName && IsExprAlias(*columnName)) {
  723. return true;
  724. }
  725. }
  726. return IProxySource::AddColumn(ctx, column);
  727. }
  728. bool DoInit(TContext& ctx, ISource* initSrc) override {
  729. Y_UNUSED(initSrc);
  730. auto source = Node->GetSource();
  731. if (!source) {
  732. NewSource = TryMakeSourceFromExpression(Pos, ctx, Service, Cluster, Node);
  733. source = NewSource.Get();
  734. }
  735. if (!source) {
  736. ctx.Error(Pos) << "Invalid inner source node";
  737. return false;
  738. }
  739. if (SamplingPos) {
  740. if (!source->SetSamplingOptions(ctx, *SamplingPos, SamplingClause, SamplingMode, SamplingRate, SamplingSeed)) {
  741. return false;
  742. }
  743. }
  744. if (ViewPos) {
  745. if (!source->SetViewName(ctx, *ViewPos, View)) {
  746. return false;
  747. }
  748. }
  749. if (HintsPos) {
  750. if (!source->SetTableHints(ctx, *HintsPos, Hints, ContextHints)) {
  751. return false;
  752. }
  753. }
  754. source->SetLabel(Label);
  755. if (!NewSource) {
  756. Node->UseAsInner();
  757. if (!Node->Init(ctx, nullptr)) {
  758. return false;
  759. }
  760. }
  761. SetSource(source);
  762. if (NewSource && !NewSource->Init(ctx, nullptr)) {
  763. return false;
  764. }
  765. return ISource::DoInit(ctx, source);
  766. }
  767. TNodePtr Build(TContext& ctx) override {
  768. Y_UNUSED(ctx);
  769. return NewSource ? NewSource->Build(ctx) : Node;
  770. }
  771. bool IsStream() const override {
  772. auto source = Node->GetSource();
  773. if (source) {
  774. return source->IsStream();
  775. }
  776. // NewSource will be built later in DoInit->TryMakeSourceFromExpression
  777. // where Service will be used in all situations
  778. // let's detect IsStream by Service value
  779. return IsStreamingService(Service);
  780. }
  781. TPtr DoClone() const final {
  782. return new TInnerSource(Pos, SafeClone(Node), Service, Cluster, GetLabel());
  783. }
  784. protected:
  785. TNodePtr Node;
  786. TString Service;
  787. TDeferredAtom Cluster;
  788. TSourcePtr NewSource;
  789. private:
  790. TMaybe<TPosition> SamplingPos;
  791. ESampleClause SamplingClause;
  792. ESampleMode SamplingMode;
  793. TNodePtr SamplingRate;
  794. TNodePtr SamplingSeed;
  795. TMaybe<TPosition> ViewPos;
  796. TString View;
  797. TMaybe<TPosition> HintsPos;
  798. TTableHints Hints;
  799. TTableHints ContextHints;
  800. };
  801. TSourcePtr BuildInnerSource(TPosition pos, TNodePtr node, const TString& service, const TDeferredAtom& cluster, const TString& label) {
  802. return new TInnerSource(pos, node, service, cluster, label);
  803. }
  804. static bool IsComparableExpression(TContext& ctx, const TNodePtr& expr, bool assume, const char* sqlConstruction) {
  805. if (assume && !expr->IsPlainColumn()) {
  806. ctx.Error(expr->GetPos()) << "Only column names can be used in " << sqlConstruction;
  807. return false;
  808. }
  809. if (expr->IsConstant()) {
  810. ctx.Error(expr->GetPos()) << "Unable to " << sqlConstruction << " constant expression";
  811. return false;
  812. }
  813. if (expr->IsAggregated() && !expr->HasState(ENodeState::AggregationKey)) {
  814. ctx.Error(expr->GetPos()) << "Unable to " << sqlConstruction << " aggregated values";
  815. return false;
  816. }
  817. if (expr->IsPlainColumn()) {
  818. return true;
  819. }
  820. if (expr->GetOpName().empty()) {
  821. ctx.Error(expr->GetPos()) << "You should use in " << sqlConstruction << " column name, qualified field, callable function or expression";
  822. return false;
  823. }
  824. return true;
  825. }
  826. /// \todo move to reduce.cpp? or mapreduce.cpp?
  827. class TReduceSource: public IRealSource {
  828. public:
  829. TReduceSource(TPosition pos,
  830. ReduceMode mode,
  831. TSourcePtr source,
  832. TVector<TSortSpecificationPtr>&& orderBy,
  833. TVector<TNodePtr>&& keys,
  834. TVector<TNodePtr>&& args,
  835. TNodePtr udf,
  836. TNodePtr having,
  837. const TWriteSettings& settings,
  838. const TVector<TSortSpecificationPtr>& assumeOrderBy,
  839. bool listCall)
  840. : IRealSource(pos)
  841. , Mode(mode)
  842. , Source(std::move(source))
  843. , OrderBy(std::move(orderBy))
  844. , Keys(std::move(keys))
  845. , Args(std::move(args))
  846. , Udf(udf)
  847. , Having(having)
  848. , Settings(settings)
  849. , AssumeOrderBy(assumeOrderBy)
  850. , ListCall(listCall)
  851. {
  852. YQL_ENSURE(!Keys.empty());
  853. YQL_ENSURE(Source);
  854. }
  855. void GetInputTables(TTableList& tableList) const override {
  856. Source->GetInputTables(tableList);
  857. ISource::GetInputTables(tableList);
  858. }
  859. bool DoInit(TContext& ctx, ISource* src) final {
  860. if (AsInner) {
  861. Source->UseAsInner();
  862. }
  863. YQL_ENSURE(!src);
  864. if (!Source->Init(ctx, src)) {
  865. return false;
  866. }
  867. if (!Source->InitFilters(ctx)) {
  868. return false;
  869. }
  870. src = Source.Get();
  871. for (auto& key: Keys) {
  872. if (!key->Init(ctx, src)) {
  873. return false;
  874. }
  875. auto keyNamePtr = key->GetColumnName();
  876. YQL_ENSURE(keyNamePtr);
  877. if (!src->AddGroupKey(ctx, *keyNamePtr)) {
  878. return false;
  879. }
  880. }
  881. if (Having && !Having->Init(ctx, nullptr)) {
  882. return false;
  883. }
  884. /// SIN: verify reduce one argument
  885. if (Args.size() != 1) {
  886. ctx.Error(Pos) << "REDUCE requires exactly one UDF argument";
  887. return false;
  888. }
  889. if (!Args[0]->Init(ctx, src)) {
  890. return false;
  891. }
  892. for (auto orderSpec: OrderBy) {
  893. if (!orderSpec->OrderExpr->Init(ctx, src)) {
  894. return false;
  895. }
  896. }
  897. if (!Udf->Init(ctx, src)) {
  898. return false;
  899. }
  900. if (Udf->GetLabel().empty()) {
  901. Columns.SetAll();
  902. } else {
  903. Columns.Add(&Udf->GetLabel(), false);
  904. }
  905. const auto label = GetLabel();
  906. for (const auto& sortSpec: AssumeOrderBy) {
  907. auto& expr = sortSpec->OrderExpr;
  908. SetLabel(Source->GetLabel());
  909. if (!expr->Init(ctx, this)) {
  910. return false;
  911. }
  912. if (!IsComparableExpression(ctx, expr, true, "ASSUME ORDER BY")) {
  913. return false;
  914. }
  915. }
  916. SetLabel(label);
  917. return true;
  918. }
  919. TNodePtr Build(TContext& ctx) final {
  920. auto input = Source->Build(ctx);
  921. if (!input) {
  922. return nullptr;
  923. }
  924. auto keysTuple = Y();
  925. if (Keys.size() == 1) {
  926. keysTuple = Y("Member", "row", BuildQuotedAtom(Pos, *Keys.back()->GetColumnName()));
  927. }
  928. else {
  929. for (const auto& key: Keys) {
  930. keysTuple = L(keysTuple, Y("Member", "row", BuildQuotedAtom(Pos, *key->GetColumnName())));
  931. }
  932. keysTuple = Q(keysTuple);
  933. }
  934. auto extractKey = Y("SqlExtractKey", "row", BuildLambda(Pos, Y("row"), keysTuple));
  935. auto extractKeyLambda = BuildLambda(Pos, Y("row"), extractKey);
  936. TNodePtr processPartitions;
  937. if (ListCall) {
  938. if (Mode != ReduceMode::ByAll) {
  939. ctx.Error(Pos) << "TableRows() must be used only with USING ALL";
  940. return nullptr;
  941. }
  942. TNodePtr expr = BuildAtom(Pos, "partitionStream");
  943. processPartitions = Y("SqlReduce", "partitionStream", BuildQuotedAtom(Pos, "byAllList", TNodeFlags::Default), Udf, expr);
  944. } else {
  945. switch (Mode) {
  946. case ReduceMode::ByAll: {
  947. auto columnPtr = Args[0]->GetColumnName();
  948. TNodePtr expr = BuildAtom(Pos, "partitionStream");
  949. if (!columnPtr || *columnPtr != "*") {
  950. expr = Y("Map", "partitionStream", BuildLambda(Pos, Y("keyPair"), Q(L(Y(),\
  951. Y("Nth", "keyPair", Q(ToString("0"))),\
  952. Y("Map", Y("Nth", "keyPair", Q(ToString("1"))), BuildLambda(Pos, Y("row"), Args[0]))))));
  953. }
  954. processPartitions = Y("SqlReduce", "partitionStream", BuildQuotedAtom(Pos, "byAll", TNodeFlags::Default), Udf, expr);
  955. break;
  956. }
  957. case ReduceMode::ByPartition: {
  958. processPartitions = Y("SqlReduce", "partitionStream", extractKeyLambda, Udf,
  959. BuildLambda(Pos, Y("row"), Args[0]));
  960. break;
  961. }
  962. default:
  963. YQL_ENSURE(false, "Unexpected REDUCE mode");
  964. }
  965. }
  966. TNodePtr sortDirection;
  967. TNodePtr sortKeySelector;
  968. FillSortParts(OrderBy, sortDirection, sortKeySelector);
  969. if (!OrderBy.empty()) {
  970. sortKeySelector = BuildLambda(Pos, Y("row"), Y("SqlExtractKey", "row", sortKeySelector));
  971. }
  972. auto partitionByKey = Y(!ListCall && Mode == ReduceMode::ByAll ? "PartitionByKey" : "PartitionsByKeys", "core", extractKeyLambda,
  973. sortDirection, sortKeySelector, BuildLambda(Pos, Y("partitionStream"), processPartitions));
  974. auto inputLabel = ListCall ? "inputRowsList" : "core";
  975. auto block(Y(Y("let", inputLabel, input)));
  976. auto filter = Source->BuildFilter(ctx, inputLabel);
  977. if (filter) {
  978. block = L(block, Y("let", inputLabel, filter));
  979. }
  980. if (ListCall) {
  981. block = L(block, Y("let", "core", "inputRowsList"));
  982. }
  983. if (ctx.EnableSystemColumns) {
  984. block = L(block, Y("let", "core", Y("RemoveSystemMembers", "core")));
  985. }
  986. block = L(block, Y("let", "core", Y("AutoDemux", partitionByKey)));
  987. if (Having) {
  988. block = L(block, Y("let", "core",
  989. Y("Filter", "core", BuildLambda(Pos, Y("row"), Y("Coalesce", Having, Y("Bool", Q("false")))))
  990. ));
  991. }
  992. return Y("block", Q(L(block, Y("return", "core"))));
  993. }
  994. TNodePtr BuildSort(TContext& ctx, const TString& label) override {
  995. Y_UNUSED(ctx);
  996. if (AssumeOrderBy.empty()) {
  997. return nullptr;
  998. }
  999. return Y("let", label, BuildSortSpec(AssumeOrderBy, label, false, true));
  1000. }
  1001. EOrderKind GetOrderKind() const override {
  1002. return AssumeOrderBy.empty() ? EOrderKind::None : EOrderKind::Assume;
  1003. }
  1004. TWriteSettings GetWriteSettings() const final {
  1005. return Settings;
  1006. }
  1007. bool HasSelectResult() const final {
  1008. return !Settings.Discard;
  1009. }
  1010. TPtr DoClone() const final {
  1011. return new TReduceSource(Pos, Mode, Source->CloneSource(), CloneContainer(OrderBy),
  1012. CloneContainer(Keys), CloneContainer(Args), SafeClone(Udf), SafeClone(Having), Settings,
  1013. CloneContainer(AssumeOrderBy), ListCall);
  1014. }
  1015. private:
  1016. ReduceMode Mode;
  1017. TSourcePtr Source;
  1018. TVector<TSortSpecificationPtr> OrderBy;
  1019. TVector<TNodePtr> Keys;
  1020. TVector<TNodePtr> Args;
  1021. TNodePtr Udf;
  1022. TNodePtr Having;
  1023. const TWriteSettings Settings;
  1024. TVector<TSortSpecificationPtr> AssumeOrderBy;
  1025. const bool ListCall;
  1026. };
  1027. TSourcePtr BuildReduce(TPosition pos,
  1028. ReduceMode mode,
  1029. TSourcePtr source,
  1030. TVector<TSortSpecificationPtr>&& orderBy,
  1031. TVector<TNodePtr>&& keys,
  1032. TVector<TNodePtr>&& args,
  1033. TNodePtr udf,
  1034. TNodePtr having,
  1035. const TWriteSettings& settings,
  1036. const TVector<TSortSpecificationPtr>& assumeOrderBy,
  1037. bool listCall) {
  1038. return new TReduceSource(pos, mode, std::move(source), std::move(orderBy), std::move(keys),
  1039. std::move(args), udf, having, settings, assumeOrderBy, listCall);
  1040. }
  1041. namespace {
  1042. bool InitAndGetGroupKey(TContext& ctx, const TNodePtr& expr, ISource* src, TStringBuf where, TString& keyColumn) {
  1043. keyColumn.clear();
  1044. YQL_ENSURE(src);
  1045. const bool isJoin = src->GetJoin();
  1046. if (!expr->Init(ctx, src)) {
  1047. return false;
  1048. }
  1049. auto keyNamePtr = expr->GetColumnName();
  1050. if (keyNamePtr && expr->GetLabel().empty()) {
  1051. keyColumn = *keyNamePtr;
  1052. auto sourceNamePtr = expr->GetSourceName();
  1053. auto columnNode = expr->GetColumnNode();
  1054. if (isJoin && (!columnNode || !columnNode->IsArtificial())) {
  1055. if (!sourceNamePtr || sourceNamePtr->empty()) {
  1056. if (!src->IsAlias(EExprSeat::GroupBy, keyColumn)) {
  1057. ctx.Error(expr->GetPos()) << "Columns in " << where << " should have correlation name, error in key: " << keyColumn;
  1058. return false;
  1059. }
  1060. } else {
  1061. keyColumn = DotJoin(*sourceNamePtr, keyColumn);
  1062. }
  1063. }
  1064. }
  1065. return true;
  1066. }
  1067. }
  1068. class TCompositeSelect: public IRealSource {
  1069. public:
  1070. TCompositeSelect(TPosition pos, TSourcePtr source, TSourcePtr originalSource, const TWriteSettings& settings)
  1071. : IRealSource(pos)
  1072. , Source(std::move(source))
  1073. , OriginalSource(std::move(originalSource))
  1074. , Settings(settings)
  1075. {
  1076. YQL_ENSURE(Source);
  1077. }
  1078. void SetSubselects(TVector<TSourcePtr>&& subselects, TVector<TNodePtr>&& grouping, TVector<TNodePtr>&& groupByExpr) {
  1079. Subselects = std::move(subselects);
  1080. Grouping = std::move(grouping);
  1081. GroupByExpr = std::move(groupByExpr);
  1082. Y_DEBUG_ABORT_UNLESS(Subselects.size() > 1);
  1083. }
  1084. void GetInputTables(TTableList& tableList) const override {
  1085. for (const auto& select: Subselects) {
  1086. select->GetInputTables(tableList);
  1087. }
  1088. ISource::GetInputTables(tableList);
  1089. }
  1090. bool DoInit(TContext& ctx, ISource* src) override {
  1091. if (AsInner) {
  1092. Source->UseAsInner();
  1093. }
  1094. if (src) {
  1095. src->AddDependentSource(Source.Get());
  1096. }
  1097. if (!Source->Init(ctx, src)) {
  1098. return false;
  1099. }
  1100. if (!Source->InitFilters(ctx)) {
  1101. return false;
  1102. }
  1103. if (!CalculateGroupingCols(ctx, src)) {
  1104. return false;
  1105. }
  1106. auto origSrc = OriginalSource.Get();
  1107. if (!origSrc->Init(ctx, src)) {
  1108. return false;
  1109. }
  1110. if (origSrc->IsFlattenByColumns() || origSrc->IsFlattenColumns()) {
  1111. Flatten = origSrc->IsFlattenByColumns() ?
  1112. origSrc->BuildFlattenByColumns("row") :
  1113. origSrc->BuildFlattenColumns("row");
  1114. if (!Flatten || !Flatten->Init(ctx, src)) {
  1115. return false;
  1116. }
  1117. }
  1118. if (origSrc->IsFlattenByExprs()) {
  1119. for (auto& expr : static_cast<ISource const*>(origSrc)->Expressions(EExprSeat::FlattenByExpr)) {
  1120. if (!expr->Init(ctx, origSrc)) {
  1121. return false;
  1122. }
  1123. }
  1124. PreFlattenMap = origSrc->BuildPreFlattenMap(ctx);
  1125. if (!PreFlattenMap) {
  1126. return false;
  1127. }
  1128. }
  1129. for (const auto& select: Subselects) {
  1130. select->SetLabel(Label);
  1131. if (AsInner) {
  1132. select->UseAsInner();
  1133. }
  1134. if (!select->Init(ctx, Source.Get())) {
  1135. return false;
  1136. }
  1137. }
  1138. TMaybe<size_t> groupingColumnsCount;
  1139. size_t idx = 0;
  1140. for (const auto& select : Subselects) {
  1141. size_t count = select->GetGroupingColumnsCount();
  1142. if (!groupingColumnsCount.Defined()) {
  1143. groupingColumnsCount = count;
  1144. } else if (*groupingColumnsCount != count) {
  1145. ctx.Error(select->GetPos()) << TStringBuilder() << "Mismatch GROUPING() column count in composite select input #"
  1146. << idx << ": expected " << *groupingColumnsCount << ", got: " << count << ". Please submit bug report";
  1147. return false;
  1148. }
  1149. ++idx;
  1150. }
  1151. return true;
  1152. }
  1153. TMaybe<bool> AddColumn(TContext& ctx, TColumnNode& column) override {
  1154. for (const auto& select: Subselects) {
  1155. if (!select->AddColumn(ctx, column)) {
  1156. return {};
  1157. }
  1158. }
  1159. return true;
  1160. }
  1161. TNodePtr Build(TContext& ctx) override {
  1162. auto input = Source->Build(ctx);
  1163. auto block(Y(Y("let", "composite", input)));
  1164. bool ordered = ctx.UseUnordered(*this);
  1165. if (PreFlattenMap) {
  1166. block = L(block, Y("let", "composite", Y(ordered ? "OrderedFlatMap" : "FlatMap", "composite", BuildLambda(Pos, Y("row"), PreFlattenMap))));
  1167. }
  1168. if (Flatten) {
  1169. block = L(block, Y("let", "composite", Y(ordered ? "OrderedFlatMap" : "FlatMap", "composite", BuildLambda(Pos, Y("row"), Flatten, "res"))));
  1170. }
  1171. auto filter = Source->BuildFilter(ctx, "composite");
  1172. if (filter) {
  1173. block = L(block, Y("let", "composite", filter));
  1174. }
  1175. TNodePtr compositeNode = Y("UnionAll");
  1176. for (const auto& select: Subselects) {
  1177. YQL_ENSURE(dynamic_cast<IComposableSource*>(select.Get()));
  1178. auto addNode = select->Build(ctx);
  1179. if (!addNode) {
  1180. return nullptr;
  1181. }
  1182. compositeNode->Add(addNode);
  1183. }
  1184. block = L(block, Y("let", "core", compositeNode));
  1185. YQL_ENSURE(!Subselects.empty());
  1186. dynamic_cast<IComposableSource*>(Subselects.front().Get())->BuildProjectWindowDistinct(block, ctx, false);
  1187. return Y("block", Q(L(block, Y("return", "core"))));
  1188. }
  1189. bool IsGroupByColumn(const TString& column) const override {
  1190. YQL_ENSURE(!GroupingCols.empty());
  1191. return GroupingCols.contains(column);
  1192. }
  1193. const TSet<TString>& GetGroupingCols() const {
  1194. return GroupingCols;
  1195. }
  1196. TNodePtr BuildSort(TContext& ctx, const TString& label) override {
  1197. return Subselects.front()->BuildSort(ctx, label);
  1198. }
  1199. EOrderKind GetOrderKind() const override {
  1200. return Subselects.front()->GetOrderKind();
  1201. }
  1202. const TColumns* GetColumns() const override{
  1203. return Subselects.front()->GetColumns();
  1204. }
  1205. ISource* RealSource() const {
  1206. return Source.Get();
  1207. }
  1208. TWriteSettings GetWriteSettings() const override {
  1209. return Settings;
  1210. }
  1211. bool HasSelectResult() const override {
  1212. return !Settings.Discard;
  1213. }
  1214. TNodePtr DoClone() const final {
  1215. auto newSource = MakeIntrusive<TCompositeSelect>(Pos, Source->CloneSource(), OriginalSource->CloneSource(), Settings);
  1216. newSource->SetSubselects(CloneContainer(Subselects), CloneContainer(Grouping), CloneContainer(GroupByExpr));
  1217. return newSource;
  1218. }
  1219. private:
  1220. bool CalculateGroupingCols(TContext& ctx, ISource* initSrc) {
  1221. auto origSrc = OriginalSource->CloneSource();
  1222. if (!origSrc->Init(ctx, initSrc)) {
  1223. return false;
  1224. }
  1225. bool hasError = false;
  1226. for (auto& expr: GroupByExpr) {
  1227. if (!expr->Init(ctx, origSrc.Get()) || !IsComparableExpression(ctx, expr, false, "GROUP BY")) {
  1228. hasError = true;
  1229. }
  1230. }
  1231. if (!origSrc->AddExpressions(ctx, GroupByExpr, EExprSeat::GroupBy)) {
  1232. hasError = true;
  1233. }
  1234. YQL_ENSURE(!Grouping.empty());
  1235. for (auto& grouping : Grouping) {
  1236. TString keyColumn;
  1237. if (!InitAndGetGroupKey(ctx, grouping, origSrc.Get(), "grouping sets", keyColumn)) {
  1238. hasError = true;
  1239. } else if (!keyColumn.empty()) {
  1240. GroupingCols.insert(keyColumn);
  1241. }
  1242. }
  1243. return !hasError;
  1244. }
  1245. TSourcePtr Source;
  1246. TSourcePtr OriginalSource;
  1247. TNodePtr Flatten;
  1248. TNodePtr PreFlattenMap;
  1249. const TWriteSettings Settings;
  1250. TVector<TSourcePtr> Subselects;
  1251. TVector<TNodePtr> Grouping;
  1252. TVector<TNodePtr> GroupByExpr;
  1253. TSet<TString> GroupingCols;
  1254. };
  1255. namespace {
  1256. TString FullColumnName(const TColumnNode& column) {
  1257. YQL_ENSURE(column.GetColumnName());
  1258. TString columnName = *column.GetColumnName();
  1259. if (column.IsUseSource()) {
  1260. columnName = DotJoin(*column.GetSourceName(), columnName);
  1261. }
  1262. return columnName;
  1263. }
  1264. }
  1265. /// \todo simplify class
  1266. class TSelectCore: public IRealSource, public IComposableSource {
  1267. public:
  1268. TSelectCore(
  1269. TPosition pos,
  1270. TSourcePtr source,
  1271. const TVector<TNodePtr>& groupByExpr,
  1272. const TVector<TNodePtr>& groupBy,
  1273. bool compactGroupBy,
  1274. const TString& groupBySuffix,
  1275. bool assumeSorted,
  1276. const TVector<TSortSpecificationPtr>& orderBy,
  1277. TNodePtr having,
  1278. const TWinSpecs& winSpecs,
  1279. TLegacyHoppingWindowSpecPtr legacyHoppingWindowSpec,
  1280. const TVector<TNodePtr>& terms,
  1281. bool distinct,
  1282. const TVector<TNodePtr>& without,
  1283. bool forceWithout,
  1284. bool selectStream,
  1285. const TWriteSettings& settings,
  1286. TColumnsSets&& uniqueSets,
  1287. TColumnsSets&& distinctSets
  1288. )
  1289. : IRealSource(pos)
  1290. , Source(std::move(source))
  1291. , GroupByExpr(groupByExpr)
  1292. , GroupBy(groupBy)
  1293. , AssumeSorted(assumeSorted)
  1294. , CompactGroupBy(compactGroupBy)
  1295. , GroupBySuffix(groupBySuffix)
  1296. , OrderBy(orderBy)
  1297. , Having(having)
  1298. , WinSpecs(winSpecs)
  1299. , Terms(terms)
  1300. , Without(without)
  1301. , ForceWithout(forceWithout)
  1302. , Distinct(distinct)
  1303. , LegacyHoppingWindowSpec(legacyHoppingWindowSpec)
  1304. , SelectStream(selectStream)
  1305. , Settings(settings)
  1306. , UniqueSets(std::move(uniqueSets))
  1307. , DistinctSets(std::move(distinctSets))
  1308. {
  1309. }
  1310. void AllColumns() override {
  1311. if (!OrderByInit) {
  1312. Columns.SetAll();
  1313. }
  1314. }
  1315. void GetInputTables(TTableList& tableList) const override {
  1316. Source->GetInputTables(tableList);
  1317. ISource::GetInputTables(tableList);
  1318. }
  1319. size_t GetGroupingColumnsCount() const override {
  1320. return Source->GetGroupingColumnsCount();
  1321. }
  1322. bool DoInit(TContext& ctx, ISource* initSrc) override {
  1323. if (AsInner) {
  1324. Source->UseAsInner();
  1325. }
  1326. if (!Source->Init(ctx, initSrc)) {
  1327. return false;
  1328. }
  1329. if (SelectStream && !Source->IsStream()) {
  1330. ctx.Error(Pos) << "SELECT STREAM is unsupported for non-streaming sources";
  1331. return false;
  1332. }
  1333. auto src = Source.Get();
  1334. bool hasError = false;
  1335. if (src->IsFlattenByExprs()) {
  1336. for (auto& expr : static_cast<ISource const*>(src)->Expressions(EExprSeat::FlattenByExpr)) {
  1337. if (!expr->Init(ctx, src)) {
  1338. hasError = true;
  1339. continue;
  1340. }
  1341. }
  1342. }
  1343. if (hasError) {
  1344. return false;
  1345. }
  1346. src->SetCompactGroupBy(CompactGroupBy);
  1347. src->SetGroupBySuffix(GroupBySuffix);
  1348. for (auto& term: Terms) {
  1349. term->CollectPreaggregateExprs(ctx, *src, DistinctAggrExpr);
  1350. }
  1351. if (Having) {
  1352. Having->CollectPreaggregateExprs(ctx, *src, DistinctAggrExpr);
  1353. }
  1354. for (auto& expr: GroupByExpr) {
  1355. if (auto sessionWindow = dynamic_cast<TSessionWindow*>(expr.Get())) {
  1356. if (Source->IsStream()) {
  1357. ctx.Error(Pos) << "SessionWindow is unsupported for streaming sources";
  1358. return false;
  1359. }
  1360. sessionWindow->MarkValid();
  1361. }
  1362. if (auto hoppingWindow = dynamic_cast<THoppingWindow*>(expr.Get())) {
  1363. hoppingWindow->MarkValid();
  1364. }
  1365. // need to collect and Init() preaggregated exprs before calling Init() on GROUP BY expression
  1366. TVector<TNodePtr> distinctAggrsInGroupBy;
  1367. expr->CollectPreaggregateExprs(ctx, *src, distinctAggrsInGroupBy);
  1368. for (auto& distinct : distinctAggrsInGroupBy) {
  1369. if (!distinct->Init(ctx, src)) {
  1370. return false;
  1371. }
  1372. }
  1373. DistinctAggrExpr.insert(DistinctAggrExpr.end(), distinctAggrsInGroupBy.begin(), distinctAggrsInGroupBy.end());
  1374. if (!expr->Init(ctx, src) || !IsComparableExpression(ctx, expr, false, "GROUP BY")) {
  1375. hasError = true;
  1376. }
  1377. }
  1378. if (hasError || !src->AddExpressions(ctx, GroupByExpr, EExprSeat::GroupBy)) {
  1379. return false;
  1380. }
  1381. for (auto& expr: DistinctAggrExpr) {
  1382. if (!expr->Init(ctx, src)) {
  1383. hasError = true;
  1384. }
  1385. }
  1386. if (hasError || !src->AddExpressions(ctx, DistinctAggrExpr, EExprSeat::DistinctAggr)) {
  1387. return false;
  1388. }
  1389. /// grouped expressions are available in filters
  1390. if (!Source->InitFilters(ctx)) {
  1391. return false;
  1392. }
  1393. for (auto& expr: GroupBy) {
  1394. TString usedColumn;
  1395. if (!InitAndGetGroupKey(ctx, expr, src, "GROUP BY", usedColumn)) {
  1396. hasError = true;
  1397. } else if (usedColumn) {
  1398. if (!src->AddGroupKey(ctx, usedColumn)) {
  1399. hasError = true;
  1400. }
  1401. }
  1402. }
  1403. if (hasError) {
  1404. return false;
  1405. }
  1406. if (Having && !Having->Init(ctx, src)) {
  1407. return false;
  1408. }
  1409. src->AddWindowSpecs(WinSpecs);
  1410. const bool isJoin = Source->GetJoin();
  1411. if (!InitSelect(ctx, src, isJoin, hasError)) {
  1412. return false;
  1413. }
  1414. src->FinishColumns();
  1415. auto aggRes = src->BuildAggregation("core", ctx);
  1416. if (!aggRes.second) {
  1417. return false;
  1418. }
  1419. Aggregate = aggRes.first;
  1420. if (src->IsFlattenByColumns() || src->IsFlattenColumns()) {
  1421. Flatten = src->IsFlattenByColumns() ?
  1422. src->BuildFlattenByColumns("row") :
  1423. src->BuildFlattenColumns("row");
  1424. if (!Flatten || !Flatten->Init(ctx, src)) {
  1425. return false;
  1426. }
  1427. }
  1428. if (src->IsFlattenByExprs()) {
  1429. PreFlattenMap = src->BuildPreFlattenMap(ctx);
  1430. if (!PreFlattenMap) {
  1431. return false;
  1432. }
  1433. }
  1434. if (GroupByExpr || DistinctAggrExpr) {
  1435. PreaggregatedMap = src->BuildPreaggregatedMap(ctx);
  1436. if (!PreaggregatedMap) {
  1437. return false;
  1438. }
  1439. }
  1440. if (Aggregate) {
  1441. if (!Aggregate->Init(ctx, src)) {
  1442. return false;
  1443. }
  1444. if (Having) {
  1445. Aggregate = Y(
  1446. "Filter",
  1447. Aggregate,
  1448. BuildLambda(Pos, Y("row"), Y("Coalesce", Having, Y("Bool", Q("false"))))
  1449. );
  1450. }
  1451. } else if (Having) {
  1452. if (Distinct) {
  1453. Aggregate = Y(
  1454. "Filter",
  1455. "core",
  1456. BuildLambda(Pos, Y("row"), Y("Coalesce", Having, Y("Bool", Q("false"))))
  1457. );
  1458. ctx.Warning(Having->GetPos(), TIssuesIds::YQL_HAVING_WITHOUT_AGGREGATION_IN_SELECT_DISTINCT)
  1459. << "The usage of HAVING without aggregations with SELECT DISTINCT is non-standard and will stop working soon. Please use WHERE instead.";
  1460. } else {
  1461. ctx.Error(Having->GetPos()) << "HAVING with meaning GROUP BY () should be with aggregation function.";
  1462. return false;
  1463. }
  1464. } else if (!Distinct && !GroupBy.empty()) {
  1465. ctx.Error(Pos) << "No aggregations were specified";
  1466. return false;
  1467. }
  1468. if (hasError) {
  1469. return false;
  1470. }
  1471. if (src->IsCalcOverWindow()) {
  1472. if (src->IsExprSeat(EExprSeat::WindowPartitionBy, EExprType::WithExpression)) {
  1473. PrewindowMap = src->BuildPrewindowMap(ctx);
  1474. if (!PrewindowMap) {
  1475. return false;
  1476. }
  1477. }
  1478. CalcOverWindow = src->BuildCalcOverWindow(ctx, "core");
  1479. if (!CalcOverWindow || !CalcOverWindow->Init(ctx, src)) {
  1480. return false;
  1481. }
  1482. }
  1483. return true;
  1484. }
  1485. TNodePtr Build(TContext& ctx) override {
  1486. auto input = Source->Build(ctx);
  1487. if (!input) {
  1488. return nullptr;
  1489. }
  1490. auto block(Y(Y("let", "core", input)));
  1491. if (Source->HasMatchRecognize()) {
  1492. if (auto matchRecognize = Source->BuildMatchRecognize(ctx, "core")) {
  1493. //use unique name match_recognize to find this block easily in unit tests
  1494. block = L(block, Y("let", "match_recognize", matchRecognize));
  1495. //then bind to the conventional name
  1496. block = L(block, Y("let", "core", "match_recognize"));
  1497. } else {
  1498. return nullptr;
  1499. }
  1500. }
  1501. bool ordered = ctx.UseUnordered(*this);
  1502. if (PreFlattenMap) {
  1503. block = L(block, Y("let", "core", Y(ordered ? "OrderedFlatMap" : "FlatMap", "core", BuildLambda(Pos, Y("row"), PreFlattenMap))));
  1504. }
  1505. if (Flatten) {
  1506. block = L(block, Y("let", "core", Y(ordered ? "OrderedFlatMap" : "FlatMap", "core", BuildLambda(Pos, Y("row"), Flatten, "res"))));
  1507. }
  1508. if (PreaggregatedMap) {
  1509. block = L(block, Y("let", "core", PreaggregatedMap));
  1510. if (Source->IsCompositeSource() && !Columns.QualifiedAll) {
  1511. block = L(block, Y("let", "preaggregated", "core"));
  1512. }
  1513. } else if (Source->IsCompositeSource() && !Columns.QualifiedAll) {
  1514. block = L(block, Y("let", "origcore", "core"));
  1515. }
  1516. auto filter = Source->BuildFilter(ctx, "core");
  1517. if (filter) {
  1518. block = L(block, Y("let", "core", filter));
  1519. }
  1520. if (Aggregate) {
  1521. block = L(block, Y("let", "core", Aggregate));
  1522. ordered = false;
  1523. }
  1524. const bool haveCompositeTerms = Source->IsCompositeSource() && !Columns.All && !Columns.QualifiedAll && !Columns.List.empty();
  1525. if (haveCompositeTerms) {
  1526. // column order does not matter here - it will be set in projection
  1527. YQL_ENSURE(Aggregate);
  1528. block = L(block, Y("let", "core", Y("Map", "core", BuildLambda(Pos, Y("row"), CompositeTerms, "row"))));
  1529. }
  1530. if (auto grouping = Source->BuildGroupingColumns("core")) {
  1531. block = L(block, Y("let", "core", grouping));
  1532. }
  1533. if (!Source->GetCompositeSource()) {
  1534. BuildProjectWindowDistinct(block, ctx, ordered);
  1535. }
  1536. return Y("block", Q(L(block, Y("return", "core"))));
  1537. }
  1538. void BuildProjectWindowDistinct(TNodePtr& block, TContext& ctx, bool ordered) override {
  1539. if (PrewindowMap) {
  1540. block = L(block, Y("let", "core", PrewindowMap));
  1541. }
  1542. if (CalcOverWindow) {
  1543. block = L(block, Y("let", "core", CalcOverWindow));
  1544. }
  1545. block = L(block, Y("let", "core", Y("PersistableRepr", BuildSqlProject(ctx, ordered))));
  1546. if (Distinct) {
  1547. block = L(block, Y("let", "core", Y("PersistableRepr", Y("SqlAggregateAll", Y("RemoveSystemMembers", "core")))));
  1548. }
  1549. }
  1550. TNodePtr BuildSort(TContext& ctx, const TString& label) override {
  1551. Y_UNUSED(ctx);
  1552. if (OrderBy.empty() || DisableSort_) {
  1553. return nullptr;
  1554. }
  1555. auto sorted = BuildSortSpec(OrderBy, label, false, AssumeSorted);
  1556. if (ExtraSortColumns.empty()) {
  1557. return Y("let", label, sorted);
  1558. }
  1559. auto body = Y();
  1560. for (const auto& [column, _] : ExtraSortColumns) {
  1561. body = L(body, Y("let", "row", Y("RemoveMember", "row", Q(column))));
  1562. }
  1563. body = L(body, Y("let", "res", "row"));
  1564. return Y("let", label, Y("OrderedMap", sorted, BuildLambda(Pos, Y("row"), body, "res")));
  1565. }
  1566. TNodePtr BuildCleanupColumns(TContext& ctx, const TString& label) override {
  1567. TNodePtr cleanup;
  1568. if (ctx.EnableSystemColumns && ctx.Settings.Mode != NSQLTranslation::ESqlMode::LIMITED_VIEW) {
  1569. if (Columns.All) {
  1570. cleanup = Y("let", label, Y("RemoveSystemMembers", label));
  1571. } else if (!Columns.List.empty()) {
  1572. const bool isJoin = Source->GetJoin();
  1573. if (!isJoin && Columns.QualifiedAll) {
  1574. if (ctx.SimpleColumns) {
  1575. cleanup = Y("let", label, Y("RemoveSystemMembers", label));
  1576. } else {
  1577. TNodePtr members;
  1578. for (auto& term: Terms) {
  1579. if (term->IsAsterisk()) {
  1580. auto sourceName = term->GetSourceName();
  1581. YQL_ENSURE(*sourceName && !sourceName->empty());
  1582. auto prefix = *sourceName + "._yql_";
  1583. members = members ? L(members, Q(prefix)) : Y(Q(prefix));
  1584. }
  1585. }
  1586. if (members) {
  1587. cleanup = Y("let", label, Y("RemovePrefixMembers", label, Q(members)));
  1588. }
  1589. }
  1590. }
  1591. }
  1592. }
  1593. return cleanup;
  1594. }
  1595. bool IsSelect() const override {
  1596. return true;
  1597. }
  1598. bool HasSelectResult() const override {
  1599. return !Settings.Discard;
  1600. }
  1601. bool IsStream() const override {
  1602. return Source->IsStream();
  1603. }
  1604. EOrderKind GetOrderKind() const override {
  1605. if (OrderBy.empty()) {
  1606. return EOrderKind::None;
  1607. }
  1608. return AssumeSorted ? EOrderKind::Assume : EOrderKind::Sort;
  1609. }
  1610. TWriteSettings GetWriteSettings() const override {
  1611. return Settings;
  1612. }
  1613. TMaybe<bool> AddColumn(TContext& ctx, TColumnNode& column) override {
  1614. const bool aggregated = Source->HasAggregations() || Distinct;
  1615. if (OrderByInit && (Source->GetJoin() || !aggregated)) {
  1616. // ORDER BY will try to find column not only in projection items, but also in Source.
  1617. // ```SELECT a, b FROM T ORDER BY c``` should work if c is present in T
  1618. const bool reliable = column.IsReliable();
  1619. column.SetAsNotReliable();
  1620. auto maybeExist = IRealSource::AddColumn(ctx, column);
  1621. if (reliable && !Source->GetJoin()) {
  1622. column.ResetAsReliable();
  1623. }
  1624. if (!maybeExist || !maybeExist.GetRef()) {
  1625. maybeExist = Source->AddColumn(ctx, column);
  1626. }
  1627. if (!maybeExist.Defined()) {
  1628. return maybeExist;
  1629. }
  1630. if (!DisableSort_ && !aggregated && column.GetColumnName() && IsMissingInProjection(ctx, column)) {
  1631. ExtraSortColumns[FullColumnName(column)] = &column;
  1632. }
  1633. return maybeExist;
  1634. }
  1635. return IRealSource::AddColumn(ctx, column);
  1636. }
  1637. bool IsMissingInProjection(TContext& ctx, const TColumnNode& column) const {
  1638. TString columnName = FullColumnName(column);
  1639. if (Columns.Real.contains(columnName) || Columns.Artificial.contains(columnName)) {
  1640. return false;
  1641. }
  1642. if (!ctx.SimpleColumns && Columns.QualifiedAll && !columnName.Contains('.')) {
  1643. return false;
  1644. }
  1645. if (!Columns.IsColumnPossible(ctx, columnName)) {
  1646. return true;
  1647. }
  1648. for (auto without: Without) {
  1649. auto name = *without->GetColumnName();
  1650. if (Source && Source->GetJoin()) {
  1651. name = DotJoin(*without->GetSourceName(), name);
  1652. }
  1653. if (name == columnName) {
  1654. return true;
  1655. }
  1656. }
  1657. return false;
  1658. }
  1659. TNodePtr PrepareWithout(const TNodePtr& base) {
  1660. auto terms = base;
  1661. if (Without) {
  1662. for (auto without: Without) {
  1663. auto name = *without->GetColumnName();
  1664. if (Source && Source->GetJoin()) {
  1665. name = DotJoin(*without->GetSourceName(), name);
  1666. }
  1667. terms = L(terms, Y("let", "row", Y(ForceWithout ? "ForceRemoveMember" : "RemoveMember", "row", Q(name))));
  1668. }
  1669. }
  1670. if (Source) {
  1671. for (auto column : Source->GetTmpWindowColumns()) {
  1672. terms = L(terms, Y("let", "row", Y("RemoveMember", "row", Q(column))));
  1673. }
  1674. }
  1675. return terms;
  1676. }
  1677. TNodePtr DoClone() const final {
  1678. return new TSelectCore(Pos, Source->CloneSource(), CloneContainer(GroupByExpr),
  1679. CloneContainer(GroupBy), CompactGroupBy, GroupBySuffix, AssumeSorted, CloneContainer(OrderBy),
  1680. SafeClone(Having), CloneContainer(WinSpecs), SafeClone(LegacyHoppingWindowSpec),
  1681. CloneContainer(Terms), Distinct, Without, ForceWithout, SelectStream, Settings, TColumnsSets(UniqueSets), TColumnsSets(DistinctSets));
  1682. }
  1683. private:
  1684. bool InitSelect(TContext& ctx, ISource* src, bool isJoin, bool& hasError) {
  1685. for (auto& [name, winSpec] : WinSpecs) {
  1686. for (size_t i = 0; i < winSpec->Partitions.size(); ++i) {
  1687. auto partitionNode = winSpec->Partitions[i];
  1688. if (auto sessionWindow = dynamic_cast<TSessionWindow*>(partitionNode.Get())) {
  1689. if (winSpec->Session) {
  1690. ctx.Error(partitionNode->GetPos()) << "Duplicate session window specification:";
  1691. ctx.Error(winSpec->Session->GetPos()) << "Previous session window is declared here";
  1692. hasError = true;
  1693. continue;
  1694. }
  1695. sessionWindow->MarkValid();
  1696. winSpec->Session = partitionNode;
  1697. }
  1698. if (!partitionNode->Init(ctx, src)) {
  1699. hasError = true;
  1700. continue;
  1701. }
  1702. if (!partitionNode->GetLabel() && !partitionNode->GetColumnName()) {
  1703. TString label = TStringBuilder() << "group_" << name << "_" << i;
  1704. partitionNode->SetLabel(label);
  1705. src->AddTmpWindowColumn(label);
  1706. }
  1707. }
  1708. if (!src->AddExpressions(ctx, winSpec->Partitions, EExprSeat::WindowPartitionBy)) {
  1709. hasError = true;
  1710. }
  1711. }
  1712. if (LegacyHoppingWindowSpec) {
  1713. if (!LegacyHoppingWindowSpec->TimeExtractor->Init(ctx, src)) {
  1714. hasError = true;
  1715. }
  1716. src->SetLegacyHoppingWindowSpec(LegacyHoppingWindowSpec);
  1717. }
  1718. for (auto& term: Terms) {
  1719. if (!term->Init(ctx, src)) {
  1720. hasError = true;
  1721. continue;
  1722. }
  1723. auto column = term->GetColumnName();
  1724. TString label(term->GetLabel());
  1725. bool hasName = true;
  1726. if (label.empty()) {
  1727. auto source = term->GetSourceName();
  1728. if (term->IsAsterisk() && !source->empty()) {
  1729. Columns.QualifiedAll = true;
  1730. label = DotJoin(*source, "*");
  1731. } else if (column) {
  1732. label = isJoin && source && *source ? DotJoin(*source, *column) : *column;
  1733. } else {
  1734. label = Columns.AddUnnamed();
  1735. hasName = false;
  1736. if (ctx.WarnUnnamedColumns) {
  1737. ctx.Warning(term->GetPos(), TIssuesIds::YQL_UNNAMED_COLUMN)
  1738. << "Autogenerated column name " << label << " will be used for expression";
  1739. }
  1740. }
  1741. }
  1742. if (hasName && !Columns.Add(&label, false, false, true)) {
  1743. ctx.Error(Pos) << "Duplicate column: " << label;
  1744. hasError = true;
  1745. }
  1746. }
  1747. CompositeTerms = Y();
  1748. if (!hasError && Source->IsCompositeSource() && !Columns.All && !Columns.QualifiedAll && !Columns.List.empty()) {
  1749. auto compositeSrcPtr = static_cast<TCompositeSelect*>(Source->GetCompositeSource());
  1750. if (compositeSrcPtr) {
  1751. const auto& groupings = compositeSrcPtr->GetGroupingCols();
  1752. for (const auto& column: groupings) {
  1753. if (Source->IsGroupByColumn(column)) {
  1754. continue;
  1755. }
  1756. const TString tableName = (GroupByExpr || DistinctAggrExpr) ? "preaggregated" : "origcore";
  1757. CompositeTerms = L(CompositeTerms, Y("let", "row", Y("AddMember", "row", BuildQuotedAtom(Pos, column), Y("Nothing", Y("MatchType",
  1758. Y("StructMemberType", Y("ListItemType", Y("TypeOf", tableName)), Q(column)),
  1759. Q("Optional"), Y("lambda", Q(Y("item")), "item"), Y("lambda", Q(Y("item")), Y("OptionalType", "item")))))));
  1760. }
  1761. }
  1762. }
  1763. for (auto iter: WinSpecs) {
  1764. auto winSpec = *iter.second;
  1765. for (auto orderSpec: winSpec.OrderBy) {
  1766. if (!orderSpec->OrderExpr->Init(ctx, src)) {
  1767. hasError = true;
  1768. }
  1769. }
  1770. }
  1771. if (Columns.All || Columns.QualifiedAll) {
  1772. Source->AllColumns();
  1773. }
  1774. for (const auto& without: Without) {
  1775. auto namePtr = without->GetColumnName();
  1776. auto sourcePtr = without->GetSourceName();
  1777. YQL_ENSURE(namePtr && *namePtr);
  1778. if (isJoin && !(sourcePtr && *sourcePtr)) {
  1779. ctx.Error(without->GetPos()) << "Expected correlation name for WITHOUT in JOIN";
  1780. hasError = true;
  1781. continue;
  1782. }
  1783. }
  1784. if (Having && !Having->Init(ctx, src)) {
  1785. hasError = true;
  1786. }
  1787. if (!src->IsCompositeSource() && !Columns.All && src->HasAggregations()) {
  1788. WarnIfAliasFromSelectIsUsedInGroupBy(ctx, Terms, GroupBy, GroupByExpr);
  1789. /// verify select aggregation compatibility
  1790. TVector<TNodePtr> exprs(Terms);
  1791. if (Having) {
  1792. exprs.push_back(Having);
  1793. }
  1794. for (const auto& iter: WinSpecs) {
  1795. for (const auto& sortSpec: iter.second->OrderBy) {
  1796. exprs.push_back(sortSpec->OrderExpr);
  1797. }
  1798. }
  1799. if (!ValidateAllNodesForAggregation(ctx, exprs)) {
  1800. hasError = true;
  1801. }
  1802. }
  1803. const auto label = GetLabel();
  1804. for (const auto& sortSpec: OrderBy) {
  1805. auto& expr = sortSpec->OrderExpr;
  1806. SetLabel(Source->GetLabel());
  1807. OrderByInit = true;
  1808. if (!expr->Init(ctx, this)) {
  1809. hasError = true;
  1810. continue;
  1811. }
  1812. OrderByInit = false;
  1813. if (!IsComparableExpression(ctx, expr, AssumeSorted, AssumeSorted ? "ASSUME ORDER BY" : "ORDER BY")) {
  1814. hasError = true;
  1815. continue;
  1816. }
  1817. }
  1818. SetLabel(label);
  1819. return !hasError;
  1820. }
  1821. TNodePtr PrepareJoinCoalesce(TContext& ctx, const TNodePtr& base, bool multipleQualifiedAll, const TVector<TString>& coalesceLabels) {
  1822. const bool isJoin = Source->GetJoin();
  1823. const bool needCoalesce = isJoin && ctx.SimpleColumns &&
  1824. (Columns.All || multipleQualifiedAll || ctx.CoalesceJoinKeysOnQualifiedAll);
  1825. if (!needCoalesce) {
  1826. return base;
  1827. }
  1828. auto terms = base;
  1829. const auto& sameKeyMap = Source->GetJoin()->GetSameKeysMap();
  1830. if (sameKeyMap) {
  1831. terms = L(terms, Y("let", "flatSameKeys", "row"));
  1832. for (const auto& [key, sources]: sameKeyMap) {
  1833. auto coalesceKeys = Y();
  1834. for (const auto& label : coalesceLabels) {
  1835. if (sources.contains(label)) {
  1836. coalesceKeys = L(coalesceKeys, Q(DotJoin(label, key)));
  1837. }
  1838. }
  1839. terms = L(terms, Y("let", "flatSameKeys", Y("CoalesceMembers", "flatSameKeys", Q(coalesceKeys))));
  1840. }
  1841. terms = L(terms, Y("let", "row", "flatSameKeys"));
  1842. }
  1843. return terms;
  1844. }
  1845. TNodePtr BuildSqlProject(TContext& ctx, bool ordered) {
  1846. auto sqlProjectArgs = Y();
  1847. const bool isJoin = Source->GetJoin();
  1848. if (Columns.All) {
  1849. YQL_ENSURE(Columns.List.empty());
  1850. auto terms = PrepareWithout(Y());
  1851. auto options = Y();
  1852. if (isJoin && ctx.SimpleColumns) {
  1853. terms = PrepareJoinCoalesce(ctx, terms, false, Source->GetJoin()->GetJoinLabels());
  1854. auto members = Y();
  1855. for (auto& source : Source->GetJoin()->GetJoinLabels()) {
  1856. YQL_ENSURE(!source.empty());
  1857. members = L(members, BuildQuotedAtom(Pos, source + "."));
  1858. }
  1859. if (GroupByExpr.empty() || ctx.BogousStarInGroupByOverJoin) {
  1860. terms = L(terms, Y("let", "res", Y("DivePrefixMembers", "row", Q(members))));
  1861. } else {
  1862. auto groupExprStruct = Y("AsStruct");
  1863. for (auto node : GroupByExpr) {
  1864. auto label = node->GetLabel();
  1865. YQL_ENSURE(label);
  1866. if (Source->IsGroupByColumn(label)) {
  1867. auto name = BuildQuotedAtom(Pos, label);
  1868. groupExprStruct = L(groupExprStruct, Q(Y(name, Y("Member", "row", name))));
  1869. }
  1870. }
  1871. auto groupColumnsStruct = Y("DivePrefixMembers", "row", Q(members));
  1872. terms = L(terms, Y("let", "res", Y("FlattenMembers", Q(Y(BuildQuotedAtom(Pos, ""), groupExprStruct)),
  1873. Q(Y(BuildQuotedAtom(Pos, ""), groupColumnsStruct)))));
  1874. }
  1875. options = L(options, Q(Y(Q("divePrefix"), Q(members))));
  1876. } else {
  1877. terms = L(terms, Y("let", "res", "row"));
  1878. }
  1879. sqlProjectArgs = L(sqlProjectArgs, Y("SqlProjectStarItem", "projectCoreType", BuildQuotedAtom(Pos, ""), BuildLambda(Pos, Y("row"), terms, "res"), Q(options)));
  1880. } else {
  1881. YQL_ENSURE(!Columns.List.empty());
  1882. YQL_ENSURE(Columns.List.size() == Terms.size());
  1883. TVector<TString> coalesceLabels;
  1884. bool multipleQualifiedAll = false;
  1885. if (isJoin && ctx.SimpleColumns) {
  1886. THashSet<TString> starTerms;
  1887. for (auto& term: Terms) {
  1888. if (term->IsAsterisk()) {
  1889. auto sourceName = term->GetSourceName();
  1890. YQL_ENSURE(*sourceName && !sourceName->empty());
  1891. YQL_ENSURE(Columns.QualifiedAll);
  1892. starTerms.insert(*sourceName);
  1893. }
  1894. }
  1895. TVector<TString> matched;
  1896. TVector<TString> unmatched;
  1897. for (auto& label : Source->GetJoin()->GetJoinLabels()) {
  1898. if (starTerms.contains(label)) {
  1899. matched.push_back(label);
  1900. } else {
  1901. unmatched.push_back(label);
  1902. }
  1903. }
  1904. coalesceLabels.insert(coalesceLabels.end(), matched.begin(), matched.end());
  1905. coalesceLabels.insert(coalesceLabels.end(), unmatched.begin(), unmatched.end());
  1906. multipleQualifiedAll = starTerms.size() > 1;
  1907. }
  1908. auto column = Columns.List.begin();
  1909. auto isNamedColumn = Columns.NamedColumns.begin();
  1910. for (auto& term: Terms) {
  1911. auto sourceName = term->GetSourceName();
  1912. if (!term->IsAsterisk()) {
  1913. auto body = Y();
  1914. body = L(body, Y("let", "res", term));
  1915. TPosition lambdaPos = Pos;
  1916. TPosition aliasPos = Pos;
  1917. if (term->IsImplicitLabel() && ctx.WarnOnAnsiAliasShadowing) {
  1918. // TODO: recanonize for positions below
  1919. lambdaPos = term->GetPos();
  1920. aliasPos = term->GetLabelPos() ? *term->GetLabelPos() : lambdaPos;
  1921. }
  1922. auto projectItem = Y("SqlProjectItem", "projectCoreType", BuildQuotedAtom(aliasPos, *isNamedColumn ? *column : ""), BuildLambda(lambdaPos, Y("row"), body, "res"));
  1923. if (term->IsImplicitLabel() && ctx.WarnOnAnsiAliasShadowing) {
  1924. projectItem = L(projectItem, Q(Y(Q(Y(Q("warnShadow"))))));
  1925. }
  1926. if (!*isNamedColumn) {
  1927. projectItem = L(projectItem, Q(Y(Q(Y(Q("autoName"))))));
  1928. }
  1929. sqlProjectArgs = L(sqlProjectArgs, projectItem);
  1930. } else {
  1931. auto terms = PrepareWithout(Y());
  1932. auto options = Y();
  1933. if (ctx.SimpleColumns && !isJoin) {
  1934. terms = L(terms, Y("let", "res", "row"));
  1935. } else {
  1936. terms = PrepareJoinCoalesce(ctx, terms, multipleQualifiedAll, coalesceLabels);
  1937. auto members = isJoin ? Y() : Y("FlattenMembers");
  1938. if (isJoin) {
  1939. members = L(members, BuildQuotedAtom(Pos, *sourceName + "."));
  1940. if (ctx.SimpleColumns) {
  1941. options = L(options, Q(Y(Q("divePrefix"), Q(members))));
  1942. }
  1943. members = Y(ctx.SimpleColumns ? "DivePrefixMembers" : "SelectMembers", "row", Q(members));
  1944. } else {
  1945. auto prefix = BuildQuotedAtom(Pos, ctx.SimpleColumns ? "" : *sourceName + ".");
  1946. members = L(members, Q(Y(prefix, "row")));
  1947. if (!ctx.SimpleColumns) {
  1948. options = L(options, Q(Y(Q("addPrefix"), prefix)));
  1949. }
  1950. }
  1951. terms = L(terms, Y("let", "res", members));
  1952. }
  1953. sqlProjectArgs = L(sqlProjectArgs, Y("SqlProjectStarItem", "projectCoreType", BuildQuotedAtom(Pos, *sourceName), BuildLambda(Pos, Y("row"), terms, "res"), Q(options)));
  1954. }
  1955. ++column;
  1956. ++isNamedColumn;
  1957. }
  1958. }
  1959. for (const auto& [columnName, column]: ExtraSortColumns) {
  1960. auto body = Y();
  1961. body = L(body, Y("let", "res", column));
  1962. TPosition pos = column->GetPos();
  1963. auto projectItem = Y("SqlProjectItem", "projectCoreType", BuildQuotedAtom(pos, columnName), BuildLambda(pos, Y("row"), body, "res"));
  1964. sqlProjectArgs = L(sqlProjectArgs, projectItem);
  1965. }
  1966. auto block(Y(Y("let", "projectCoreType", Y("TypeOf", "core"))));
  1967. block = L(block, Y("let", "core", Y(ordered ? "OrderedSqlProject" : "SqlProject", "core", Q(sqlProjectArgs))));
  1968. if (!(UniqueSets.empty() && DistinctSets.empty())) {
  1969. block = L(block, Y("let", "core", Y("RemoveSystemMembers", "core")));
  1970. const auto MakeUniqueHint = [this](INode::TPtr& block, const TColumnsSets& sets, bool distinct) {
  1971. if (!sets.empty()) {
  1972. auto assume = Y(distinct ? "AssumeDistinctHint" : "AssumeUniqueHint", "core");
  1973. if (!sets.front().empty()) {
  1974. for (const auto& columns : sets) {
  1975. auto set = Y();
  1976. for (const auto& column : columns) {
  1977. set = L(set, Q(column));
  1978. }
  1979. assume = L(assume, Q(set));
  1980. }
  1981. }
  1982. block = L(block, Y("let", "core", assume));
  1983. }
  1984. };
  1985. MakeUniqueHint(block, DistinctSets, true);
  1986. MakeUniqueHint(block, UniqueSets, false);
  1987. }
  1988. return Y("block", Q(L(block, Y("return", "core"))));
  1989. }
  1990. private:
  1991. TSourcePtr Source;
  1992. TVector<TNodePtr> GroupByExpr;
  1993. TVector<TNodePtr> DistinctAggrExpr;
  1994. TVector<TNodePtr> GroupBy;
  1995. bool AssumeSorted = false;
  1996. bool CompactGroupBy = false;
  1997. TString GroupBySuffix;
  1998. TVector<TSortSpecificationPtr> OrderBy;
  1999. TNodePtr Having;
  2000. TWinSpecs WinSpecs;
  2001. TNodePtr Flatten;
  2002. TNodePtr PreFlattenMap;
  2003. TNodePtr PreaggregatedMap;
  2004. TNodePtr PrewindowMap;
  2005. TNodePtr Aggregate;
  2006. TNodePtr CalcOverWindow;
  2007. TNodePtr CompositeTerms;
  2008. TVector<TNodePtr> Terms;
  2009. TVector<TNodePtr> Without;
  2010. const bool ForceWithout;
  2011. const bool Distinct;
  2012. bool OrderByInit = false;
  2013. TLegacyHoppingWindowSpecPtr LegacyHoppingWindowSpec;
  2014. const bool SelectStream;
  2015. const TWriteSettings Settings;
  2016. const TColumnsSets UniqueSets, DistinctSets;
  2017. TMap<TString, TNodePtr> ExtraSortColumns;
  2018. };
  2019. class TProcessSource: public IRealSource {
  2020. public:
  2021. TProcessSource(
  2022. TPosition pos,
  2023. TSourcePtr source,
  2024. TNodePtr with,
  2025. bool withExtFunction,
  2026. TVector<TNodePtr>&& terms,
  2027. bool listCall,
  2028. bool processStream,
  2029. const TWriteSettings& settings,
  2030. const TVector<TSortSpecificationPtr>& assumeOrderBy
  2031. )
  2032. : IRealSource(pos)
  2033. , Source(std::move(source))
  2034. , With(with)
  2035. , WithExtFunction(withExtFunction)
  2036. , Terms(std::move(terms))
  2037. , ListCall(listCall)
  2038. , ProcessStream(processStream)
  2039. , Settings(settings)
  2040. , AssumeOrderBy(assumeOrderBy)
  2041. {
  2042. }
  2043. void GetInputTables(TTableList& tableList) const override {
  2044. Source->GetInputTables(tableList);
  2045. ISource::GetInputTables(tableList);
  2046. }
  2047. bool DoInit(TContext& ctx, ISource* initSrc) override {
  2048. if (AsInner) {
  2049. Source->UseAsInner();
  2050. }
  2051. if (!Source->Init(ctx, initSrc)) {
  2052. return false;
  2053. }
  2054. if (ProcessStream && !Source->IsStream()) {
  2055. ctx.Error(Pos) << "PROCESS STREAM is unsupported for non-streaming sources";
  2056. return false;
  2057. }
  2058. auto src = Source.Get();
  2059. if (!With) {
  2060. src->AllColumns();
  2061. Columns.SetAll();
  2062. src->FinishColumns();
  2063. return true;
  2064. }
  2065. /// grouped expressions are available in filters
  2066. if (!Source->InitFilters(ctx)) {
  2067. return false;
  2068. }
  2069. TSourcePtr fakeSource = nullptr;
  2070. if (ListCall && !WithExtFunction) {
  2071. fakeSource = BuildFakeSource(src->GetPos());
  2072. src->AllColumns();
  2073. }
  2074. auto processSource = fakeSource != nullptr ? fakeSource.Get() : src;
  2075. Y_DEBUG_ABORT_UNLESS(processSource != nullptr);
  2076. if (!With->Init(ctx, processSource)) {
  2077. return false;
  2078. }
  2079. if (With->GetLabel().empty()) {
  2080. Columns.SetAll();
  2081. } else {
  2082. if (ListCall) {
  2083. ctx.Error(With->GetPos()) << "Label is not allowed to use with TableRows()";
  2084. return false;
  2085. }
  2086. Columns.Add(&With->GetLabel(), false);
  2087. }
  2088. bool hasError = false;
  2089. TNodePtr produce;
  2090. if (WithExtFunction) {
  2091. produce = Y();
  2092. } else {
  2093. TString processCall = (ListCall ? "SqlProcess" : "Apply");
  2094. produce = Y(processCall, With);
  2095. }
  2096. TMaybe<ui32> listPosIndex;
  2097. ui32 termIndex = 0;
  2098. for (auto& term: Terms) {
  2099. if (!term->GetLabel().empty()) {
  2100. ctx.Error(term->GetPos()) << "Labels are not allowed for PROCESS terms";
  2101. hasError = true;
  2102. continue;
  2103. }
  2104. if (!term->Init(ctx, processSource)) {
  2105. hasError = true;
  2106. continue;
  2107. }
  2108. if (ListCall) {
  2109. if (auto atom = dynamic_cast<TTableRows*>(term.Get())) {
  2110. listPosIndex = termIndex;
  2111. }
  2112. }
  2113. ++termIndex;
  2114. produce = L(produce, term);
  2115. }
  2116. if (hasError) {
  2117. return false;
  2118. }
  2119. if (ListCall && !WithExtFunction) {
  2120. YQL_ENSURE(listPosIndex.Defined());
  2121. produce = L(produce, Q(ToString(*listPosIndex)));
  2122. }
  2123. if (!produce->Init(ctx, src)) {
  2124. hasError = true;
  2125. }
  2126. if (!(WithExtFunction && Terms.empty())) {
  2127. TVector<TNodePtr>(1, produce).swap(Terms);
  2128. }
  2129. src->FinishColumns();
  2130. const auto label = GetLabel();
  2131. for (const auto& sortSpec: AssumeOrderBy) {
  2132. auto& expr = sortSpec->OrderExpr;
  2133. SetLabel(Source->GetLabel());
  2134. if (!expr->Init(ctx, this)) {
  2135. hasError = true;
  2136. continue;
  2137. }
  2138. if (!IsComparableExpression(ctx, expr, true, "ASSUME ORDER BY")) {
  2139. hasError = true;
  2140. continue;
  2141. }
  2142. }
  2143. SetLabel(label);
  2144. return !hasError;
  2145. }
  2146. TNodePtr Build(TContext& ctx) override {
  2147. auto input = Source->Build(ctx);
  2148. if (!input) {
  2149. return nullptr;
  2150. }
  2151. if (!With) {
  2152. auto res = input;
  2153. if (ctx.EnableSystemColumns) {
  2154. res = Y("RemoveSystemMembers", res);
  2155. }
  2156. return res;
  2157. }
  2158. TString inputLabel = ListCall ? "inputRowsList" : "core";
  2159. auto block(Y(Y("let", inputLabel, input)));
  2160. auto filter = Source->BuildFilter(ctx, inputLabel);
  2161. if (filter) {
  2162. block = L(block, Y("let", inputLabel, filter));
  2163. }
  2164. if (WithExtFunction) {
  2165. auto preTransform = Y("RemoveSystemMembers", inputLabel);
  2166. if (Terms.size() > 0) {
  2167. preTransform = Y("Map", preTransform, BuildLambda(Pos, Y("row"), Q(Terms[0])));
  2168. }
  2169. block = L(block, Y("let", inputLabel, preTransform));
  2170. block = L(block, Y("let", "transform", With));
  2171. block = L(block, Y("let", "core", Y("Apply", "transform", inputLabel)));
  2172. } else if (ListCall) {
  2173. block = L(block, Y("let", "core", Terms[0]));
  2174. } else {
  2175. auto terms = BuildColumnsTerms(ctx);
  2176. block = L(block, Y("let", "core", Y(ctx.UseUnordered(*this) ? "OrderedFlatMap" : "FlatMap", "core", BuildLambda(Pos, Y("row"), terms, "res"))));
  2177. }
  2178. block = L(block, Y("let", "core", Y("AutoDemux", Y("PersistableRepr", "core"))));
  2179. return Y("block", Q(L(block, Y("return", "core"))));
  2180. }
  2181. TNodePtr BuildSort(TContext& ctx, const TString& label) override {
  2182. Y_UNUSED(ctx);
  2183. if (AssumeOrderBy.empty()) {
  2184. return nullptr;
  2185. }
  2186. return Y("let", label, BuildSortSpec(AssumeOrderBy, label, false, true));
  2187. }
  2188. EOrderKind GetOrderKind() const override {
  2189. if (!With) {
  2190. return EOrderKind::Passthrough;
  2191. }
  2192. return AssumeOrderBy.empty() ? EOrderKind::None : EOrderKind::Assume;
  2193. }
  2194. bool IsSelect() const override {
  2195. return false;
  2196. }
  2197. bool HasSelectResult() const override {
  2198. return !Settings.Discard;
  2199. }
  2200. bool IsStream() const override {
  2201. return Source->IsStream();
  2202. }
  2203. TWriteSettings GetWriteSettings() const override {
  2204. return Settings;
  2205. }
  2206. TNodePtr DoClone() const final {
  2207. return new TProcessSource(Pos, Source->CloneSource(), SafeClone(With), WithExtFunction,
  2208. CloneContainer(Terms), ListCall, ProcessStream, Settings, CloneContainer(AssumeOrderBy));
  2209. }
  2210. private:
  2211. TNodePtr BuildColumnsTerms(TContext& ctx) {
  2212. Y_UNUSED(ctx);
  2213. TNodePtr terms;
  2214. Y_DEBUG_ABORT_UNLESS(Terms.size() == 1);
  2215. if (Columns.All) {
  2216. terms = Y(Y("let", "res", Y("ToSequence", Terms.front())));
  2217. } else {
  2218. Y_DEBUG_ABORT_UNLESS(Columns.List.size() == Terms.size());
  2219. terms = L(Y(), Y("let", "res",
  2220. L(Y("AsStructUnordered"), Q(Y(BuildQuotedAtom(Pos, Columns.List.front()), Terms.front())))));
  2221. terms = L(terms, Y("let", "res", Y("Just", "res")));
  2222. }
  2223. return terms;
  2224. }
  2225. private:
  2226. TSourcePtr Source;
  2227. TNodePtr With;
  2228. const bool WithExtFunction;
  2229. TVector<TNodePtr> Terms;
  2230. const bool ListCall;
  2231. const bool ProcessStream;
  2232. const TWriteSettings Settings;
  2233. TVector<TSortSpecificationPtr> AssumeOrderBy;
  2234. };
  2235. TSourcePtr BuildProcess(
  2236. TPosition pos,
  2237. TSourcePtr source,
  2238. TNodePtr with,
  2239. bool withExtFunction,
  2240. TVector<TNodePtr>&& terms,
  2241. bool listCall,
  2242. bool processStream,
  2243. const TWriteSettings& settings,
  2244. const TVector<TSortSpecificationPtr>& assumeOrderBy
  2245. ) {
  2246. return new TProcessSource(pos, std::move(source), with, withExtFunction, std::move(terms), listCall, processStream, settings, assumeOrderBy);
  2247. }
  2248. class TNestedProxySource: public IProxySource {
  2249. public:
  2250. TNestedProxySource(TPosition pos, const TVector<TNodePtr>& groupBy, TSourcePtr source)
  2251. : IProxySource(pos, source.Get())
  2252. , CompositeSelect(nullptr)
  2253. , Holder(std::move(source))
  2254. , GroupBy(groupBy)
  2255. {}
  2256. TNestedProxySource(TCompositeSelect* compositeSelect, const TVector<TNodePtr>& groupBy)
  2257. : IProxySource(compositeSelect->GetPos(), compositeSelect->RealSource())
  2258. , CompositeSelect(compositeSelect)
  2259. , GroupBy(groupBy)
  2260. {}
  2261. bool DoInit(TContext& ctx, ISource* src) override {
  2262. return Source->Init(ctx, src);
  2263. }
  2264. TNodePtr Build(TContext& ctx) override {
  2265. return CompositeSelect ? BuildAtom(Pos, "composite", TNodeFlags::Default) : Source->Build(ctx);
  2266. }
  2267. bool InitFilters(TContext& ctx) override {
  2268. return CompositeSelect ? true : Source->InitFilters(ctx);
  2269. }
  2270. TNodePtr BuildFilter(TContext& ctx, const TString& label) override {
  2271. return CompositeSelect ? nullptr : Source->BuildFilter(ctx, label);
  2272. }
  2273. IJoin* GetJoin() override {
  2274. return Source->GetJoin();
  2275. }
  2276. bool IsCompositeSource() const override {
  2277. return true;
  2278. }
  2279. ISource* GetCompositeSource() override {
  2280. return CompositeSelect;
  2281. }
  2282. bool AddGrouping(TContext& ctx, const TVector<TString>& columns, TString& hintColumn) override {
  2283. Y_UNUSED(ctx);
  2284. hintColumn = TStringBuilder() << "GroupingHint" << Hints.size();
  2285. ui64 hint = 0;
  2286. if (GroupByColumns.empty()) {
  2287. const bool isJoin = GetJoin();
  2288. for (const auto& groupByNode: GroupBy) {
  2289. auto namePtr = groupByNode->GetColumnName();
  2290. YQL_ENSURE(namePtr);
  2291. TString column = *namePtr;
  2292. if (isJoin) {
  2293. auto sourceNamePtr = groupByNode->GetSourceName();
  2294. if (sourceNamePtr && !sourceNamePtr->empty()) {
  2295. column = DotJoin(*sourceNamePtr, column);
  2296. }
  2297. }
  2298. GroupByColumns.insert(column);
  2299. }
  2300. }
  2301. for (const auto& column: columns) {
  2302. hint <<= 1;
  2303. if (!GroupByColumns.contains(column)) {
  2304. hint += 1;
  2305. }
  2306. }
  2307. Hints.push_back(hint);
  2308. return true;
  2309. }
  2310. size_t GetGroupingColumnsCount() const override {
  2311. return Hints.size();
  2312. }
  2313. TNodePtr BuildGroupingColumns(const TString& label) override {
  2314. if (Hints.empty()) {
  2315. return nullptr;
  2316. }
  2317. auto body = Y();
  2318. for (size_t i = 0; i < Hints.size(); ++i) {
  2319. TString hintColumn = TStringBuilder() << "GroupingHint" << i;
  2320. TString hintValue = ToString(Hints[i]);
  2321. body = L(body, Y("let", "row", Y("AddMember", "row", Q(hintColumn), Y("Uint64", Q(hintValue)))));
  2322. }
  2323. return Y("Map", label, BuildLambda(Pos, Y("row"), body, "row"));
  2324. }
  2325. void FinishColumns() override {
  2326. Source->FinishColumns();
  2327. }
  2328. TMaybe<bool> AddColumn(TContext& ctx, TColumnNode& column) override {
  2329. if (const TString* columnName = column.GetColumnName()) {
  2330. if (columnName && IsExprAlias(*columnName)) {
  2331. return true;
  2332. }
  2333. }
  2334. return Source->AddColumn(ctx, column);
  2335. }
  2336. TPtr DoClone() const final {
  2337. YQL_ENSURE(Hints.empty());
  2338. return Holder.Get() ? new TNestedProxySource(Pos, CloneContainer(GroupBy), Holder->CloneSource()) :
  2339. new TNestedProxySource(CompositeSelect, CloneContainer(GroupBy));
  2340. }
  2341. private:
  2342. TCompositeSelect* CompositeSelect;
  2343. TSourcePtr Holder;
  2344. TVector<TNodePtr> GroupBy;
  2345. mutable TSet<TString> GroupByColumns;
  2346. mutable TVector<ui64> Hints;
  2347. };
  2348. namespace {
  2349. TSourcePtr DoBuildSelectCore(
  2350. TContext& ctx,
  2351. TPosition pos,
  2352. TSourcePtr originalSource,
  2353. TSourcePtr source,
  2354. const TVector<TNodePtr>& groupByExpr,
  2355. const TVector<TNodePtr>& groupBy,
  2356. bool compactGroupBy,
  2357. const TString& groupBySuffix,
  2358. bool assumeSorted,
  2359. const TVector<TSortSpecificationPtr>& orderBy,
  2360. TNodePtr having,
  2361. TWinSpecs&& winSpecs,
  2362. TLegacyHoppingWindowSpecPtr legacyHoppingWindowSpec,
  2363. TVector<TNodePtr>&& terms,
  2364. bool distinct,
  2365. TVector<TNodePtr>&& without,
  2366. bool forceWithout,
  2367. bool selectStream,
  2368. const TWriteSettings& settings,
  2369. TColumnsSets&& uniqueSets,
  2370. TColumnsSets&& distinctSets
  2371. ) {
  2372. if (groupBy.empty() || !groupBy.front()->ContentListPtr()) {
  2373. return new TSelectCore(pos, std::move(source), groupByExpr, groupBy, compactGroupBy, groupBySuffix, assumeSorted,
  2374. orderBy, having, winSpecs, legacyHoppingWindowSpec, terms, distinct, without, forceWithout, selectStream, settings, std::move(uniqueSets), std::move(distinctSets));
  2375. }
  2376. if (groupBy.size() == 1) {
  2377. /// actualy no big idea to use grouping function in this case (result allways 0)
  2378. auto contentPtr = groupBy.front()->ContentListPtr();
  2379. source = new TNestedProxySource(pos, *contentPtr, source);
  2380. return DoBuildSelectCore(ctx, pos, originalSource, source, groupByExpr, *contentPtr, compactGroupBy, groupBySuffix,
  2381. assumeSorted, orderBy, having, std::move(winSpecs),
  2382. legacyHoppingWindowSpec, std::move(terms), distinct, std::move(without), forceWithout, selectStream, settings, std::move(uniqueSets), std::move(distinctSets));
  2383. }
  2384. /// \todo some smart merge logic, generalize common part of grouping (expr, flatten, etc)?
  2385. TIntrusivePtr<TCompositeSelect> compositeSelect = new TCompositeSelect(pos, std::move(source), originalSource->CloneSource(), settings);
  2386. size_t totalGroups = 0;
  2387. TVector<TSourcePtr> subselects;
  2388. TVector<TNodePtr> groupingCols;
  2389. for (auto& grouping: groupBy) {
  2390. auto contentPtr = grouping->ContentListPtr();
  2391. TVector<TNodePtr> cache(1, nullptr);
  2392. if (!contentPtr) {
  2393. cache[0] = grouping;
  2394. contentPtr = &cache;
  2395. }
  2396. groupingCols.insert(groupingCols.end(), contentPtr->cbegin(), contentPtr->cend());
  2397. TSourcePtr proxySource = new TNestedProxySource(compositeSelect.Get(), CloneContainer(*contentPtr));
  2398. if (!subselects.empty()) {
  2399. /// clone terms for others usage
  2400. TVector<TNodePtr> termsCopy;
  2401. for (const auto& term: terms) {
  2402. termsCopy.emplace_back(term->Clone());
  2403. }
  2404. std::swap(terms, termsCopy);
  2405. }
  2406. totalGroups += contentPtr->size();
  2407. TSelectCore* selectCore = new TSelectCore(pos, std::move(proxySource), CloneContainer(groupByExpr),
  2408. CloneContainer(*contentPtr), compactGroupBy, groupBySuffix, assumeSorted, orderBy, SafeClone(having), CloneContainer(winSpecs),
  2409. legacyHoppingWindowSpec, terms, distinct, without, forceWithout, selectStream, settings, TColumnsSets(uniqueSets), TColumnsSets(distinctSets));
  2410. subselects.emplace_back(selectCore);
  2411. }
  2412. if (totalGroups > ctx.PragmaGroupByLimit) {
  2413. ctx.Error(pos) << "Unable to GROUP BY more than " << ctx.PragmaGroupByLimit << " groups, you try use " << totalGroups << " groups";
  2414. return nullptr;
  2415. }
  2416. compositeSelect->SetSubselects(std::move(subselects), std::move(groupingCols), CloneContainer(groupByExpr));
  2417. return compositeSelect;
  2418. }
  2419. }
  2420. TSourcePtr BuildSelectCore(
  2421. TContext& ctx,
  2422. TPosition pos,
  2423. TSourcePtr source,
  2424. const TVector<TNodePtr>& groupByExpr,
  2425. const TVector<TNodePtr>& groupBy,
  2426. bool compactGroupBy,
  2427. const TString& groupBySuffix,
  2428. bool assumeSorted,
  2429. const TVector<TSortSpecificationPtr>& orderBy,
  2430. TNodePtr having,
  2431. TWinSpecs&& winSpecs,
  2432. TLegacyHoppingWindowSpecPtr legacyHoppingWindowSpec,
  2433. TVector<TNodePtr>&& terms,
  2434. bool distinct,
  2435. TVector<TNodePtr>&& without,
  2436. bool forceWithout,
  2437. bool selectStream,
  2438. const TWriteSettings& settings,
  2439. TColumnsSets&& uniqueSets,
  2440. TColumnsSets&& distinctSets
  2441. )
  2442. {
  2443. return DoBuildSelectCore(ctx, pos, source, source, groupByExpr, groupBy, compactGroupBy, groupBySuffix, assumeSorted, orderBy,
  2444. having, std::move(winSpecs), legacyHoppingWindowSpec, std::move(terms), distinct, std::move(without), forceWithout, selectStream, settings, std::move(uniqueSets), std::move(distinctSets));
  2445. }
  2446. class TUnion: public IRealSource {
  2447. public:
  2448. TUnion(TPosition pos, TVector<TSourcePtr>&& sources, bool quantifierAll, const TWriteSettings& settings)
  2449. : IRealSource(pos)
  2450. , Sources(std::move(sources))
  2451. , QuantifierAll(quantifierAll)
  2452. , Settings(settings)
  2453. {
  2454. }
  2455. const TColumns* GetColumns() const override {
  2456. return IRealSource::GetColumns();
  2457. }
  2458. void GetInputTables(TTableList& tableList) const override {
  2459. for (auto& x : Sources) {
  2460. x->GetInputTables(tableList);
  2461. }
  2462. ISource::GetInputTables(tableList);
  2463. }
  2464. bool DoInit(TContext& ctx, ISource* src) override {
  2465. bool first = true;
  2466. for (auto& s: Sources) {
  2467. s->UseAsInner();
  2468. if (!s->Init(ctx, src)) {
  2469. return false;
  2470. }
  2471. if (!ctx.PositionalUnionAll || first) {
  2472. auto c = s->GetColumns();
  2473. Y_DEBUG_ABORT_UNLESS(c);
  2474. Columns.Merge(*c);
  2475. first = false;
  2476. }
  2477. }
  2478. return true;
  2479. }
  2480. TNodePtr Build(TContext& ctx) override {
  2481. TPtr res;
  2482. if (QuantifierAll) {
  2483. if (ctx.EmitUnionMerge) {
  2484. res = ctx.PositionalUnionAll ? Y("UnionMergePositional") : Y("UnionMerge");
  2485. } else {
  2486. res = ctx.PositionalUnionAll ? Y("UnionAllPositional") : Y("UnionAll");
  2487. }
  2488. } else {
  2489. res = ctx.PositionalUnionAll ? Y("UnionPositional") : Y("Union");
  2490. }
  2491. for (auto& s: Sources) {
  2492. auto input = s->Build(ctx);
  2493. if (!input) {
  2494. return nullptr;
  2495. }
  2496. res->Add(input);
  2497. }
  2498. return res;
  2499. }
  2500. bool IsStream() const override {
  2501. for (auto& s: Sources) {
  2502. if (!s->IsStream()) {
  2503. return false;
  2504. }
  2505. }
  2506. return true;
  2507. }
  2508. TNodePtr DoClone() const final {
  2509. return MakeIntrusive<TUnion>(Pos, CloneContainer(Sources), QuantifierAll, Settings);
  2510. }
  2511. bool IsSelect() const override {
  2512. return true;
  2513. }
  2514. bool HasSelectResult() const override {
  2515. return !Settings.Discard;
  2516. }
  2517. TWriteSettings GetWriteSettings() const override {
  2518. return Settings;
  2519. }
  2520. private:
  2521. TVector<TSourcePtr> Sources;
  2522. bool QuantifierAll;
  2523. const TWriteSettings Settings;
  2524. };
  2525. TSourcePtr BuildUnion(
  2526. TPosition pos,
  2527. TVector<TSourcePtr>&& sources,
  2528. bool quantifierAll,
  2529. const TWriteSettings& settings
  2530. ) {
  2531. return new TUnion(pos, std::move(sources), quantifierAll, settings);
  2532. }
  2533. class TOverWindowSource: public IProxySource {
  2534. public:
  2535. TOverWindowSource(TPosition pos, const TString& windowName, ISource* origSource)
  2536. : IProxySource(pos, origSource)
  2537. , WindowName(windowName)
  2538. {
  2539. Source->SetLabel(origSource->GetLabel());
  2540. }
  2541. TString MakeLocalName(const TString& name) override {
  2542. return Source->MakeLocalName(name);
  2543. }
  2544. void AddTmpWindowColumn(const TString& column) override {
  2545. return Source->AddTmpWindowColumn(column);
  2546. }
  2547. bool AddAggregation(TContext& ctx, TAggregationPtr aggr) override {
  2548. if (aggr->IsOverWindow() || aggr->IsOverWindowDistinct()) {
  2549. return Source->AddAggregationOverWindow(ctx, WindowName, aggr);
  2550. }
  2551. return Source->AddAggregation(ctx, aggr);
  2552. }
  2553. bool AddFuncOverWindow(TContext& ctx, TNodePtr expr) override {
  2554. return Source->AddFuncOverWindow(ctx, WindowName, expr);
  2555. }
  2556. bool IsOverWindowSource() const override {
  2557. return true;
  2558. }
  2559. TMaybe<bool> AddColumn(TContext& ctx, TColumnNode& column) override {
  2560. return Source->AddColumn(ctx, column);
  2561. }
  2562. TNodePtr Build(TContext& ctx) override {
  2563. Y_UNUSED(ctx);
  2564. Y_ABORT("Unexpected call");
  2565. }
  2566. const TString* GetWindowName() const override {
  2567. return &WindowName;
  2568. }
  2569. TWindowSpecificationPtr FindWindowSpecification(TContext& ctx, const TString& windowName) const override {
  2570. return Source->FindWindowSpecification(ctx, windowName);
  2571. }
  2572. TNodePtr GetSessionWindowSpec() const override {
  2573. return Source->GetSessionWindowSpec();
  2574. }
  2575. TNodePtr DoClone() const final {
  2576. return {};
  2577. }
  2578. private:
  2579. const TString WindowName;
  2580. };
  2581. TSourcePtr BuildOverWindowSource(TPosition pos, const TString& windowName, ISource* origSource) {
  2582. return new TOverWindowSource(pos, windowName, origSource);
  2583. }
  2584. class TSkipTakeNode final: public TAstListNode {
  2585. public:
  2586. TSkipTakeNode(TPosition pos, const TNodePtr& skip, const TNodePtr& take)
  2587. : TAstListNode(pos), IsSkipProvided_(!!skip)
  2588. {
  2589. TNodePtr select(AstNode("select"));
  2590. if (skip) {
  2591. select = Y("Skip", select, Y("Coalesce", skip, Y("Uint64", Q("0"))));
  2592. }
  2593. static const TString uiMax = ::ToString(std::numeric_limits<ui64>::max());
  2594. Add("let", "select", Y("Take", select, Y("Coalesce", take, Y("Uint64", Q(uiMax)))));
  2595. }
  2596. TPtr DoClone() const final {
  2597. return {};
  2598. }
  2599. bool HasSkip() const {
  2600. return IsSkipProvided_;
  2601. }
  2602. private:
  2603. const bool IsSkipProvided_;
  2604. };
  2605. TNodePtr BuildSkipTake(TPosition pos, const TNodePtr& skip, const TNodePtr& take) {
  2606. return new TSkipTakeNode(pos, skip, take);
  2607. }
  2608. class TSelect: public IProxySource {
  2609. public:
  2610. TSelect(TPosition pos, TSourcePtr source, TNodePtr skipTake)
  2611. : IProxySource(pos, source.Get())
  2612. , Source(std::move(source))
  2613. , SkipTake(skipTake)
  2614. {}
  2615. bool DoInit(TContext& ctx, ISource* src) override {
  2616. Source->SetLabel(Label);
  2617. if (AsInner) {
  2618. Source->UseAsInner();
  2619. }
  2620. if (IgnoreSort()) {
  2621. Source->DisableSort();
  2622. ctx.Warning(Source->GetPos(), TIssuesIds::YQL_ORDER_BY_WITHOUT_LIMIT_IN_SUBQUERY) << "ORDER BY without LIMIT in subquery will be ignored";
  2623. }
  2624. if (!Source->Init(ctx, src)) {
  2625. return false;
  2626. }
  2627. src = Source.Get();
  2628. if (SkipTake) {
  2629. FakeSource = BuildFakeSource(SkipTake->GetPos());
  2630. if (!SkipTake->Init(ctx, FakeSource.Get())) {
  2631. return false;
  2632. }
  2633. if (SkipTake->HasSkip() && EOrderKind::Sort != Source->GetOrderKind()) {
  2634. ctx.Warning(Source->GetPos(), TIssuesIds::YQL_OFFSET_WITHOUT_SORT) << "LIMIT with OFFSET without ORDER BY may provide different results from run to run";
  2635. }
  2636. }
  2637. return true;
  2638. }
  2639. TNodePtr Build(TContext& ctx) override {
  2640. auto input = Source->Build(ctx);
  2641. if (!input) {
  2642. return nullptr;
  2643. }
  2644. const auto label = "select";
  2645. auto block(Y(Y("let", label, input)));
  2646. auto sortNode = Source->BuildSort(ctx, label);
  2647. if (sortNode && !IgnoreSort()) {
  2648. block = L(block, sortNode);
  2649. }
  2650. if (SkipTake) {
  2651. block = L(block, SkipTake);
  2652. }
  2653. TNodePtr sample;
  2654. if (!BuildSamplingLambda(sample)) {
  2655. return nullptr;
  2656. } else if (sample) {
  2657. block = L(block, Y("let", "select", Y("OrderedFlatMap", "select", sample)));
  2658. }
  2659. if (auto removeNode = Source->BuildCleanupColumns(ctx, label)) {
  2660. block = L(block, removeNode);
  2661. }
  2662. block = L(block, Y("return", label));
  2663. return Y("block", Q(block));
  2664. }
  2665. bool SetSamplingOptions(
  2666. TContext& ctx,
  2667. TPosition pos,
  2668. ESampleClause sampleClause,
  2669. ESampleMode mode,
  2670. TNodePtr samplingRate,
  2671. TNodePtr samplingSeed) override {
  2672. if (mode == ESampleMode::System) {
  2673. ctx.Error(pos) << "only Bernoulli sampling mode is supported for subqueries";
  2674. return false;
  2675. }
  2676. if (samplingSeed) {
  2677. ctx.Error(pos) << "'Repeatable' keyword is not supported for subqueries";
  2678. return false;
  2679. }
  2680. return SetSamplingRate(ctx, sampleClause, samplingRate);
  2681. }
  2682. bool IsSelect() const override {
  2683. return Source->IsSelect();
  2684. }
  2685. bool HasSelectResult() const override {
  2686. return Source->HasSelectResult();
  2687. }
  2688. TPtr DoClone() const final {
  2689. return MakeIntrusive<TSelect>(Pos, Source->CloneSource(), SafeClone(SkipTake));
  2690. }
  2691. protected:
  2692. bool IgnoreSort() const {
  2693. return AsInner && !SkipTake && EOrderKind::Sort == Source->GetOrderKind();
  2694. }
  2695. TSourcePtr Source;
  2696. TNodePtr SkipTake;
  2697. TSourcePtr FakeSource;
  2698. };
  2699. TSourcePtr BuildSelect(TPosition pos, TSourcePtr source, TNodePtr skipTake) {
  2700. return new TSelect(pos, std::move(source), skipTake);
  2701. }
  2702. class TSelectResultNode final: public TAstListNode {
  2703. public:
  2704. TSelectResultNode(TPosition pos, TSourcePtr source, bool writeResult, bool inSubquery,
  2705. TScopedStatePtr scoped)
  2706. : TAstListNode(pos)
  2707. , Source(std::move(source))
  2708. , WriteResult(writeResult)
  2709. , InSubquery(inSubquery)
  2710. , Scoped(scoped)
  2711. {
  2712. YQL_ENSURE(Source, "Invalid source node");
  2713. FakeSource = BuildFakeSource(pos);
  2714. }
  2715. bool IsSelect() const override {
  2716. return true;
  2717. }
  2718. bool HasSelectResult() const override {
  2719. return Source->HasSelectResult();
  2720. }
  2721. bool DoInit(TContext& ctx, ISource* src) override {
  2722. if (!Source->Init(ctx, src)) {
  2723. return false;
  2724. }
  2725. src = Source.Get();
  2726. TTableList tableList;
  2727. Source->GetInputTables(tableList);
  2728. TNodePtr node(BuildInputTables(Pos, tableList, InSubquery, Scoped));
  2729. if (!node->Init(ctx, src)) {
  2730. return false;
  2731. }
  2732. auto writeSettings = src->GetWriteSettings();
  2733. bool asRef = ctx.PragmaRefSelect;
  2734. bool asAutoRef = true;
  2735. if (ctx.PragmaSampleSelect) {
  2736. asRef = false;
  2737. asAutoRef = false;
  2738. }
  2739. auto settings = Y(Q(Y(Q("type"))));
  2740. if (writeSettings.Discard) {
  2741. settings = L(settings, Q(Y(Q("discard"))));
  2742. }
  2743. if (!writeSettings.Label.Empty()) {
  2744. auto labelNode = writeSettings.Label.Build();
  2745. if (!writeSettings.Label.GetLiteral()) {
  2746. labelNode = Y("EvaluateAtom", labelNode);
  2747. }
  2748. if (!labelNode->Init(ctx, FakeSource.Get())) {
  2749. return false;
  2750. }
  2751. settings = L(settings, Q(Y(Q("label"), labelNode)));
  2752. }
  2753. if (asRef) {
  2754. settings = L(settings, Q(Y(Q("ref"))));
  2755. } else if (asAutoRef) {
  2756. settings = L(settings, Q(Y(Q("autoref"))));
  2757. }
  2758. auto columns = Source->GetColumns();
  2759. if (columns && !columns->All && !(columns->QualifiedAll && ctx.SimpleColumns)) {
  2760. auto list = Y();
  2761. YQL_ENSURE(columns->List.size() == columns->NamedColumns.size());
  2762. for (size_t i = 0; i < columns->List.size(); ++i) {
  2763. auto& c = columns->List[i];
  2764. if (c.EndsWith('*')) {
  2765. list = L(list, Q(Y(Q("prefix"), BuildQuotedAtom(Pos, c.substr(0, c.size() - 1)))));
  2766. } else if (columns->NamedColumns[i]) {
  2767. list = L(list, BuildQuotedAtom(Pos, c));
  2768. } else {
  2769. list = L(list, Q(Y(Q("auto"))));
  2770. }
  2771. }
  2772. settings = L(settings, Q(Y(Q("columns"), Q(list))));
  2773. }
  2774. if (ctx.ResultRowsLimit > 0) {
  2775. settings = L(settings, Q(Y(Q("take"), Q(ToString(ctx.ResultRowsLimit)))));
  2776. }
  2777. auto output = Source->Build(ctx);
  2778. if (!output) {
  2779. return false;
  2780. }
  2781. node = L(node, Y("let", "output", output));
  2782. if (WriteResult || writeSettings.Discard) {
  2783. if (EOrderKind::None == Source->GetOrderKind() && ctx.UseUnordered(*Source)) {
  2784. node = L(node, Y("let", "output", Y("Unordered", "output")));
  2785. if (ctx.UnorderedResult) {
  2786. settings = L(settings, Q(Y(Q("unordered"))));
  2787. }
  2788. }
  2789. auto writeResult(BuildWriteResult(Pos, "output", settings));
  2790. if (!writeResult->Init(ctx, src)) {
  2791. return false;
  2792. }
  2793. node = L(node, Y("let", "world", writeResult));
  2794. node = L(node, Y("return", "world"));
  2795. } else {
  2796. node = L(node, Y("return", "output"));
  2797. }
  2798. Add("block", Q(node));
  2799. return true;
  2800. }
  2801. TPtr DoClone() const final {
  2802. return {};
  2803. }
  2804. protected:
  2805. TSourcePtr Source;
  2806. const bool WriteResult;
  2807. const bool InSubquery;
  2808. TScopedStatePtr Scoped;
  2809. TSourcePtr FakeSource;
  2810. };
  2811. TNodePtr BuildSelectResult(TPosition pos, TSourcePtr source, bool writeResult, bool inSubquery,
  2812. TScopedStatePtr scoped) {
  2813. return new TSelectResultNode(pos, std::move(source), writeResult, inSubquery, scoped);
  2814. }
  2815. } // namespace NSQLTranslationV1