node.cpp 82 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179118011811182118311841185118611871188118911901191119211931194119511961197119811991200120112021203120412051206120712081209121012111212121312141215121612171218121912201221122212231224122512261227122812291230123112321233123412351236123712381239124012411242124312441245124612471248124912501251125212531254125512561257125812591260126112621263126412651266126712681269127012711272127312741275127612771278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177717781779178017811782178317841785178617871788178917901791179217931794179517961797179817991800180118021803180418051806180718081809181018111812181318141815181618171818181918201821182218231824182518261827182818291830183118321833183418351836183718381839184018411842184318441845184618471848184918501851185218531854185518561857185818591860186118621863186418651866186718681869187018711872187318741875187618771878187918801881188218831884188518861887188818891890189118921893189418951896189718981899190019011902190319041905190619071908190919101911191219131914191519161917191819191920192119221923192419251926192719281929193019311932193319341935193619371938193919401941194219431944194519461947194819491950195119521953195419551956195719581959196019611962196319641965196619671968196919701971197219731974197519761977197819791980198119821983198419851986198719881989199019911992199319941995199619971998199920002001200220032004200520062007200820092010201120122013201420152016201720182019202020212022202320242025202620272028202920302031203220332034203520362037203820392040204120422043204420452046204720482049205020512052205320542055205620572058205920602061206220632064206520662067206820692070207120722073207420752076207720782079208020812082208320842085208620872088208920902091209220932094209520962097209820992100210121022103210421052106210721082109211021112112211321142115211621172118211921202121212221232124212521262127212821292130213121322133213421352136213721382139214021412142214321442145214621472148214921502151215221532154215521562157215821592160216121622163216421652166216721682169217021712172217321742175217621772178217921802181218221832184218521862187218821892190219121922193219421952196219721982199220022012202220322042205220622072208220922102211221222132214221522162217221822192220222122222223222422252226222722282229223022312232223322342235223622372238223922402241224222432244224522462247224822492250225122522253225422552256225722582259226022612262226322642265226622672268226922702271227222732274227522762277227822792280228122822283228422852286228722882289229022912292229322942295229622972298229923002301230223032304230523062307230823092310231123122313231423152316231723182319232023212322232323242325232623272328232923302331233223332334233523362337233823392340234123422343234423452346234723482349235023512352235323542355235623572358235923602361236223632364236523662367236823692370237123722373237423752376237723782379238023812382238323842385238623872388238923902391239223932394239523962397239823992400240124022403240424052406240724082409241024112412241324142415241624172418241924202421242224232424242524262427242824292430243124322433243424352436243724382439244024412442244324442445244624472448244924502451245224532454245524562457245824592460246124622463246424652466246724682469247024712472247324742475247624772478247924802481248224832484248524862487248824892490249124922493249424952496249724982499250025012502250325042505250625072508250925102511251225132514251525162517251825192520252125222523252425252526252725282529253025312532253325342535253625372538253925402541254225432544254525462547254825492550255125522553255425552556255725582559256025612562256325642565256625672568256925702571257225732574257525762577257825792580258125822583258425852586258725882589259025912592259325942595259625972598259926002601260226032604260526062607260826092610261126122613261426152616261726182619262026212622262326242625262626272628262926302631263226332634263526362637263826392640264126422643264426452646264726482649265026512652265326542655265626572658265926602661266226632664266526662667266826692670267126722673267426752676267726782679268026812682268326842685268626872688268926902691269226932694269526962697269826992700270127022703270427052706270727082709271027112712271327142715271627172718271927202721272227232724272527262727272827292730273127322733273427352736273727382739274027412742274327442745274627472748274927502751275227532754275527562757275827592760276127622763276427652766276727682769277027712772277327742775277627772778277927802781278227832784278527862787278827892790279127922793279427952796279727982799280028012802280328042805280628072808280928102811281228132814281528162817281828192820282128222823282428252826282728282829283028312832283328342835283628372838283928402841284228432844284528462847284828492850285128522853285428552856285728582859286028612862286328642865286628672868286928702871287228732874287528762877287828792880288128822883288428852886288728882889289028912892
  1. #include "node.h"
  2. #include "context.h"
  3. #include <yql/essentials/ast/yql_ast_escaping.h>
  4. #include <yql/essentials/ast/yql_expr.h>
  5. #include <yql/essentials/minikql/mkql_type_ops.h>
  6. #include <yql/essentials/utils/yql_panic.h>
  7. #include <library/cpp/containers/stack_vector/stack_vec.h>
  8. #include <library/cpp/charset/ci_string.h>
  9. #include <util/generic/hash_set.h>
  10. #include <util/stream/str.h>
  11. #include <util/string/cast.h>
  12. #include <util/string/subst.h>
  13. using namespace NYql;
  14. namespace NSQLTranslationV0 {
  15. TString ErrorDistinctWithoutCorrelation(const TString& column) {
  16. return TStringBuilder() << "DISTINCT columns for JOIN in SELECT should have table aliases (correlation name),"
  17. " add it if necessary to FROM section over 'AS <alias>' keyword and put it like '<alias>." << column << "'";
  18. }
  19. TString ErrorDistinctByGroupKey(const TString& column) {
  20. return TStringBuilder() << "Unable to use DISTINCT by grouping column: " << column << ". You should leave one of them.";
  21. }
  22. TTableRef::TTableRef(const TString& refName, const TString& cluster, TNodePtr keys)
  23. : RefName(refName)
  24. , Cluster(cluster)
  25. , Keys(keys)
  26. {
  27. }
  28. TTableRef::TTableRef(const TTableRef& tr)
  29. : RefName(tr.RefName)
  30. , Cluster(tr.Cluster)
  31. , Keys(tr.Keys)
  32. , Options(tr.Options)
  33. {
  34. }
  35. TString TTableRef::ShortName() const {
  36. Y_DEBUG_ABORT_UNLESS(Keys);
  37. if (Keys->GetTableKeys()->GetTableName()) {
  38. return *Keys->GetTableKeys()->GetTableName();
  39. }
  40. return TString();
  41. }
  42. TString TTableRef::ServiceName(const TContext& ctx) const {
  43. auto service = ctx.GetClusterProvider(Cluster);
  44. YQL_ENSURE(service);
  45. return *service;
  46. }
  47. bool TTableRef::Check(TContext& ctx) const {
  48. if (Cluster.empty()) {
  49. ctx.Error() << "No cluster name given and no default cluster is selected";
  50. return false;
  51. }
  52. auto service = ctx.GetClusterProvider(Cluster);
  53. if (!service) {
  54. ctx.Error() << "Unknown cluster name: " << Cluster;
  55. return false;
  56. }
  57. if (!Keys) {
  58. ctx.Error() << "No table name given";
  59. return false;
  60. }
  61. return true;
  62. }
  63. TColumnSchema::TColumnSchema(TPosition pos, const TString& name, const TString& type, bool nullable, bool isTypeString)
  64. : Pos(pos)
  65. , Name(name)
  66. , Type(type)
  67. , Nullable(nullable)
  68. , IsTypeString(isTypeString)
  69. {
  70. }
  71. INode::INode(TPosition pos)
  72. : Pos(pos)
  73. {
  74. }
  75. INode::~INode()
  76. {
  77. }
  78. TPosition INode::GetPos() const {
  79. return Pos;
  80. }
  81. const TString& INode::GetLabel() const {
  82. return Label;
  83. }
  84. void INode::SetLabel(const TString& label) {
  85. Label = label;
  86. }
  87. void INode::SetCountHint(bool isCount) {
  88. State.Set(ENodeState::CountHint, isCount);
  89. }
  90. bool INode::GetCountHint() const {
  91. return State.Test(ENodeState::CountHint);
  92. }
  93. bool INode::IsConstant() const {
  94. return HasState(ENodeState::Const);
  95. }
  96. bool INode::IsAggregated() const {
  97. return HasState(ENodeState::Aggregated);
  98. }
  99. bool INode::IsAggregationKey() const {
  100. return HasState(ENodeState::AggregationKey);
  101. }
  102. bool INode::IsOverWindow() const {
  103. return HasState(ENodeState::OverWindow);
  104. }
  105. bool INode::IsNull() const {
  106. return false;
  107. }
  108. bool INode::IsIntegerLiteral() const {
  109. return false;
  110. }
  111. bool INode::IsAsterisk() const {
  112. return false;
  113. }
  114. const TString* INode::SubqueryAlias() const {
  115. return nullptr;
  116. }
  117. TString INode::GetOpName() const {
  118. return TString();
  119. }
  120. const TString* INode::GetLiteral(const TString& type) const {
  121. Y_UNUSED(type);
  122. return nullptr;
  123. }
  124. const TString* INode::GetColumnName() const {
  125. return nullptr;
  126. }
  127. void INode::AssumeColumn() {
  128. }
  129. const TString* INode::GetSourceName() const {
  130. return nullptr;
  131. }
  132. const TString* INode::GetAtomContent() const {
  133. return nullptr;
  134. }
  135. size_t INode::GetTupleSize() const {
  136. return 0;
  137. }
  138. INode::TPtr INode::GetTupleElement(size_t index) const {
  139. Y_UNUSED(index);
  140. return nullptr;
  141. }
  142. ITableKeys* INode::GetTableKeys() {
  143. return nullptr;
  144. }
  145. ISource* INode::GetSource() {
  146. return nullptr;
  147. }
  148. TVector<TNodePtr>* INode::ContentListPtr() {
  149. return nullptr;
  150. }
  151. bool INode::Init(TContext& ctx, ISource* src) {
  152. if (State.Test(ENodeState::Failed)) {
  153. return false;
  154. }
  155. if (!State.Test(ENodeState::Initialized)) {
  156. if (!DoInit(ctx, src)) {
  157. State.Set(ENodeState::Failed);
  158. return false;
  159. }
  160. State.Set(ENodeState::Initialized);
  161. }
  162. return true;
  163. }
  164. bool INode::DoInit(TContext& ctx, ISource* src) {
  165. Y_UNUSED(ctx);
  166. Y_UNUSED(src);
  167. return true;
  168. }
  169. TNodePtr INode::AstNode() const {
  170. return new TAstListNodeImpl(Pos);
  171. }
  172. TNodePtr INode::AstNode(TNodePtr node) const {
  173. return node;
  174. }
  175. TNodePtr INode::AstNode(const TString& str) const {
  176. return new TAstAtomNodeImpl(Pos, str, TNodeFlags::Default);
  177. }
  178. TNodePtr INode::AstNode(TAstNode* node) const {
  179. return new TAstDirectNode(node);
  180. }
  181. TNodePtr INode::Clone() const {
  182. TNodePtr clone = DoClone();
  183. if (!clone) {
  184. clone = const_cast<INode*>(this);
  185. } else {
  186. YQL_ENSURE(!State.Test(ENodeState::Initialized), "Clone shold be for uninitialized or persistent node");
  187. clone->SetLabel(Label);
  188. }
  189. return clone;
  190. }
  191. TAggregationPtr INode::GetAggregation() const {
  192. return {};
  193. }
  194. INode::TPtr INode::WindowSpecFunc(const TPtr& type) const {
  195. Y_UNUSED(type);
  196. return {};
  197. }
  198. void INode::UseAsInner() {
  199. AsInner = true;
  200. }
  201. bool INode::UsedSubquery() const {
  202. return false;
  203. }
  204. bool INode::IsSelect() const {
  205. return false;
  206. }
  207. TNodePtr INode::ShallowCopy() const {
  208. Y_DEBUG_ABORT_UNLESS(false, "Node is not copyable");
  209. return nullptr;
  210. }
  211. void INode::DoUpdateState() const {
  212. }
  213. void INode::PrecacheState() const {
  214. if (State.Test(ENodeState::Failed)) {
  215. return;
  216. }
  217. /// Not work right now! It's better use Init at first, because some kind of update depend on it
  218. /// \todo turn on and remove all issues
  219. //Y_DEBUG_ABORT_UNLESS(State.Test(ENodeState::Initialized));
  220. if (State.Test(ENodeState::Precached)) {
  221. return;
  222. }
  223. DoUpdateState();
  224. State.Set(ENodeState::Precached);
  225. }
  226. void INode::DoAdd(TNodePtr node) {
  227. Y_UNUSED(node);
  228. Y_DEBUG_ABORT_UNLESS(false, "Node is not expandable");
  229. }
  230. TAstAtomNode::TAstAtomNode(TPosition pos, const TString& content, ui32 flags)
  231. : INode(pos)
  232. , Content(content)
  233. , Flags(flags)
  234. {
  235. }
  236. TAstAtomNode::~TAstAtomNode()
  237. {
  238. }
  239. void TAstAtomNode::DoUpdateState() const {
  240. State.Set(ENodeState::Const);
  241. }
  242. TAstNode* TAstAtomNode::Translate(TContext& ctx) const {
  243. return TAstNode::NewAtom(Pos, Content, *ctx.Pool, Flags);
  244. }
  245. const TString* TAstAtomNode::GetAtomContent() const {
  246. return &Content;
  247. }
  248. TAstDirectNode::TAstDirectNode(TAstNode* node)
  249. : INode(node->GetPosition())
  250. , Node(node)
  251. {
  252. }
  253. TAstNode* TAstDirectNode::Translate(TContext& ctx) const {
  254. Y_UNUSED(ctx);
  255. return Node;
  256. }
  257. TNodePtr BuildAtom(TPosition pos, const TString& content, ui32 flags) {
  258. return new TAstAtomNodeImpl(pos, content, flags);
  259. }
  260. TAstListNode::TAstListNode(TPosition pos)
  261. : INode(pos)
  262. {
  263. }
  264. TAstListNode::~TAstListNode()
  265. {
  266. }
  267. bool TAstListNode::DoInit(TContext& ctx, ISource* src) {
  268. for (auto& node: Nodes) {
  269. if (!node->Init(ctx, src)) {
  270. return false;
  271. }
  272. }
  273. return true;
  274. }
  275. TAstNode* TAstListNode::Translate(TContext& ctx) const {
  276. TSmallVec<TAstNode*> children;
  277. children.reserve(Nodes.size());
  278. auto listPos = Pos;
  279. for (auto& node: Nodes) {
  280. if (node) {
  281. auto astNode = node->Translate(ctx);
  282. if (!astNode) {
  283. return nullptr;
  284. }
  285. children.push_back(astNode);
  286. } else {
  287. ctx.Error(Pos) << "Translation error: encountered empty TNodePtr";
  288. return nullptr;
  289. }
  290. }
  291. return TAstNode::NewList(listPos, children.data(), children.size(), *ctx.Pool);
  292. }
  293. void TAstListNode::UpdateStateByListNodes(const TVector<TNodePtr>& nodes) const {
  294. bool isConst = true;
  295. struct TAttributesFlags {
  296. bool has = false;
  297. bool all = true;
  298. };
  299. std::array<ENodeState, 3> checkStates = {{ENodeState::Aggregated, ENodeState::AggregationKey, ENodeState::OverWindow}};
  300. std::map<ENodeState, TAttributesFlags> flags;
  301. for (auto& node: nodes) {
  302. const bool isNodeConst = node->IsConstant();
  303. for (auto state: checkStates) {
  304. if (node->HasState(state)) {
  305. flags[state].has = true;
  306. } else if (!isNodeConst) {
  307. isConst = false;
  308. flags[state].all = false;
  309. }
  310. }
  311. }
  312. State.Set(ENodeState::Const, isConst);
  313. for (auto& flag: flags) {
  314. State.Set(flag.first, flag.second.has && flag.second.all);
  315. }
  316. }
  317. void TAstListNode::DoUpdateState() const {
  318. UpdateStateByListNodes(Nodes);
  319. }
  320. TAstListNode::TAstListNode(const TAstListNode& node)
  321. : INode(node.Pos)
  322. , Nodes(node.Nodes)
  323. {
  324. Label = node.Label;
  325. State = node.State;
  326. }
  327. TAstListNode::TAstListNode(TPosition pos, TVector<TNodePtr>&& nodes)
  328. : INode(pos)
  329. , Nodes(std::move(nodes))
  330. {
  331. }
  332. TNodePtr TAstListNode::ShallowCopy() const {
  333. return new TAstListNodeImpl(Pos, Nodes);
  334. }
  335. void TAstListNode::DoAdd(TNodePtr node) {
  336. Y_DEBUG_ABORT_UNLESS(node);
  337. Y_DEBUG_ABORT_UNLESS(node.Get() != this);
  338. Nodes.push_back(node);
  339. }
  340. TAstListNodeImpl::TAstListNodeImpl(TPosition pos)
  341. : TAstListNode(pos)
  342. {}
  343. TAstListNodeImpl::TAstListNodeImpl(TPosition pos, TVector<TNodePtr> nodes)
  344. : TAstListNode(pos)
  345. {
  346. Nodes.swap(nodes);
  347. }
  348. TNodePtr TAstListNodeImpl::DoClone() const {
  349. return new TAstListNodeImpl(Pos, CloneContainer(Nodes));
  350. }
  351. bool ValidateAllNodesForAggregation(TContext& ctx, const TVector<TNodePtr>& nodes) {
  352. for (auto& node: nodes) {
  353. if (node->IsConstant()) {
  354. continue;
  355. }
  356. if (!node->IsAggregated() && !node->IsOverWindow()) {
  357. ctx.Error(node->GetPos()) << "Expression has to be an aggregation function or key column, because aggregation is used elsewhere in this subquery";
  358. return false;
  359. }
  360. }
  361. return true;
  362. }
  363. TCallNode::TCallNode(TPosition pos, const TString& opName, i32 minArgs, i32 maxArgs, const TVector<TNodePtr>& args)
  364. : TAstListNode(pos)
  365. , OpName(opName)
  366. , MinArgs(minArgs)
  367. , MaxArgs(maxArgs)
  368. , Args(args)
  369. {
  370. }
  371. TString TCallNode::GetOpName() const {
  372. return OpName;
  373. }
  374. const TString* TCallNode::GetSourceName() const {
  375. const TString* name = nullptr;
  376. for (auto& arg: Args) {
  377. auto n = arg->GetSourceName();
  378. if (!n) {
  379. continue;
  380. }
  381. if (name && *n != *name) {
  382. return nullptr;
  383. }
  384. name = n;
  385. }
  386. return name;
  387. }
  388. const TVector<TNodePtr>& TCallNode::GetArgs() const {
  389. return Args;
  390. }
  391. void TCallNode::DoUpdateState() const {
  392. UpdateStateByListNodes(Args);
  393. }
  394. TString TCallNode::GetCallExplain() const {
  395. auto derivedName = GetOpName();
  396. TStringBuilder sb;
  397. sb << derivedName << "()";
  398. if (derivedName != OpName) {
  399. sb << ", converted to " << OpName << "()";
  400. }
  401. return std::move(sb);
  402. }
  403. bool TCallNode::ValidateArguments(TContext& ctx) const {
  404. const auto argsCount = static_cast<i32>(Args.size());
  405. if (MinArgs >= 0 && MaxArgs == MinArgs && argsCount != MinArgs) {
  406. ctx.Error(Pos) << GetCallExplain() << " requires exactly " << MinArgs << " arguments, given: " << Args.size();
  407. return false;
  408. }
  409. if (MinArgs >= 0 && argsCount < MinArgs) {
  410. ctx.Error(Pos) << GetCallExplain() << " requires at least " << MinArgs << " arguments, given: " << Args.size();
  411. return false;
  412. }
  413. if (MaxArgs >= 0 && argsCount > MaxArgs) {
  414. ctx.Error(Pos) << GetCallExplain() << " requires at most " << MaxArgs << " arguments, given: " << Args.size();
  415. return false;
  416. }
  417. return true;
  418. }
  419. bool TCallNode::DoInit(TContext& ctx, ISource* src) {
  420. if (!ValidateArguments(ctx)) {
  421. return false;
  422. }
  423. bool hasError = false;
  424. for (auto& arg: Args) {
  425. if (!arg->Init(ctx, src)) {
  426. hasError = true;
  427. continue;
  428. }
  429. }
  430. if (hasError) {
  431. return false;
  432. }
  433. Nodes.push_back(BuildAtom(Pos, OpName));
  434. Nodes.insert(Nodes.end(), Args.begin(), Args.end());
  435. return true;
  436. }
  437. TCallNodeImpl::TCallNodeImpl(TPosition pos, const TString& opName, i32 minArgs, i32 maxArgs, const TVector<TNodePtr>& args)
  438. : TCallNode(pos, opName, minArgs, maxArgs, args)
  439. {}
  440. TCallNodeImpl::TCallNodeImpl(TPosition pos, const TString& opName, const TVector<TNodePtr>& args)
  441. : TCallNode(pos, opName, args.size(), args.size(), args)
  442. {}
  443. TCallNode::TPtr TCallNodeImpl::DoClone() const {
  444. return new TCallNodeImpl(GetPos(), OpName, MinArgs, MaxArgs, CloneContainer(Args));
  445. }
  446. TCallNodeDepArgs::TCallNodeDepArgs(ui32 reqArgsCount, TPosition pos, const TString& opName, i32 minArgs, i32 maxArgs, const TVector<TNodePtr>& args)
  447. : TCallNode(pos, opName, minArgs, maxArgs, args)
  448. , ReqArgsCount(reqArgsCount)
  449. {}
  450. TCallNodeDepArgs::TCallNodeDepArgs(ui32 reqArgsCount, TPosition pos, const TString& opName, const TVector<TNodePtr>& args)
  451. : TCallNode(pos, opName, args.size(), args.size(), args)
  452. , ReqArgsCount(reqArgsCount)
  453. {}
  454. TCallNode::TPtr TCallNodeDepArgs::DoClone() const {
  455. return new TCallNodeDepArgs(ReqArgsCount, GetPos(), OpName, MinArgs, MaxArgs, CloneContainer(Args));
  456. }
  457. bool TCallNodeDepArgs::DoInit(TContext& ctx, ISource* src) {
  458. if (!TCallNode::DoInit(ctx, src)) {
  459. return false;
  460. }
  461. for (ui32 i = 1 + ReqArgsCount; i < Nodes.size(); ++i) {
  462. Nodes[i] = Y("DependsOn", Nodes[i]);
  463. }
  464. return true;
  465. }
  466. TCallDirectRow::TPtr TCallDirectRow::DoClone() const {
  467. return new TCallDirectRow(Pos, OpName, CloneContainer(Args));
  468. }
  469. TCallDirectRow::TCallDirectRow(TPosition pos, const TString& opName, const TVector<TNodePtr>& args)
  470. : TCallNode(pos, opName, 0, 0, args)
  471. {}
  472. bool TCallDirectRow::DoInit(TContext& ctx, ISource* src) {
  473. if (!src) {
  474. ctx.Error(Pos) << "Unable to use function: " << OpName << " without source";
  475. return false;
  476. }
  477. if (src->IsCompositeSource() || src->GetJoin() || src->HasAggregations() || src->IsFlattenByColumns() || src->IsOverWindowSource()) {
  478. ctx.Error(Pos) << "Failed to use function: " << OpName << " with aggregation, join, flatten by or window functions";
  479. return false;
  480. }
  481. if (!TCallNode::DoInit(ctx, src)) {
  482. return false;
  483. }
  484. Nodes.push_back(Y("DependsOn", "row"));
  485. return true;
  486. }
  487. void TCallDirectRow::DoUpdateState() const {
  488. State.Set(ENodeState::Const, false);
  489. }
  490. void TWinAggrEmulation::DoUpdateState() const {
  491. State.Set(ENodeState::OverWindow, true);
  492. }
  493. bool TWinAggrEmulation::DoInit(TContext& ctx, ISource* src) {
  494. if (!src) {
  495. ctx.Error(Pos) << "Unable to use window function: " << OpName << " without source";
  496. return false;
  497. }
  498. if (!src->IsOverWindowSource()) {
  499. ctx.Error(Pos) << "Failed to use window function: " << OpName << " without window specification";
  500. return false;
  501. }
  502. if (!src->AddFuncOverWindow(ctx, this)) {
  503. ctx.Error(Pos) << "Failed to use window function: " << OpName << " without specification or in wrong place";
  504. return false;
  505. }
  506. FuncAlias = "_yql_" + src->MakeLocalName(OpName);
  507. src->AddTmpWindowColumn(FuncAlias);
  508. ctx.PushBlockShortcuts();
  509. if (!TCallNode::DoInit(ctx, src)) {
  510. return false;
  511. }
  512. WinAggrGround = ctx.GroundBlockShortcuts(Pos);
  513. Nodes.clear();
  514. Add("Member", "row", Q(FuncAlias));
  515. return true;
  516. }
  517. INode::TPtr TWinAggrEmulation::WindowSpecFunc(const TPtr& type) const {
  518. auto result = Y(OpName, type);
  519. for (const auto& arg: Args) {
  520. result = L(result, arg);
  521. }
  522. return Q(Y(Q(FuncAlias), result));
  523. }
  524. TWinAggrEmulation::TWinAggrEmulation(TPosition pos, const TString& opName, i32 minArgs, i32 maxArgs, const TVector<TNodePtr>& args)
  525. : TCallNode(pos, opName, minArgs, maxArgs, args)
  526. , FuncAlias(opName)
  527. {}
  528. TWinRowNumber::TWinRowNumber(TPosition pos, const TString& opName, i32 minArgs, i32 maxArgs, const TVector<TNodePtr>& args)
  529. : TWinAggrEmulation(pos, opName, minArgs, maxArgs, args)
  530. {}
  531. TWinLeadLag::TWinLeadLag(TPosition pos, const TString& opName, i32 minArgs, i32 maxArgs, const TVector<TNodePtr>& args)
  532. : TWinAggrEmulation(pos, opName, minArgs, maxArgs, args)
  533. {}
  534. bool TWinLeadLag::DoInit(TContext& ctx, ISource* src) {
  535. if (Args.size() >= 2) {
  536. if (!Args[1]->IsIntegerLiteral()) {
  537. ctx.Error(Args[1]->GetPos()) << "Expected integer literal as second parameter of " << OpName << "( ) function";
  538. return false;
  539. }
  540. }
  541. if (!TWinAggrEmulation::DoInit(ctx, src)) {
  542. return false;
  543. }
  544. if (Args.size() >= 1) {
  545. Args[0] = BuildLambda(Pos, Y("row"), GroundWithExpr(WinAggrGround, Args[0]));
  546. }
  547. return true;
  548. }
  549. class TQuotedAtomNode: public TAstListNode {
  550. public:
  551. TQuotedAtomNode(TPosition pos, const TString& content, ui32 flags)
  552. : TAstListNode(pos)
  553. {
  554. Add("quote", BuildAtom(pos, content, flags));
  555. }
  556. protected:
  557. TQuotedAtomNode(const TQuotedAtomNode& other)
  558. : TAstListNode(other.Pos)
  559. {
  560. Nodes = CloneContainer(other.Nodes);
  561. }
  562. TPtr DoClone() const final {
  563. return new TQuotedAtomNode(*this);
  564. }
  565. };
  566. TNodePtr BuildQuotedAtom(TPosition pos, const TString& content, ui32 flags) {
  567. return new TQuotedAtomNode(pos, content, flags);
  568. }
  569. bool TColumns::Add(const TString* column, bool countHint, bool isArtificial, bool isReliable, bool hasName) {
  570. if (!column || *column == "*") {
  571. if (!countHint) {
  572. SetAll();
  573. }
  574. } else if (!All) {
  575. if (column->EndsWith('*')) {
  576. QualifiedAll = true;
  577. }
  578. bool inserted = false;
  579. if (isArtificial) {
  580. inserted = Artificial.insert(*column).second;
  581. } else {
  582. inserted = Real.insert(*column).second;
  583. }
  584. if (!isReliable) {
  585. HasUnreliable = true;
  586. }
  587. if (std::find(List.begin(), List.end(), *column) == List.end()) {
  588. List.push_back(*column);
  589. NamedColumns.push_back(hasName);
  590. }
  591. return inserted;
  592. }
  593. return All;
  594. }
  595. void TColumns::Merge(const TColumns& columns) {
  596. if (columns.All) {
  597. SetAll();
  598. } else {
  599. for (auto& c: columns.List) {
  600. if (columns.Real.contains(c)) {
  601. Add(&c, false, false);
  602. }
  603. if (columns.Artificial.contains(c)) {
  604. Add(&c, false, true);
  605. }
  606. }
  607. HasUnreliable |= columns.HasUnreliable;
  608. }
  609. }
  610. void TColumns::SetPrefix(const TString& prefix) {
  611. Y_DEBUG_ABORT_UNLESS(!prefix.empty());
  612. auto addPrefixFunc = [&prefix](const TString& str) {
  613. return prefix + "." + str;
  614. };
  615. TSet<TString> newReal;
  616. TSet<TString> newArtificial;
  617. TVector<TString> newList;
  618. std::transform(Real.begin(), Real.end(), std::inserter(newReal, newReal.begin()), addPrefixFunc);
  619. std::transform(Artificial.begin(), Artificial.end(), std::inserter(newArtificial, newArtificial.begin()), addPrefixFunc);
  620. std::transform(List.begin(), List.end(), std::back_inserter(newList), addPrefixFunc);
  621. newReal.swap(Real);
  622. newArtificial.swap(Artificial);
  623. newList.swap(List);
  624. }
  625. void TColumns::SetAll() {
  626. All = true;
  627. Real.clear();
  628. List.clear();
  629. Artificial.clear();
  630. }
  631. bool TColumns::IsColumnPossible(TContext& ctx, const TString& name) {
  632. if (All || Real.contains(name) || Artificial.contains(name)) {
  633. return true;
  634. }
  635. if (QualifiedAll) {
  636. if (ctx.SimpleColumns) {
  637. return true;
  638. }
  639. for (const auto& real: Real) {
  640. const auto pos = real.find_first_of("*");
  641. if (pos == TString::npos) {
  642. continue;
  643. }
  644. if (name.StartsWith(real.substr(0, pos))) {
  645. return true;
  646. }
  647. }
  648. }
  649. return false;
  650. }
  651. TSortSpecificationPtr TSortSpecification::Clone() const {
  652. auto res = MakeIntrusive<TSortSpecification>();
  653. res->OrderExpr = OrderExpr->Clone();
  654. res->Ascending = Ascending;
  655. return res;
  656. }
  657. TWindowSpecificationPtr TWindowSpecification::Clone() const {
  658. auto res = MakeIntrusive<TWindowSpecification>();
  659. res->ExistingWindowName = ExistingWindowName;
  660. res->Partitions = CloneContainer(Partitions);
  661. res->OrderBy = CloneContainer(OrderBy);
  662. res->Frame = Frame;
  663. return res;
  664. }
  665. THoppingWindowSpecPtr THoppingWindowSpec::Clone() const {
  666. auto res = MakeIntrusive<THoppingWindowSpec>();
  667. res->TimeExtractor = TimeExtractor->Clone();
  668. res->Hop = Hop->Clone();
  669. res->Interval = Interval->Clone();
  670. res->Delay = Delay->Clone();
  671. return res;
  672. }
  673. TColumnNode::TColumnNode(TPosition pos, const TString& column, const TString& source)
  674. : INode(pos)
  675. , ColumnName(column)
  676. , Source(source)
  677. {
  678. }
  679. TColumnNode::TColumnNode(TPosition pos, const TNodePtr& column, const TString& source)
  680. : INode(pos)
  681. , ColumnExpr(column)
  682. , Source(source)
  683. {
  684. }
  685. TColumnNode::~TColumnNode()
  686. {
  687. }
  688. bool TColumnNode::IsAsterisk() const {
  689. return ColumnName == "*";
  690. }
  691. bool TColumnNode::IsArtificial() const {
  692. return Artificial;
  693. }
  694. const TString* TColumnNode::GetColumnName() const {
  695. return UseSourceAsColumn ? &Source : (ColumnExpr ? nullptr : &ColumnName);
  696. }
  697. const TString* TColumnNode::GetSourceName() const {
  698. return UseSourceAsColumn ? &Empty : &Source;
  699. }
  700. bool TColumnNode::DoInit(TContext& ctx, ISource* src) {
  701. if (src) {
  702. YQL_ENSURE(!State.Test(ENodeState::Initialized)); /// should be not initialized or Aggregated already invalid
  703. if (src->ShouldUseSourceAsColumn(*GetSourceName())) {
  704. if (!IsAsterisk() && IsReliable()) {
  705. SetUseSourceAsColumn();
  706. }
  707. }
  708. if (GetColumnName()) {
  709. auto fullName = Source ? DotJoin(Source, *GetColumnName()) : *GetColumnName();
  710. auto alias = src->GetGroupByColumnAlias(fullName);
  711. if (alias) {
  712. ResetColumn(alias, {});
  713. }
  714. Artificial = !Source && src->IsExprAlias(*GetColumnName());
  715. }
  716. if (!src->AddColumn(ctx, *this)) {
  717. return false;
  718. }
  719. if (GetColumnName()) {
  720. if (src->GetJoin() && Source) {
  721. GroupKey = src->IsGroupByColumn(DotJoin(Source, *GetColumnName()));
  722. } else {
  723. GroupKey = src->IsGroupByColumn(*GetColumnName()) || src->IsAlias(EExprSeat::GroupBy, *GetColumnName());
  724. }
  725. }
  726. }
  727. if (IsAsterisk()) {
  728. Node = AstNode("row");
  729. } else {
  730. Node = Y(Reliable && !UseSource ? "Member" : "SqlColumn", "row", ColumnExpr ?
  731. Y("EvaluateAtom", ColumnExpr) : BuildQuotedAtom(Pos, *GetColumnName()));
  732. if (UseSource) {
  733. YQL_ENSURE(Source);
  734. Node = L(Node, BuildQuotedAtom(Pos, Source));
  735. }
  736. }
  737. return Node->Init(ctx, src);
  738. }
  739. void TColumnNode::SetUseSourceAsColumn() {
  740. YQL_ENSURE(!State.Test(ENodeState::Initialized)); /// should be not initialized or Aggregated already invalid
  741. YQL_ENSURE(!IsAsterisk());
  742. UseSourceAsColumn = true;
  743. }
  744. void TColumnNode::ResetAsReliable() {
  745. Reliable = true;
  746. }
  747. void TColumnNode::SetAsNotReliable() {
  748. Reliable = false;
  749. }
  750. void TColumnNode::SetUseSource() {
  751. UseSource = true;
  752. }
  753. bool TColumnNode::IsUseSourceAsColumn() const {
  754. return UseSourceAsColumn;
  755. }
  756. bool TColumnNode::IsReliable() const {
  757. return Reliable;
  758. }
  759. TNodePtr TColumnNode::DoClone() const {
  760. YQL_ENSURE(!Node, "TColumnNode::Clone: Node should not be initialized");
  761. auto copy = ColumnExpr ? new TColumnNode(Pos, ColumnExpr, Source) : new TColumnNode(Pos, ColumnName, Source);
  762. copy->GroupKey = GroupKey;
  763. copy->Artificial = Artificial;
  764. copy->Reliable = Reliable;
  765. copy->UseSource = UseSource;
  766. copy->UseSourceAsColumn = UseSourceAsColumn;
  767. return copy;
  768. }
  769. void TColumnNode::DoUpdateState() const {
  770. State.Set(ENodeState::Const, false);
  771. State.Set(ENodeState::Aggregated, GroupKey);
  772. State.Set(ENodeState::AggregationKey, GroupKey);
  773. }
  774. TAstNode* TColumnNode::Translate(TContext& ctx) const {
  775. return Node->Translate(ctx);
  776. }
  777. void TColumnNode::ResetColumn(const TString& column, const TString& source) {
  778. YQL_ENSURE(!State.Test(ENodeState::Initialized)); /// should be not initialized
  779. Reliable = true;
  780. UseSource = false;
  781. UseSourceAsColumn = false;
  782. ColumnName = column;
  783. ColumnExpr = nullptr;
  784. Source = source;
  785. }
  786. void TColumnNode::ResetColumn(const TNodePtr& column, const TString& source) {
  787. YQL_ENSURE(!State.Test(ENodeState::Initialized)); /// should be not initialized
  788. Reliable = true;
  789. UseSource = false;
  790. UseSourceAsColumn = false;
  791. ColumnName = "";
  792. ColumnExpr = column;
  793. Source = source;
  794. }
  795. const TString TColumnNode::Empty;
  796. TNodePtr BuildColumn(TPosition pos, const TString& column, const TString& source) {
  797. return new TColumnNode(pos, column, source);
  798. }
  799. TNodePtr BuildColumn(TPosition pos, const TNodePtr& column, const TString& source) {
  800. return new TColumnNode(pos, column, source);
  801. }
  802. TNodePtr BuildColumn(TPosition pos, const TDeferredAtom& column, const TString& source) {
  803. return column.GetLiteral() ? BuildColumn(pos, *column.GetLiteral(), source) : BuildColumn(pos, column.Build(), source);
  804. }
  805. ITableKeys::ITableKeys(TPosition pos)
  806. : INode(pos)
  807. {
  808. }
  809. const TString* ITableKeys::GetTableName() const {
  810. return nullptr;
  811. }
  812. ITableKeys* ITableKeys::GetTableKeys() {
  813. return this;
  814. }
  815. TAstNode* ITableKeys::Translate(TContext& ctx) const {
  816. Y_DEBUG_ABORT_UNLESS(false);
  817. Y_UNUSED(ctx);
  818. return nullptr;
  819. }
  820. bool IAggregation::IsDistinct() const {
  821. return !DistinctKey.empty();
  822. }
  823. void IAggregation::DoUpdateState() const {
  824. State.Set(ENodeState::Aggregated, AggMode == EAggregateMode::Normal);
  825. State.Set(ENodeState::OverWindow, AggMode == EAggregateMode::OverWindow);
  826. }
  827. const TString* IAggregation::GetGenericKey() const {
  828. return nullptr;
  829. }
  830. void IAggregation::Join(IAggregation*) {
  831. Y_ABORT_UNLESS(false);
  832. }
  833. const TString& IAggregation::GetName() const {
  834. return Name;
  835. }
  836. IAggregation::IAggregation(TPosition pos, const TString& name, const TString& func, EAggregateMode aggMode)
  837. : INode(pos), Name(name), Func(func), AggMode(aggMode)
  838. {}
  839. TAstNode* IAggregation::Translate(TContext& ctx) const {
  840. Y_DEBUG_ABORT_UNLESS(false);
  841. Y_UNUSED(ctx);
  842. return nullptr;
  843. }
  844. TNodePtr IAggregation::AggregationTraits(const TNodePtr& type) const {
  845. const bool distinct = AggMode == EAggregateMode::Distinct;
  846. const auto listType = distinct ? Y("ListType", Y("StructMemberType", Y("ListItemType", type), BuildQuotedAtom(Pos, DistinctKey))) : type;
  847. return distinct ? Q(Y(Q(Name), GetApply(listType), BuildQuotedAtom(Pos, DistinctKey))): Q(Y(Q(Name), GetApply(listType)));
  848. }
  849. void IAggregation::AddFactoryArguments(TNodePtr& apply) const {
  850. Y_UNUSED(apply);
  851. }
  852. std::vector<ui32> IAggregation::GetFactoryColumnIndices() const {
  853. return {0u};
  854. }
  855. TNodePtr IAggregation::WindowTraits(const TNodePtr& type) const {
  856. YQL_ENSURE(AggMode == EAggregateMode::OverWindow, "Windows traits is unavailable");
  857. return Q(Y(Q(Name), GetApply(type)));
  858. }
  859. ISource::ISource(TPosition pos)
  860. : INode(pos)
  861. {
  862. }
  863. ISource::~ISource()
  864. {
  865. }
  866. TSourcePtr ISource::CloneSource() const {
  867. Y_DEBUG_ABORT_UNLESS(dynamic_cast<ISource*>(Clone().Get()), "Cloned node is no source");
  868. TSourcePtr result = static_cast<ISource*>(Clone().Get());
  869. for (auto curFilter: Filters) {
  870. result->Filters.emplace_back(curFilter->Clone());
  871. }
  872. for (int i = 0; i < static_cast<int>(EExprSeat::Max); ++i) {
  873. result->NamedExprs[i] = CloneContainer(NamedExprs[i]);
  874. }
  875. result->FlattenColumns = FlattenColumns;
  876. return result;
  877. }
  878. bool ISource::IsFake() const {
  879. return false;
  880. }
  881. void ISource::AllColumns() {
  882. return;
  883. }
  884. const TColumns* ISource::GetColumns() const {
  885. return nullptr;
  886. }
  887. void ISource::GetInputTables(TTableList& tableList) const {
  888. for (auto srcPtr: UsedSources) {
  889. srcPtr->GetInputTables(tableList);
  890. }
  891. return;
  892. }
  893. TMaybe<bool> ISource::AddColumn(TContext& ctx, TColumnNode& column) {
  894. if (column.IsReliable()) {
  895. ctx.Error(Pos) << "Source does not allow column references";
  896. }
  897. return {};
  898. }
  899. void ISource::FinishColumns() {
  900. }
  901. bool ISource::AddFilter(TContext& ctx, TNodePtr filter) {
  902. Y_UNUSED(ctx);
  903. Filters.push_back(filter);
  904. return true;
  905. }
  906. bool ISource::AddGroupKey(TContext& ctx, const TString& column) {
  907. if (!GroupKeys.insert(column).second) {
  908. ctx.Error() << "Duplicate grouping column: " << column;
  909. return false;
  910. }
  911. OrderedGroupKeys.push_back(column);
  912. return true;
  913. }
  914. bool ISource::AddExpressions(TContext& ctx, const TVector<TNodePtr>& expressions, EExprSeat exprSeat) {
  915. YQL_ENSURE(exprSeat < EExprSeat::Max);
  916. TSet<TString> names;
  917. for (const auto& expr: expressions) {
  918. const auto& alias = expr->GetLabel();
  919. const auto& columnNamePtr = expr->GetColumnName();
  920. if (alias) {
  921. if (!ExprAliases.emplace(alias).second) {
  922. ctx.Error(expr->GetPos()) << "Duplicate alias found: " << alias << " in " << exprSeat << " section";
  923. return false;
  924. }
  925. if (names.contains(alias)) {
  926. ctx.Error(expr->GetPos()) << "Collision between alias and column name: " << alias << " in " << exprSeat << " section";
  927. return false;
  928. }
  929. }
  930. if (columnNamePtr) {
  931. const auto& sourceName = *expr->GetSourceName();
  932. auto columnName = *columnNamePtr;
  933. if (sourceName) {
  934. columnName = DotJoin(sourceName, columnName);
  935. }
  936. if (!names.emplace(columnName).second) {
  937. ctx.Error(expr->GetPos()) << "Duplicate column name found: " << columnName << " in " << exprSeat << " section";
  938. return false;
  939. }
  940. if (!alias && ExprAliases.contains(columnName)) {
  941. ctx.Error(expr->GetPos()) << "Collision between alias and column name: " << columnName << " in " << exprSeat << " section";
  942. return false;
  943. }
  944. if (alias && exprSeat == EExprSeat::GroupBy) {
  945. auto columnAlias = GroupByColumnAliases.emplace(columnName, alias);
  946. auto oldAlias = columnAlias.first->second;
  947. if (columnAlias.second && oldAlias != alias) {
  948. ctx.Error(expr->GetPos()) << "Alias for column not same, column: " << columnName <<
  949. ", exist alias: " << oldAlias << ", another alias: " << alias;
  950. return false;
  951. }
  952. }
  953. }
  954. Expressions(exprSeat).emplace_back(expr);
  955. }
  956. return true;
  957. }
  958. void ISource::SetFlattenByMode(const TString& mode) {
  959. FlattenMode = mode;
  960. }
  961. void ISource::MarkFlattenColumns() {
  962. FlattenColumns = true;
  963. }
  964. bool ISource::IsFlattenColumns() const {
  965. return FlattenColumns;
  966. }
  967. TString ISource::MakeLocalName(const TString& name) {
  968. auto iter = GenIndexes.find(name);
  969. if (iter == GenIndexes.end()) {
  970. iter = GenIndexes.emplace(name, 0).first;
  971. }
  972. TStringBuilder str;
  973. str << name << iter->second;
  974. ++iter->second;
  975. return std::move(str);
  976. }
  977. bool ISource::AddAggregation(TContext& ctx, TAggregationPtr aggr) {
  978. Y_UNUSED(ctx);
  979. Aggregations.push_back(aggr);
  980. return true;
  981. }
  982. bool ISource::HasAggregations() const {
  983. return !Aggregations.empty() || !GroupKeys.empty();
  984. }
  985. void ISource::AddWindowSpecs(TWinSpecs winSpecs) {
  986. WinSpecs = winSpecs;
  987. }
  988. bool ISource::AddFuncOverWindow(TContext& ctx, TNodePtr expr) {
  989. Y_UNUSED(ctx);
  990. Y_UNUSED(expr);
  991. return false;
  992. }
  993. void ISource::AddTmpWindowColumn(const TString& column) {
  994. TmpWindowColumns.push_back(column);
  995. }
  996. const TVector<TString>& ISource::GetTmpWindowColumns() const {
  997. return TmpWindowColumns;
  998. }
  999. void ISource::SetHoppingWindowSpec(THoppingWindowSpecPtr spec) {
  1000. HoppingWindowSpec = spec;
  1001. }
  1002. THoppingWindowSpecPtr ISource::GetHoppingWindowSpec() const {
  1003. return HoppingWindowSpec;
  1004. }
  1005. TWindowSpecificationPtr ISource::FindWindowSpecification(TContext& ctx, const TString& windowName) const {
  1006. auto winIter = WinSpecs.find(windowName);
  1007. if (winIter == WinSpecs.end()) {
  1008. ctx.Error(Pos) << "Can't refer to the window specification with name: " << windowName;
  1009. return {};
  1010. }
  1011. auto winSpec = winIter->second;
  1012. if (winSpec->Frame) {
  1013. ctx.Error(Pos) << "Frame that not default is not supported yet for window: " << windowName;
  1014. return {};
  1015. }
  1016. return winSpec;
  1017. }
  1018. inline TVector<TNodePtr>& ISource::Expressions(EExprSeat exprSeat) {
  1019. return NamedExprs[static_cast<size_t>(exprSeat)];
  1020. }
  1021. inline const TVector<TNodePtr>& ISource::Expressions(EExprSeat exprSeat) const {
  1022. return NamedExprs[static_cast<size_t>(exprSeat)];
  1023. }
  1024. inline TNodePtr ISource::AliasOrColumn(const TNodePtr& node, bool withSource) {
  1025. auto result = node->GetLabel();
  1026. if (!result) {
  1027. const auto columnNamePtr = node->GetColumnName();
  1028. YQL_ENSURE(columnNamePtr);
  1029. result = *columnNamePtr;
  1030. if (withSource) {
  1031. const auto sourceNamePtr = node->GetSourceName();
  1032. if (sourceNamePtr) {
  1033. result = DotJoin(*sourceNamePtr, result);
  1034. }
  1035. }
  1036. }
  1037. return node->Q(result);
  1038. }
  1039. bool ISource::AddAggregationOverWindow(TContext& ctx, const TString& windowName, TAggregationPtr func) {
  1040. if (func->IsDistinct()) {
  1041. ctx.Error(func->GetPos()) << "Aggregation with distinct is not allowed over window: " << windowName;
  1042. return false;
  1043. }
  1044. if (!FindWindowSpecification(ctx, windowName)) {
  1045. return false;
  1046. }
  1047. AggregationOverWindow.emplace(windowName, func);
  1048. return true;
  1049. }
  1050. bool ISource::AddFuncOverWindow(TContext& ctx, const TString& windowName, TNodePtr func) {
  1051. if (!FindWindowSpecification(ctx, windowName)) {
  1052. return false;
  1053. }
  1054. FuncOverWindow.emplace(windowName, func);
  1055. return true;
  1056. }
  1057. bool ISource::IsCompositeSource() const {
  1058. return false;
  1059. }
  1060. bool ISource::IsGroupByColumn(const TString& column) const {
  1061. return GroupKeys.contains(column);
  1062. }
  1063. bool ISource::IsFlattenByColumns() const {
  1064. return !Expressions(EExprSeat::FlattenBy).empty();
  1065. }
  1066. bool ISource::IsAlias(EExprSeat exprSeat, const TString& column) const {
  1067. for (const auto& exprNode: Expressions(exprSeat)) {
  1068. const auto& labelName = exprNode->GetLabel();
  1069. if (labelName && labelName == column) {
  1070. return true;
  1071. }
  1072. }
  1073. return false;
  1074. }
  1075. bool ISource::IsExprAlias(const TString& column) const {
  1076. std::array<EExprSeat, 3> exprSeats = {{EExprSeat::FlattenBy, EExprSeat::GroupBy, EExprSeat::WindowPartitionBy}};
  1077. for (auto seat: exprSeats) {
  1078. if (IsAlias(seat, column)) {
  1079. return true;
  1080. }
  1081. }
  1082. return false;
  1083. }
  1084. bool ISource::IsExprSeat(EExprSeat exprSeat, EExprType type) const {
  1085. auto expressions = Expressions(exprSeat);
  1086. if (!expressions) {
  1087. return false;
  1088. }
  1089. for (const auto& exprNode: expressions) {
  1090. if (exprNode->GetLabel()) {
  1091. return type == EExprType::WithExpression;
  1092. }
  1093. }
  1094. return type == EExprType::ColumnOnly;
  1095. }
  1096. TString ISource::GetGroupByColumnAlias(const TString& column) const {
  1097. auto iter = GroupByColumnAliases.find(column);
  1098. if (iter == GroupByColumnAliases.end()) {
  1099. return {};
  1100. }
  1101. return iter->second;
  1102. }
  1103. const TString* ISource::GetWindowName() const {
  1104. return {};
  1105. }
  1106. bool ISource::IsCalcOverWindow() const {
  1107. return !AggregationOverWindow.empty() || !FuncOverWindow.empty();
  1108. }
  1109. bool ISource::IsOverWindowSource() const {
  1110. return !WinSpecs.empty();
  1111. }
  1112. bool ISource::IsStream() const {
  1113. return false;
  1114. }
  1115. bool ISource::IsOrdered() const {
  1116. return false;
  1117. }
  1118. TWriteSettings ISource::GetWriteSettings() const {
  1119. return {};
  1120. }
  1121. bool ISource::SetSamplingOptions(TContext& ctx,
  1122. TPosition pos,
  1123. ESampleMode mode,
  1124. TNodePtr samplingRate,
  1125. TNodePtr samplingSeed) {
  1126. Y_UNUSED(pos);
  1127. Y_UNUSED(mode);
  1128. Y_UNUSED(samplingRate);
  1129. Y_UNUSED(samplingSeed);
  1130. ctx.Error() << "Sampling is only supported for table sources";
  1131. return false;
  1132. }
  1133. bool ISource::CalculateGroupingHint(TContext& ctx, const TVector<TString>& columns, ui64& hint) const {
  1134. Y_UNUSED(columns);
  1135. Y_UNUSED(hint);
  1136. ctx.Error() << "Source not support grouping hint";
  1137. return false;
  1138. }
  1139. TNodePtr ISource::BuildFilter(TContext& ctx, const TString& label, const TNodePtr& groundNode) {
  1140. return Filters.empty() ? nullptr : Y(ctx.UseUnordered(*this) ? "OrderedFilter" : "Filter", label, BuildFilterLambda(groundNode));
  1141. }
  1142. TNodePtr ISource::BuildFilterLambda(const TNodePtr& groundNode) {
  1143. if (Filters.empty()) {
  1144. return BuildLambda(Pos, Y("row"), Y("Bool", Q("true")));
  1145. }
  1146. YQL_ENSURE(Filters[0]->HasState(ENodeState::Initialized));
  1147. TNodePtr filter(Filters[0]);
  1148. for (ui32 i = 1; i < Filters.size(); ++i) {
  1149. YQL_ENSURE(Filters[i]->HasState(ENodeState::Initialized));
  1150. filter = Y("And", filter, Filters[i]);
  1151. }
  1152. filter = Y("Coalesce", filter, Y("Bool", Q("false")));
  1153. if (groundNode) {
  1154. filter = Y("block", Q(L(groundNode, Y("return", filter))));
  1155. }
  1156. return BuildLambda(Pos, Y("row"), filter);
  1157. }
  1158. TNodePtr ISource::BuildFlattenByColumns(const TString& label) {
  1159. auto columnsList = Y("FlattenByColumns", Q(FlattenMode), label);
  1160. for (const auto& column: Expressions(EExprSeat::FlattenBy)) {
  1161. const auto columnNamePtr = column->GetColumnName();
  1162. YQL_ENSURE(columnNamePtr);
  1163. if (column->GetLabel().empty()) {
  1164. columnsList = L(columnsList, Q(*columnNamePtr));
  1165. } else {
  1166. columnsList = L(columnsList, Q(Y(Q(*columnNamePtr), Q(column->GetLabel()))));
  1167. }
  1168. }
  1169. return Y(Y("let", "res", columnsList));
  1170. }
  1171. TNodePtr ISource::BuildFlattenColumns(const TString& label) {
  1172. return Y(Y("let", "res", Y("Just", Y("FlattenStructs", label))));
  1173. }
  1174. TNodePtr ISource::BuildPreaggregatedMap(TContext& ctx) {
  1175. const TColumns* columnsPtr = GetColumns();
  1176. if (!columnsPtr) {
  1177. ctx.Error(GetPos()) << "Missed columns for preaggregated map";
  1178. return nullptr;
  1179. }
  1180. auto structObj = BuildAtom(Pos, "row", TNodeFlags::Default);
  1181. for (const auto& exprNode: Expressions(EExprSeat::GroupBy)) {
  1182. const auto name = exprNode->GetLabel();
  1183. if (name) {
  1184. structObj = Y("ForceRemoveMember", structObj, Q(name));
  1185. structObj = Y("AddMember", structObj, Q(name), exprNode);
  1186. }
  1187. }
  1188. auto block = Y("AsList", structObj);
  1189. return block;
  1190. }
  1191. TNodePtr ISource::BuildPrewindowMap(TContext& ctx, const TNodePtr& groundNode) {
  1192. Y_UNUSED(ctx);
  1193. auto feed = BuildAtom(Pos, "row", TNodeFlags::Default);
  1194. for (const auto& exprNode: Expressions(EExprSeat::WindowPartitionBy)) {
  1195. const auto name = exprNode->GetLabel();
  1196. if (name) {
  1197. feed = Y("AddMember", feed, Q(name), GroundWithExpr(groundNode, exprNode));
  1198. }
  1199. }
  1200. return Y(ctx.UseUnordered(*this) ? "OrderedFlatMap" : "FlatMap", "core", BuildLambda(Pos, Y("row"), Y("AsList", feed)));
  1201. }
  1202. TNodePtr ISource::BuildAggregation(const TString& label) {
  1203. if (GroupKeys.empty() && Aggregations.empty() && !IsCompositeSource()) {
  1204. return nullptr;
  1205. }
  1206. auto keysTuple = Y();
  1207. for (const auto& key: GroupKeys) {
  1208. keysTuple = L(keysTuple, BuildQuotedAtom(Pos, key));
  1209. }
  1210. std::map<std::pair<bool, TString>, std::vector<IAggregation*>> genericAggrs;
  1211. for (const auto& aggr: Aggregations) {
  1212. if (const auto key = aggr->GetGenericKey()) {
  1213. genericAggrs[{aggr->IsDistinct(), *key}].emplace_back(aggr.Get());
  1214. }
  1215. }
  1216. for (const auto& aggr : genericAggrs) {
  1217. for (size_t i = 1U; i < aggr.second.size(); ++i) {
  1218. aggr.second.front()->Join(aggr.second[i]);
  1219. }
  1220. }
  1221. const auto listType = Y("TypeOf", label);
  1222. auto aggrArgs = Y();
  1223. for (const auto& aggr: Aggregations) {
  1224. if (const auto traits = aggr->AggregationTraits(listType))
  1225. aggrArgs = L(aggrArgs, traits);
  1226. }
  1227. if (HoppingWindowSpec) {
  1228. auto hoppingTraits = Y(
  1229. "HoppingTraits",
  1230. Y("ListItemType", listType),
  1231. BuildLambda(Pos, Y("row"), HoppingWindowSpec->TimeExtractor),
  1232. HoppingWindowSpec->Hop,
  1233. HoppingWindowSpec->Interval,
  1234. HoppingWindowSpec->Delay,
  1235. Q("False"),
  1236. Q("v1"));
  1237. return Y("Aggregate", label, Q(keysTuple), Q(aggrArgs),
  1238. Q(Y(Q(Y(BuildQuotedAtom(Pos, "hopping"), hoppingTraits)))));
  1239. }
  1240. return Y("Aggregate", label, Q(keysTuple), Q(aggrArgs));
  1241. }
  1242. TMaybe<TString> ISource::FindColumnMistype(const TString& name) const {
  1243. auto result = FindMistypeIn(GroupKeys, name);
  1244. return result ? result : FindMistypeIn(ExprAliases, name);
  1245. }
  1246. void ISource::AddDependentSource(ISource* usedSource) {
  1247. UsedSources.push_back(usedSource);
  1248. }
  1249. /// \todo fill it
  1250. struct TWinFrame {
  1251. };
  1252. struct TWinPartition {
  1253. TString ParentLabel;
  1254. size_t Id = 0;
  1255. TVector<size_t> FrameIds;
  1256. TVector<TSortSpecificationPtr> OrderBy;
  1257. TVector<TNodePtr> Partitions;
  1258. };
  1259. /// \todo use few levels of grouping (got from group by cube, etc)
  1260. class WindowFuncSupp {
  1261. public:
  1262. struct EvalOverWindow {
  1263. TVector<TAggregationPtr> Aggregations;
  1264. TVector<TNodePtr> Functions;
  1265. };
  1266. size_t GetWindowByName(const TString& windowName) {
  1267. auto iter = WindowMap.find(windowName);
  1268. return iter != WindowMap.end() ? iter->second : 0;
  1269. }
  1270. size_t CreateWindowBySpec(const TString& windowName, const TWindowSpecificationPtr& winSpec) {
  1271. Y_UNUSED(windowName);
  1272. auto curPartitions = winSpec->Partitions;
  1273. auto curOrderBy = winSpec->OrderBy;
  1274. auto partition = std::find_if(Partitions.begin(), Partitions.end(), [&curPartitions, &curOrderBy](const TWinPartition& other) {
  1275. /// \todo this compare is too strong;
  1276. if (curPartitions != other.Partitions) {
  1277. return false;
  1278. }
  1279. if (curOrderBy.size() != other.OrderBy.size()) {
  1280. return false;
  1281. }
  1282. for (unsigned i = 0; i < curOrderBy.size(); ++i) {
  1283. // failed in common case
  1284. if (curOrderBy[i]->OrderExpr != other.OrderBy[i]->OrderExpr) {
  1285. return false;
  1286. }
  1287. if (curOrderBy[i]->Ascending != other.OrderBy[i]->Ascending) {
  1288. return false;
  1289. }
  1290. }
  1291. return true;
  1292. });
  1293. if (partition == Partitions.end()) {
  1294. TWinPartition newPartition;
  1295. newPartition.Partitions = curPartitions;
  1296. newPartition.OrderBy = curOrderBy;
  1297. Partitions.emplace_back(newPartition);
  1298. partition = Partitions.end() - 1;
  1299. }
  1300. /// \todo add smart frame search and creation
  1301. auto frame = partition->FrameIds.begin();
  1302. if (frame == partition->FrameIds.end()) {
  1303. YQL_ENSURE(!winSpec->Frame, "Supported only default frame yet!");
  1304. Evals.push_back({});
  1305. const size_t curEval = Evals.size();
  1306. partition->FrameIds.push_back(curEval);
  1307. frame = partition->FrameIds.end() - 1;
  1308. }
  1309. return *frame;
  1310. }
  1311. void AddAggregationFunc(size_t windowId, TAggregationPtr func) {
  1312. Evals[windowId-1].Aggregations.push_back(func);
  1313. }
  1314. void AddSimpleFunc(size_t windowId, TNodePtr func) {
  1315. Evals[windowId-1].Functions.push_back(func);
  1316. }
  1317. const TVector<TWinPartition>& GetPartitions() {
  1318. return Partitions;
  1319. }
  1320. const EvalOverWindow& GetEvals(size_t frameId) {
  1321. YQL_ENSURE(frameId && frameId <= Evals.size());
  1322. return Evals[frameId-1];
  1323. }
  1324. TNodePtr BuildFrame(TPosition pos, size_t frameId) {
  1325. Y_UNUSED(frameId);
  1326. /// \todo support not default frame
  1327. return BuildLiteralVoid(pos);
  1328. }
  1329. private:
  1330. TVector<TWinPartition> Partitions;
  1331. TMap<TString, size_t> WindowMap;
  1332. TVector<EvalOverWindow> Evals;
  1333. };
  1334. TNodePtr ISource::BuildCalcOverWindow(TContext& ctx, const TString& label, const TNodePtr& ground) {
  1335. if (AggregationOverWindow.empty() && FuncOverWindow.empty()) {
  1336. return {};
  1337. }
  1338. WindowFuncSupp winSupp;
  1339. for (auto iter: AggregationOverWindow) {
  1340. auto windowId = winSupp.GetWindowByName(iter.first);
  1341. if (!windowId) {
  1342. windowId = winSupp.CreateWindowBySpec(iter.first, WinSpecs.at(iter.first));
  1343. }
  1344. winSupp.AddAggregationFunc(windowId, iter.second);
  1345. }
  1346. for (auto iter: FuncOverWindow) {
  1347. auto windowId = winSupp.GetWindowByName(iter.first);
  1348. if (!windowId) {
  1349. windowId = winSupp.CreateWindowBySpec(iter.first, WinSpecs.at(iter.first));
  1350. }
  1351. winSupp.AddSimpleFunc(windowId, iter.second);
  1352. }
  1353. auto partitions = winSupp.GetPartitions();
  1354. const bool onePartition = partitions.size() == 1;
  1355. const auto useLabel = onePartition ? label : "partitioning";
  1356. const auto listType = Y("TypeOf", useLabel);
  1357. auto framesProcess = Y();
  1358. auto resultNode = onePartition ? Y() : Y(Y("let", "partitioning", label));
  1359. for (auto partition: partitions) {
  1360. if (!partition.ParentLabel.empty()) {
  1361. ctx.Error(GetPos()) << "Dependent partition for Window function unsupported yet!";
  1362. return nullptr;
  1363. }
  1364. auto keysTuple = Y();
  1365. for (const auto& key: partition.Partitions) {
  1366. keysTuple = L(keysTuple, AliasOrColumn(key, GetJoin()));
  1367. }
  1368. auto frames = Y();
  1369. for (auto frameId: partition.FrameIds) {
  1370. auto callOnFrame = Y("WinOnRows", winSupp.BuildFrame(ctx.Pos(), frameId));
  1371. const auto& evals = winSupp.GetEvals(frameId);
  1372. for (auto eval: evals.Aggregations) {
  1373. if (!eval->IsOverWindow()) {
  1374. ctx.Error(eval->GetPos()) << "Aggregation over window is not supported for function: " << eval->GetName();
  1375. return nullptr;
  1376. }
  1377. auto winTraits = eval->WindowTraits(listType);
  1378. callOnFrame = L(callOnFrame, winTraits);
  1379. }
  1380. for (auto eval: evals.Functions) {
  1381. auto winSpec = eval->WindowSpecFunc(listType);
  1382. callOnFrame = L(callOnFrame, winSpec);
  1383. }
  1384. /// \todo some smart frame building not "WinOnRows" hardcode
  1385. frames = L(frames, callOnFrame);
  1386. }
  1387. auto sortSpec = partition.OrderBy.empty() ? BuildLiteralVoid(ctx.Pos()) : BuildSortSpec(partition.OrderBy, useLabel, ground, true);
  1388. framesProcess = Y("CalcOverWindow", useLabel, Q(keysTuple), sortSpec, Q(frames));
  1389. if (!onePartition) {
  1390. resultNode = L(resultNode, Y("let", "partitioning", framesProcess));
  1391. }
  1392. }
  1393. if (onePartition) {
  1394. return framesProcess;
  1395. } else {
  1396. return Y("block", Q(L(resultNode, Y("return", "partitioning"))));
  1397. }
  1398. }
  1399. TNodePtr ISource::BuildSort(TContext& ctx, const TString& label) {
  1400. Y_UNUSED(ctx);
  1401. Y_UNUSED(label);
  1402. return nullptr;
  1403. }
  1404. IJoin* ISource::GetJoin() {
  1405. return nullptr;
  1406. }
  1407. ISource* ISource::GetCompositeSource() {
  1408. return nullptr;
  1409. }
  1410. bool ISource::IsSelect() const {
  1411. return true;
  1412. }
  1413. bool ISource::IsTableSource() const {
  1414. return false;
  1415. }
  1416. bool ISource::ShouldUseSourceAsColumn(const TString& source) {
  1417. Y_UNUSED(source);
  1418. return false;
  1419. }
  1420. bool ISource::IsJoinKeysInitializing() const {
  1421. return false;
  1422. }
  1423. bool ISource::DoInit(TContext& ctx, ISource* src) {
  1424. for (auto& column: Expressions(EExprSeat::FlattenBy)) {
  1425. if (!column->Init(ctx, this)) {
  1426. return false;
  1427. }
  1428. }
  1429. if (IsFlattenColumns() && src) {
  1430. src->AllColumns();
  1431. }
  1432. return true;
  1433. }
  1434. bool ISource::InitFilters(TContext& ctx) {
  1435. for (auto& filter: Filters) {
  1436. if (!filter->Init(ctx, this)) {
  1437. return false;
  1438. }
  1439. if (filter->IsAggregated() && !filter->IsConstant() && !filter->HasState(ENodeState::AggregationKey)) {
  1440. ctx.Error(filter->GetPos()) << "Can not use aggregated values in filtering";
  1441. return false;
  1442. }
  1443. }
  1444. return true;
  1445. }
  1446. TAstNode* ISource::Translate(TContext& ctx) const {
  1447. Y_DEBUG_ABORT_UNLESS(false);
  1448. Y_UNUSED(ctx);
  1449. return nullptr;
  1450. }
  1451. void ISource::FillSortParts(const TVector<TSortSpecificationPtr>& orderBy, TNodePtr& sortDirection, TNodePtr& sortKeySelector) {
  1452. TNodePtr expr;
  1453. if (orderBy.empty()) {
  1454. YQL_ENSURE(!sortKeySelector);
  1455. sortDirection = sortKeySelector = Y("Void");
  1456. return;
  1457. } else if (orderBy.size() == 1) {
  1458. auto& sortSpec = orderBy.front();
  1459. expr = Y("EnsurePersistable", sortSpec->OrderExpr);
  1460. sortDirection = Y("Bool", Q(sortSpec->Ascending ? "true" : "false"));
  1461. } else {
  1462. auto exprList = Y();
  1463. sortDirection = Y();
  1464. for (const auto& sortSpec: orderBy) {
  1465. const auto asc = sortSpec->Ascending;
  1466. sortDirection = L(sortDirection, Y("Bool", Q(asc ? "true" : "false")));
  1467. exprList = L(exprList, Y("EnsurePersistable", sortSpec->OrderExpr));
  1468. }
  1469. sortDirection = Q(sortDirection);
  1470. expr = Q(exprList);
  1471. }
  1472. expr = sortKeySelector ? expr->Y("block", expr->Q(expr->L(sortKeySelector, expr->Y("return", expr)))) : expr;
  1473. sortKeySelector = BuildLambda(Pos, Y("row"), expr);
  1474. }
  1475. TNodePtr ISource::BuildSortSpec(const TVector<TSortSpecificationPtr>& orderBy, const TString& label, const TNodePtr& ground, bool traits) {
  1476. YQL_ENSURE(!orderBy.empty());
  1477. TNodePtr dirsNode;
  1478. auto keySelectorNode = ground;
  1479. FillSortParts(orderBy, dirsNode, keySelectorNode);
  1480. if (traits) {
  1481. return Y("SortTraits", Y("TypeOf", label), dirsNode, keySelectorNode);
  1482. } else {
  1483. return Y("Sort", label, dirsNode, keySelectorNode);
  1484. }
  1485. }
  1486. IJoin::IJoin(TPosition pos)
  1487. : ISource(pos)
  1488. {
  1489. }
  1490. IJoin::~IJoin()
  1491. {
  1492. }
  1493. IJoin* IJoin::GetJoin() {
  1494. return this;
  1495. }
  1496. bool TryStringContent(const TString& str, TString& result, ui32& flags, TString& error, TPosition& pos) {
  1497. error.clear();
  1498. result.clear();
  1499. bool doubleQuoted = (str.StartsWith('"') && str.EndsWith('"'));
  1500. bool singleQuoted = !doubleQuoted && (str.StartsWith('\'') && str.EndsWith('\''));
  1501. if (str.size() >= 2 && (doubleQuoted || singleQuoted)) {
  1502. flags = TNodeFlags::ArbitraryContent;
  1503. char quoteChar = doubleQuoted ? '"' : '\'';
  1504. size_t readBytes = 0;
  1505. TStringBuf atom(str);
  1506. TStringOutput sout(result);
  1507. atom.Skip(1);
  1508. result.reserve(str.size());
  1509. auto unescapeResult = UnescapeArbitraryAtom(atom, quoteChar, &sout, &readBytes);
  1510. if (unescapeResult != EUnescapeResult::OK) {
  1511. TTextWalker walker(pos, false);
  1512. walker.Advance(atom.Trunc(readBytes));
  1513. error = UnescapeResultToString(unescapeResult);
  1514. return false;
  1515. }
  1516. } else if (str.size() >= 4 && str.StartsWith("@@") && str.EndsWith("@@")) {
  1517. flags = TNodeFlags::MultilineContent;
  1518. TString s = str.substr(2, str.length() - 4);
  1519. SubstGlobal(s, "@@@@", "@@");
  1520. result.swap(s);
  1521. } else {
  1522. flags = TNodeFlags::Default;
  1523. result = str;
  1524. }
  1525. return true;
  1526. }
  1527. TString StringContent(TContext& ctx, const TString& str) {
  1528. ui32 flags = 0;
  1529. TString result;
  1530. TString error;
  1531. TPosition pos;
  1532. if (!TryStringContent(str, result, flags, error, pos)) {
  1533. ctx.Error(pos) << "Failed to parse string literal: " << error;
  1534. return {};
  1535. }
  1536. return result;
  1537. }
  1538. TString IdContent(TContext& ctx, const TString& s) {
  1539. YQL_ENSURE(!s.empty(), "Empty identifier not expected");
  1540. if (!s.StartsWith('[') && !s.StartsWith('`')) {
  1541. return s;
  1542. }
  1543. auto endSym = s.StartsWith('[') ? ']' : '`';
  1544. if (s.size() < 2 || !s.EndsWith(endSym)) {
  1545. ctx.Error() << "The identifier that starts with: '" << s[0] << "' should ends with: '" << endSym << "'";
  1546. return {};
  1547. }
  1548. size_t skipSymbols = 1;
  1549. /// @TODO: temporary back compatibility case
  1550. if (s.StartsWith('[') && s[1] == '"') {
  1551. ctx.Warning(ctx.Pos(), TIssuesIds::YQL_DEPRECATED_DOUBLE_QUOTE_IN_BRACKETS) <<
  1552. "The use of double quotes in the identifier in square brackets is deprecated."
  1553. " Either simply remove the double quotes or use backticks."
  1554. " If you need quotes they can be escaped by '\\'.";
  1555. if (s.size() < 4 || s[s.size() - 2] != '"') {
  1556. ctx.Error() << "Missed closed quote for identifier, either remove double quote after '[', "
  1557. " or put double quote before ']'";
  1558. return {};
  1559. }
  1560. endSym = '"';
  1561. skipSymbols += 1;
  1562. }
  1563. TStringBuf atom(s.data() + skipSymbols, s.size() - 2 * skipSymbols + 1);
  1564. TString unescapedStr;
  1565. TStringOutput sout(unescapedStr);
  1566. unescapedStr.reserve(s.size());
  1567. size_t readBytes = 0;
  1568. TPosition pos = ctx.Pos();
  1569. pos.Column += skipSymbols - 1;
  1570. auto unescapeResult = UnescapeArbitraryAtom(atom, endSym, &sout, &readBytes);
  1571. if (unescapeResult != EUnescapeResult::OK) {
  1572. TTextWalker walker(pos, false);
  1573. walker.Advance(atom.Trunc(readBytes));
  1574. ctx.Error(pos) << "Cannot parse broken identifier: " << UnescapeResultToString(unescapeResult);
  1575. return {};
  1576. }
  1577. if (readBytes != atom.size()) {
  1578. ctx.Error() << "The identifier not parsed completely";
  1579. return {};
  1580. }
  1581. return unescapedStr;
  1582. }
  1583. namespace {
  1584. class TInvalidLiteralNode final: public INode {
  1585. public:
  1586. TInvalidLiteralNode(TPosition pos)
  1587. : INode(pos)
  1588. {
  1589. }
  1590. bool DoInit(TContext& ctx, ISource* source) override {
  1591. Y_UNUSED(ctx);
  1592. Y_UNUSED(source);
  1593. return false;
  1594. }
  1595. TAstNode* Translate(TContext& ctx) const override {
  1596. Y_UNUSED(ctx);
  1597. return nullptr;
  1598. }
  1599. TPtr DoClone() const override {
  1600. return {};
  1601. }
  1602. };
  1603. }
  1604. TLiteralNode::TLiteralNode(TPosition pos, bool isNull)
  1605. : TAstListNode(pos)
  1606. , Null(isNull)
  1607. , Void(!isNull)
  1608. {
  1609. Add(isNull ? "Null" : "Void");
  1610. }
  1611. TLiteralNode::TLiteralNode(TPosition pos, const TString& type, const TString& value)
  1612. : TAstListNode(pos)
  1613. , Null(false)
  1614. , Void(false)
  1615. , Type(type)
  1616. , Value(value)
  1617. {
  1618. Add(Type, BuildQuotedAtom(Pos, Value));
  1619. }
  1620. TLiteralNode::TLiteralNode(TPosition pos, const TString& value, ui32 nodeFlags)
  1621. : TAstListNode(pos)
  1622. , Null(false)
  1623. , Void(false)
  1624. , Type("String")
  1625. , Value(value)
  1626. {
  1627. Add(Type, BuildQuotedAtom(pos, Value, nodeFlags));
  1628. }
  1629. bool TLiteralNode::IsNull() const {
  1630. return Null;
  1631. }
  1632. const TString* TLiteralNode::GetLiteral(const TString& type) const {
  1633. return type == Type ? &Value : nullptr;
  1634. }
  1635. void TLiteralNode::DoUpdateState() const {
  1636. State.Set(ENodeState::Const);
  1637. }
  1638. TNodePtr TLiteralNode::DoClone() const {
  1639. auto res = (Null || Void) ? MakeIntrusive<TLiteralNode>(Pos, Null) : MakeIntrusive<TLiteralNode>(Pos, Type, Value);
  1640. res->Nodes = Nodes;
  1641. return res;
  1642. }
  1643. template<typename T>
  1644. TLiteralNumberNode<T>::TLiteralNumberNode(TPosition pos, const TString& type, const TString& value)
  1645. : TLiteralNode(pos, type, value)
  1646. {}
  1647. template<typename T>
  1648. TNodePtr TLiteralNumberNode<T>::DoClone() const {
  1649. return new TLiteralNumberNode<T>(Pos, Type, Value);
  1650. }
  1651. template<typename T>
  1652. bool TLiteralNumberNode<T>::DoInit(TContext& ctx, ISource* src) {
  1653. Y_UNUSED(src);
  1654. T val;
  1655. if (!TryFromString(Value, val)) {
  1656. ctx.Error(Pos) << "Failed to convert string: " << Value << " to " << Type << " value";
  1657. return false;
  1658. }
  1659. return true;
  1660. }
  1661. template<typename T>
  1662. bool TLiteralNumberNode<T>::IsIntegerLiteral() const {
  1663. return std::numeric_limits<T>::is_integer;
  1664. }
  1665. template class TLiteralNumberNode<i32>;
  1666. template class TLiteralNumberNode<i64>;
  1667. template class TLiteralNumberNode<ui32>;
  1668. template class TLiteralNumberNode<ui64>;
  1669. template class TLiteralNumberNode<float>;
  1670. template class TLiteralNumberNode<double>;
  1671. template class TLiteralNumberNode<ui8>;
  1672. template class TLiteralNumberNode<i8>;
  1673. template class TLiteralNumberNode<ui16>;
  1674. template class TLiteralNumberNode<i16>;
  1675. TNodePtr BuildLiteralNull(TPosition pos) {
  1676. return new TLiteralNode(pos, true);
  1677. }
  1678. TNodePtr BuildLiteralVoid(TPosition pos) {
  1679. return new TLiteralNode(pos, false);
  1680. }
  1681. TNodePtr BuildLiteralSmartString(TContext& ctx, const TString& value) {
  1682. TString unescaped;
  1683. TString error;
  1684. TPosition pos = ctx.Pos();
  1685. ui32 flags = 0;
  1686. if (TryStringContent(value, unescaped, flags, error, pos)) {
  1687. return new TLiteralNode(ctx.Pos(), unescaped, flags);
  1688. } else {
  1689. ctx.Error(pos) << "Failed to parse string literal: " << error;
  1690. return new TInvalidLiteralNode(ctx.Pos());
  1691. }
  1692. }
  1693. TNodePtr BuildLiteralRawString(TPosition pos, const TString& value) {
  1694. return new TLiteralNode(pos, "String", value);
  1695. }
  1696. TNodePtr BuildLiteralBool(TPosition pos, const TString& value) {
  1697. return new TLiteralNode(pos, "Bool", value);
  1698. }
  1699. TNodePtr BuildEmptyAction(TPosition pos) {
  1700. TNodePtr params = new TAstListNodeImpl(pos);
  1701. TNodePtr arg = new TAstAtomNodeImpl(pos, "x", TNodeFlags::Default);
  1702. params->Add(arg);
  1703. return BuildLambda(pos, params, arg);
  1704. }
  1705. TDeferredAtom::TDeferredAtom()
  1706. {}
  1707. TDeferredAtom::TDeferredAtom(TPosition pos, const TString& str)
  1708. {
  1709. Node = BuildQuotedAtom(pos, str);
  1710. Explicit = str;
  1711. Repr = str;
  1712. }
  1713. TDeferredAtom::TDeferredAtom(TNodePtr node, TContext& ctx)
  1714. {
  1715. Node = node;
  1716. Repr = ctx.MakeName("DeferredAtom");
  1717. }
  1718. const TString* TDeferredAtom::GetLiteral() const {
  1719. return Explicit.Get();
  1720. }
  1721. TNodePtr TDeferredAtom::Build() const {
  1722. return Node;
  1723. }
  1724. TString TDeferredAtom::GetRepr() const {
  1725. return Repr;
  1726. }
  1727. bool TDeferredAtom::Empty() const {
  1728. return !Node || Repr.empty();
  1729. }
  1730. TTupleNode::TTupleNode(TPosition pos, const TVector<TNodePtr>& exprs)
  1731. : TAstListNode(pos)
  1732. , Exprs(exprs)
  1733. {}
  1734. bool TTupleNode::IsEmpty() const {
  1735. return Exprs.empty();
  1736. }
  1737. const TVector<TNodePtr>& TTupleNode::Elements() const {
  1738. return Exprs;
  1739. }
  1740. bool TTupleNode::DoInit(TContext& ctx, ISource* src) {
  1741. auto node(Y());
  1742. for (auto& expr: Exprs) {
  1743. if (expr->GetLabel()) {
  1744. ctx.Error(expr->GetPos()) << "Tuple does not allow named members";
  1745. return false;
  1746. }
  1747. node = L(node, expr);
  1748. }
  1749. Add("quote", node);
  1750. return TAstListNode::DoInit(ctx, src);
  1751. }
  1752. size_t TTupleNode::GetTupleSize() const {
  1753. return Exprs.size();
  1754. }
  1755. TNodePtr TTupleNode::GetTupleElement(size_t index) const {
  1756. return Exprs[index];
  1757. }
  1758. TNodePtr TTupleNode::DoClone() const {
  1759. return new TTupleNode(Pos, CloneContainer(Exprs));
  1760. }
  1761. TNodePtr BuildTuple(TPosition pos, const TVector<TNodePtr>& exprs) {
  1762. return new TTupleNode(pos, exprs);
  1763. }
  1764. TStructNode::TStructNode(TPosition pos, const TVector<TNodePtr>& exprs)
  1765. : TAstListNode(pos)
  1766. , Exprs(exprs)
  1767. {}
  1768. bool TStructNode::DoInit(TContext& ctx, ISource* src) {
  1769. Nodes.push_back(BuildAtom(Pos, "AsStruct", TNodeFlags::Default));
  1770. for (const auto& expr : Exprs) {
  1771. const auto& label = expr->GetLabel();
  1772. if (!label) {
  1773. ctx.Error(expr->GetPos()) << "Structure does not allow anonymous members";
  1774. return false;
  1775. }
  1776. Nodes.push_back(Q(Y(BuildQuotedAtom(expr->GetPos(), label), expr)));
  1777. }
  1778. return TAstListNode::DoInit(ctx, src);
  1779. }
  1780. TNodePtr TStructNode::DoClone() const {
  1781. return new TStructNode(Pos, CloneContainer(Exprs));
  1782. }
  1783. TNodePtr BuildStructure(TPosition pos, const TVector<TNodePtr>& exprs) {
  1784. return new TStructNode(pos, exprs);
  1785. }
  1786. TListOfNamedNodes::TListOfNamedNodes(TPosition pos, TVector<TNodePtr>&& exprs)
  1787. : INode(pos)
  1788. , Exprs(std::move(exprs))
  1789. {}
  1790. TVector<TNodePtr>* TListOfNamedNodes::ContentListPtr() {
  1791. return &Exprs;
  1792. }
  1793. TAstNode* TListOfNamedNodes::Translate(TContext& ctx) const {
  1794. YQL_ENSURE(!"Unexpected usage");
  1795. Y_UNUSED(ctx);
  1796. return nullptr;
  1797. }
  1798. TNodePtr TListOfNamedNodes::DoClone() const {
  1799. return {};
  1800. }
  1801. TNodePtr BuildListOfNamedNodes(TPosition pos, TVector<TNodePtr>&& exprs) {
  1802. return new TListOfNamedNodes(pos, std::move(exprs));
  1803. }
  1804. const char* const TArgPlaceholderNode::ProcessRows = "$ROWS";
  1805. const char* const TArgPlaceholderNode::ProcessRow = "$ROW";
  1806. TArgPlaceholderNode::TArgPlaceholderNode(TPosition pos, const TString &name) :
  1807. INode(pos),
  1808. Name(name)
  1809. {
  1810. }
  1811. bool TArgPlaceholderNode::DoInit(TContext& ctx, ISource* src) {
  1812. Y_UNUSED(src);
  1813. ctx.Error(Pos) << Name << " can't be used as a part of expression.";
  1814. return false;
  1815. }
  1816. TAstNode* TArgPlaceholderNode::Translate(TContext& ctx) const {
  1817. Y_UNUSED(ctx);
  1818. return nullptr;
  1819. }
  1820. TString TArgPlaceholderNode::GetName() const {
  1821. return Name;
  1822. }
  1823. TNodePtr TArgPlaceholderNode::DoClone() const {
  1824. return {};
  1825. }
  1826. TNodePtr BuildArgPlaceholder(TPosition pos, const TString& name) {
  1827. return new TArgPlaceholderNode(pos, name);
  1828. }
  1829. class TAccessNode: public INode {
  1830. public:
  1831. TAccessNode(TPosition pos, const TVector<TIdPart>& ids, bool isLookup)
  1832. : INode(pos)
  1833. , Ids(ids)
  1834. , IsLookup(isLookup)
  1835. , ColumnOnly(false)
  1836. , IsColumnRequired(false)
  1837. {
  1838. Y_DEBUG_ABORT_UNLESS(Ids.size() > 1);
  1839. Y_DEBUG_ABORT_UNLESS(Ids[0].Expr);
  1840. auto column = dynamic_cast<TColumnNode*>(Ids[0].Expr.Get());
  1841. if (column) {
  1842. ui32 idx = 1;
  1843. TString source;
  1844. if (Ids.size() > 2) {
  1845. source = Ids[idx].Name;
  1846. ++idx;
  1847. }
  1848. ColumnOnly = !IsLookup && Ids.size() < 4;
  1849. if (ColumnOnly && Ids[idx].Expr) {
  1850. column->ResetColumn(Ids[idx].Expr, source);
  1851. } else {
  1852. column->ResetColumn(Ids[idx].Name, source);
  1853. }
  1854. }
  1855. }
  1856. void AssumeColumn() override {
  1857. IsColumnRequired = true;
  1858. }
  1859. TMaybe<std::pair<TString, TString>> TryMakeClusterAndTable(TContext& ctx, bool& hasErrors) {
  1860. hasErrors = false;
  1861. if (!ColumnOnly) {
  1862. return Nothing();
  1863. }
  1864. ui32 idx = 1;
  1865. TString cluster;
  1866. if (Ids.size() > 2) {
  1867. cluster = Ids[idx].Name;
  1868. ++idx;
  1869. }
  1870. if (cluster.StartsWith('$')) {
  1871. return Nothing();
  1872. }
  1873. TString normalizedClusterName;
  1874. if (!cluster.empty() && !ctx.GetClusterProvider(cluster, normalizedClusterName)) {
  1875. hasErrors = true;
  1876. ctx.Error() << "Unknown cluster: " << cluster;
  1877. return Nothing();
  1878. }
  1879. auto tableName = Ids[idx].Name;
  1880. if (tableName.empty()) {
  1881. return Nothing();
  1882. }
  1883. return std::make_pair(normalizedClusterName, tableName);
  1884. }
  1885. TSourcePtr TryMakeSource(TContext& ctx, const TString& view, bool& hasErrors) {
  1886. auto clusterAndTable = TryMakeClusterAndTable(ctx, hasErrors);
  1887. if (!clusterAndTable) {
  1888. return nullptr;
  1889. }
  1890. auto cluster = clusterAndTable->first.empty() ? ctx.CurrCluster : clusterAndTable->first;
  1891. TNodePtr tableKey = BuildTableKey(GetPos(), cluster, TDeferredAtom(GetPos(), clusterAndTable->second), view);
  1892. TTableRef table(ctx.MakeName("table"), cluster, tableKey);
  1893. table.Options = BuildInputOptions(GetPos(), GetContextHints(ctx));
  1894. return BuildTableSource(GetPos(), table, false);
  1895. }
  1896. TMaybe<TString> TryMakeTable() {
  1897. if (!ColumnOnly) {
  1898. return Nothing();
  1899. }
  1900. ui32 idx = 1;
  1901. if (Ids.size() > 2) {
  1902. return Nothing();
  1903. }
  1904. return Ids[idx].Name;
  1905. }
  1906. const TString* GetColumnName() const override {
  1907. return ColumnOnly ? Ids[0].Expr->GetColumnName() : nullptr;
  1908. }
  1909. const TString* GetSourceName() const override {
  1910. return Ids[0].Expr->GetSourceName();
  1911. }
  1912. bool DoInit(TContext& ctx, ISource* src) override {
  1913. auto expr = Ids[0].Expr;
  1914. const TPosition pos(expr->GetPos());
  1915. if (expr->IsAsterisk()) {
  1916. ctx.Error(pos) << "Asterisk column does not allow any access";
  1917. return false;
  1918. }
  1919. if (!expr->Init(ctx, src)) {
  1920. return false;
  1921. }
  1922. for (auto& id: Ids) {
  1923. if (id.Expr && !id.Expr->Init(ctx, src)) {
  1924. return false;
  1925. }
  1926. }
  1927. ui32 idx = 1;
  1928. auto column = dynamic_cast<TColumnNode*>(expr.Get());
  1929. if (column) {
  1930. const bool useSourceAsColumn = column->IsUseSourceAsColumn();
  1931. ColumnOnly &= !useSourceAsColumn;
  1932. if (IsColumnRequired && !ColumnOnly) {
  1933. ctx.Error(pos) << "Please use a full form (corellation.struct.field) or an alias (struct.field as alias) to access struct's field in the GROUP BY";
  1934. return false;
  1935. }
  1936. if (Ids.size() > 2) {
  1937. if (!CheckColumnId(pos, ctx, Ids[idx], ColumnOnly ? "Correlation" : "Column", true)) {
  1938. return false;
  1939. }
  1940. ++idx;
  1941. }
  1942. if (!useSourceAsColumn) {
  1943. if (!IsLookup && !CheckColumnId(pos, ctx, Ids[idx], ColumnOnly ? "Column" : "Member", false)) {
  1944. return false;
  1945. }
  1946. ++idx;
  1947. }
  1948. }
  1949. for (; idx < Ids.size(); ++idx) {
  1950. const auto& id = Ids[idx];
  1951. if (!id.Name.empty()) {
  1952. expr = Y("SqlAccess", Q("struct"), expr, id.Expr ? Y("EvaluateAtom", id.Expr) : BuildQuotedAtom(Pos, id.Name));
  1953. AccessOpName = "AccessStructMember";
  1954. } else if (id.Expr) {
  1955. expr = Y("SqlAccess", Q("dict"), expr, id.Expr);
  1956. AccessOpName = "AccessDictMember";
  1957. } else if (id.Pos >= 0) {
  1958. expr = Y("SqlAccess", Q("tuple"), expr, Q(ToString(id.Pos)));
  1959. AccessOpName = "AccessTupleElement";
  1960. } else {
  1961. continue;
  1962. }
  1963. if (ctx.PragmaYsonAutoConvert || ctx.PragmaYsonStrict) {
  1964. auto ysonOptions = Y();
  1965. if (ctx.PragmaYsonAutoConvert) {
  1966. ysonOptions->Add(BuildQuotedAtom(Pos, "yson_auto_convert"));
  1967. }
  1968. if (ctx.PragmaYsonStrict) {
  1969. ysonOptions->Add(BuildQuotedAtom(Pos, "yson_strict"));
  1970. }
  1971. expr->Add(Q(ysonOptions));
  1972. }
  1973. }
  1974. Node = expr;
  1975. return true;
  1976. }
  1977. TAstNode* Translate(TContext& ctx) const override {
  1978. Y_DEBUG_ABORT_UNLESS(Node);
  1979. return Node->Translate(ctx);
  1980. }
  1981. TPtr DoClone() const override {
  1982. YQL_ENSURE(!Node, "TAccessNode::Clone: Node should not be initialized");
  1983. TVector<TIdPart> cloneIds;
  1984. cloneIds.reserve(Ids.size());
  1985. for (const auto& id: Ids) {
  1986. cloneIds.emplace_back(id.Clone());
  1987. }
  1988. auto copy = new TAccessNode(Pos, cloneIds, IsLookup);
  1989. copy->ColumnOnly = ColumnOnly;
  1990. return copy;
  1991. }
  1992. protected:
  1993. void DoUpdateState() const override {
  1994. State.Set(ENodeState::Const, Ids[0].Expr->IsConstant());
  1995. State.Set(ENodeState::Aggregated, Ids[0].Expr->IsAggregated());
  1996. State.Set(ENodeState::AggregationKey, Ids[0].Expr->HasState(ENodeState::AggregationKey));
  1997. State.Set(ENodeState::OverWindow, Ids[0].Expr->IsOverWindow());
  1998. }
  1999. bool CheckColumnId(TPosition pos, TContext& ctx, const TIdPart& id, const TString& where, bool checkLookup) {
  2000. if (id.Name.empty()) {
  2001. ctx.Error(pos) << where << " name can not be empty";
  2002. return false;
  2003. }
  2004. if (id.Pos >= 0) {
  2005. ctx.Error(pos) << where << " name does not allow element selection";
  2006. return false;
  2007. }
  2008. if (checkLookup && id.Expr) {
  2009. ctx.Error(pos) << where << " name does not allow dict lookup";
  2010. return false;
  2011. }
  2012. return true;
  2013. }
  2014. TString GetOpName() const override {
  2015. return AccessOpName;
  2016. }
  2017. private:
  2018. TNodePtr Node;
  2019. TVector<TIdPart> Ids;
  2020. bool IsLookup;
  2021. bool ColumnOnly;
  2022. bool IsColumnRequired;
  2023. TString AccessOpName;
  2024. };
  2025. TNodePtr BuildAccess(TPosition pos, const TVector<INode::TIdPart>& ids, bool isLookup) {
  2026. return new TAccessNode(pos, ids, isLookup);
  2027. }
  2028. class TBindNode: public TAstListNode {
  2029. public:
  2030. TBindNode(TPosition pos, const TString& module, const TString& alias)
  2031. : TAstListNode(pos)
  2032. {
  2033. Add("bind", AstNode(module), BuildQuotedAtom(pos, alias));
  2034. }
  2035. TPtr DoClone() const final {
  2036. return {};
  2037. }
  2038. };
  2039. TNodePtr BuildBind(TPosition pos, const TString& module, const TString& alias) {
  2040. return new TBindNode(pos, module, alias);
  2041. }
  2042. class TLambdaNode: public TAstListNode {
  2043. public:
  2044. TLambdaNode(TPosition pos, TNodePtr params, TNodePtr body, const TString& resName)
  2045. : TAstListNode(pos)
  2046. {
  2047. if (!resName.empty()) {
  2048. body = Y("block", Q(L(body, Y("return", resName))));
  2049. }
  2050. Add("lambda", Q(params), body);
  2051. }
  2052. TPtr DoClone() const final {
  2053. return {};
  2054. }
  2055. };
  2056. TNodePtr BuildLambda(TPosition pos, TNodePtr params, TNodePtr body, const TString& resName) {
  2057. return new TLambdaNode(pos, params, body, resName);
  2058. }
  2059. template <bool Bit>
  2060. class TCastNode: public TAstListNode {
  2061. public:
  2062. TCastNode(TPosition pos, TNodePtr expr, const TString& typeName, const TString& paramOne, const TString& paramTwo)
  2063. : TAstListNode(pos)
  2064. , Expr(expr)
  2065. , NormalizedTypeName(TypeByAlias(typeName))
  2066. , ParamOne(paramOne)
  2067. , ParamTwo(paramTwo)
  2068. {}
  2069. const TString* GetSourceName() const override {
  2070. return Expr->GetSourceName();
  2071. }
  2072. TString GetOpName() const override {
  2073. return Bit ? "BitCast" : "Cast";
  2074. }
  2075. void DoUpdateState() const override {
  2076. State.Set(ENodeState::Const, Expr->IsConstant());
  2077. State.Set(ENodeState::Aggregated, Expr->IsAggregated());
  2078. State.Set(ENodeState::OverWindow, Expr->IsOverWindow());
  2079. }
  2080. TPtr DoClone() const final {
  2081. return new TCastNode(Pos, Expr->Clone(), NormalizedTypeName, ParamOne, ParamTwo);
  2082. }
  2083. bool DoInit(TContext& ctx, ISource* src) override;
  2084. private:
  2085. TNodePtr Expr;
  2086. const TString NormalizedTypeName;
  2087. const TString ParamOne, ParamTwo;
  2088. };
  2089. template <>
  2090. bool TCastNode<false>::DoInit(TContext& ctx, ISource* src) {
  2091. if (Expr->IsNull()) {
  2092. if (ParamOne.empty() && ParamTwo.empty()) {
  2093. Add("Nothing", Y("OptionalType", Y("DataType", Q(NormalizedTypeName))));
  2094. } else if (ParamTwo.empty()) {
  2095. Add("Nothing", Y("OptionalType", Y("DataType", Q(NormalizedTypeName), Q(ParamOne))));
  2096. } else {
  2097. Add("Nothing", Y("OptionalType", Y("DataType", Q(NormalizedTypeName), Q(ParamOne), Q(ParamTwo))));
  2098. }
  2099. } else {
  2100. if (ParamOne.empty() && ParamTwo.empty()) {
  2101. Add("Cast", Expr, Q(NormalizedTypeName));
  2102. } else if (ParamTwo.empty()) {
  2103. Add("Cast", Expr, Q(NormalizedTypeName), Q(ParamOne));
  2104. } else {
  2105. Add("Cast", Expr, Q(NormalizedTypeName), Q(ParamOne), Q(ParamTwo));
  2106. }
  2107. }
  2108. return TAstListNode::DoInit(ctx, src);
  2109. }
  2110. template <>
  2111. bool TCastNode<true>::DoInit(TContext& ctx, ISource* src) {
  2112. if (Expr->IsNull()) {
  2113. if (ParamOne.empty() && ParamTwo.empty()) {
  2114. Add("Nothing", Y("OptionalType", Y("DataType", Q(NormalizedTypeName))));
  2115. } else if (ParamTwo.empty()) {
  2116. Add("Nothing", Y("OptionalType", Y("DataType", Q(NormalizedTypeName), Q(ParamOne))));
  2117. } else {
  2118. Add("Nothing", Y("OptionalType", Y("DataType", Q(NormalizedTypeName), Q(ParamOne), Q(ParamTwo))));
  2119. }
  2120. } else {
  2121. if (ParamOne.empty() && ParamTwo.empty()) {
  2122. Add("BitCast", Expr, Q(NormalizedTypeName));
  2123. } else if (ParamTwo.empty()) {
  2124. Add("BitCast", Expr, Q(NormalizedTypeName), Q(ParamOne));
  2125. } else {
  2126. Add("BitCast", Expr, Q(NormalizedTypeName), Q(ParamOne), Q(ParamTwo));
  2127. }
  2128. }
  2129. return TAstListNode::DoInit(ctx, src);
  2130. }
  2131. TNodePtr BuildCast(TContext& ctx, TPosition pos, TNodePtr expr, const TString& typeName, const TString& paramOne, const TString& paramTwo) {
  2132. Y_UNUSED(ctx);
  2133. if (!expr) {
  2134. return nullptr;
  2135. }
  2136. return new TCastNode<false>(pos, expr, typeName, paramOne, paramTwo);
  2137. }
  2138. TNodePtr BuildBitCast(TContext& ctx, TPosition pos, TNodePtr expr, const TString& typeName, const TString& paramOne, const TString& paramTwo) {
  2139. Y_UNUSED(ctx);
  2140. if (!expr) {
  2141. return nullptr;
  2142. }
  2143. return new TCastNode<true>(pos, expr, typeName, paramOne, paramTwo);
  2144. }
  2145. TString TypeByAlias(const TString& alias, bool normalize) {
  2146. TString type(alias);
  2147. TCiString typeAlias(alias);
  2148. if (typeAlias.StartsWith("varchar")) {
  2149. type = "String";
  2150. } else if (typeAlias == "tinyint") {
  2151. type = "Int8";
  2152. } else if (typeAlias == "byte") {
  2153. type = "Uint8";
  2154. } else if (typeAlias == "smallint") {
  2155. type = "Int16";
  2156. } else if (typeAlias == "int" || typeAlias == "integer") {
  2157. type = "Int32";
  2158. } else if (typeAlias == "bigint") {
  2159. type = "Int64";
  2160. }
  2161. return normalize ? NormalizeTypeString(type) : type;
  2162. }
  2163. TNodePtr BuildIsNullOp(TPosition pos, TNodePtr a) {
  2164. if (!a) {
  2165. return nullptr;
  2166. }
  2167. if (a->IsNull()) {
  2168. return BuildLiteralBool(pos, "true");
  2169. }
  2170. return new TCallNodeImpl(pos, "Not", {new TCallNodeImpl(pos, "Exists", {a})});
  2171. }
  2172. TNodePtr BuildUnaryOp(TPosition pos, const TString& opName, TNodePtr a) {
  2173. if (!a) {
  2174. return nullptr;
  2175. }
  2176. if (a->IsNull()) {
  2177. return BuildLiteralNull(pos);
  2178. }
  2179. return new TCallNodeImpl(pos, opName, {a});
  2180. }
  2181. class TBinaryOpNode final: public TCallNode {
  2182. public:
  2183. TBinaryOpNode(TPosition pos, const TString& opName, TNodePtr a, TNodePtr b);
  2184. TNodePtr DoClone() const final {
  2185. YQL_ENSURE(Args.size() == 2);
  2186. return new TBinaryOpNode(Pos, OpName, Args[0]->Clone(), Args[1]->Clone());
  2187. }
  2188. };
  2189. TBinaryOpNode::TBinaryOpNode(TPosition pos, const TString& opName, TNodePtr a, TNodePtr b)
  2190. : TCallNode(pos, opName, 2, 2, { a, b })
  2191. {
  2192. }
  2193. TNodePtr BuildBinaryOp(TPosition pos, const TString& opName, TNodePtr a, TNodePtr b) {
  2194. if (!a || !b) {
  2195. return nullptr;
  2196. }
  2197. if (a->IsNull() && b->IsNull()) {
  2198. return BuildLiteralNull(pos);
  2199. }
  2200. return new TBinaryOpNode(pos, opName, a, b);
  2201. }
  2202. class TCalcOverWindow final: public INode {
  2203. public:
  2204. TCalcOverWindow(TPosition pos, const TString& windowName, TNodePtr node)
  2205. : INode(pos)
  2206. , WindowName(windowName)
  2207. , FuncNode(node)
  2208. {}
  2209. TAstNode* Translate(TContext& ctx) const override {
  2210. return FuncNode->Translate(ctx);
  2211. }
  2212. bool DoInit(TContext& ctx, ISource* src) override {
  2213. YQL_ENSURE(src);
  2214. TSourcePtr overWindowSource = BuildOverWindowSource(ctx.Pos(), WindowName, src);
  2215. if (!FuncNode->Init(ctx, overWindowSource.Get())) {
  2216. return false;
  2217. }
  2218. return true;
  2219. }
  2220. TPtr DoClone() const final {
  2221. return new TCalcOverWindow(Pos, WindowName, SafeClone(FuncNode));
  2222. }
  2223. void DoUpdateState() const override {
  2224. State.Set(ENodeState::Const, FuncNode->IsConstant());
  2225. State.Set(ENodeState::Aggregated, FuncNode->IsAggregated());
  2226. State.Set(ENodeState::OverWindow, true);
  2227. }
  2228. protected:
  2229. const TString WindowName;
  2230. TNodePtr FuncNode;
  2231. };
  2232. TNodePtr BuildCalcOverWindow(TPosition pos, const TString& windowName, TNodePtr call) {
  2233. return new TCalcOverWindow(pos, windowName, call);
  2234. }
  2235. class TYsonOptionsNode final: public INode {
  2236. public:
  2237. TYsonOptionsNode(TPosition pos, bool autoConvert, bool strict)
  2238. : INode(pos)
  2239. , AutoConvert(autoConvert)
  2240. , Strict(strict)
  2241. {
  2242. auto udf = Y("Udf", Q("Yson.Options"));
  2243. auto autoConvertNode = BuildLiteralBool(pos, autoConvert ? "true" : "false");
  2244. autoConvertNode->SetLabel("AutoConvert");
  2245. auto strictNode = BuildLiteralBool(pos, strict ? "true" : "false");
  2246. strictNode->SetLabel("Strict");
  2247. Node = Y("NamedApply", udf, Q(Y()), BuildStructure(pos, { autoConvertNode, strictNode }));
  2248. }
  2249. TAstNode* Translate(TContext& ctx) const override {
  2250. return Node->Translate(ctx);
  2251. }
  2252. bool DoInit(TContext& ctx, ISource* src) override {
  2253. YQL_ENSURE(src);
  2254. if (!Node->Init(ctx, src)) {
  2255. return false;
  2256. }
  2257. return true;
  2258. }
  2259. TPtr DoClone() const final {
  2260. return new TYsonOptionsNode(Pos, AutoConvert, Strict);
  2261. }
  2262. protected:
  2263. TNodePtr Node;
  2264. const bool AutoConvert;
  2265. const bool Strict;
  2266. };
  2267. TNodePtr BuildYsonOptionsNode(TPosition pos, bool autoConvert, bool strict) {
  2268. return new TYsonOptionsNode(pos, autoConvert, strict);
  2269. }
  2270. class TShortcutNode: public TAstAtomNode {
  2271. TNodePtr ShortcutNode;
  2272. TNodePtr SameNode;
  2273. const TString BaseName;
  2274. public:
  2275. TShortcutNode(const TNodePtr& node, const TString& baseName)
  2276. : TAstAtomNode(node->GetPos(), TStringBuilder() << "Shortcut" << baseName, TNodeFlags::Default)
  2277. , ShortcutNode(node)
  2278. , BaseName(baseName)
  2279. {}
  2280. bool DoInit(TContext& ctx, ISource* src) override {
  2281. auto shortcut = ctx.HasBlockShortcut(ShortcutNode);
  2282. if (!shortcut) {
  2283. SameNode = ShortcutNode->Clone();
  2284. if (!SameNode->Init(ctx, src)) {
  2285. return false;
  2286. }
  2287. shortcut = ctx.RegisterBlockShortcut(ShortcutNode, SameNode, BaseName);
  2288. YQL_ENSURE(shortcut);
  2289. } else {
  2290. SameNode = ctx.GetBlockShortcut(shortcut);
  2291. }
  2292. Content = shortcut;
  2293. return true;
  2294. }
  2295. const TString* GetSourceName() const override {
  2296. return ShortcutNode->GetSourceName();
  2297. }
  2298. void DoUpdateState() const override {
  2299. auto& workedNode = SameNode ? SameNode : ShortcutNode;
  2300. State.Set(ENodeState::Const, workedNode->IsConstant());
  2301. State.Set(ENodeState::Aggregated, workedNode->IsAggregated());
  2302. State.Set(ENodeState::OverWindow, workedNode->IsOverWindow());
  2303. }
  2304. TNodePtr DoClone() const final {
  2305. return new TShortcutNode(ShortcutNode, BaseName);
  2306. }
  2307. };
  2308. TNodePtr BuildShortcutNode(const TNodePtr& node, const TString& baseName) {
  2309. return new TShortcutNode(node, baseName);
  2310. }
  2311. class TDoCall final : public INode {
  2312. public:
  2313. TDoCall(TPosition pos, const TNodePtr& node)
  2314. : INode(pos)
  2315. , Node(node)
  2316. {
  2317. FakeSource = BuildFakeSource(pos);
  2318. }
  2319. ISource* GetSource() final {
  2320. return FakeSource.Get();
  2321. }
  2322. bool DoInit(TContext& ctx, ISource* src) final {
  2323. Y_UNUSED(src);
  2324. ctx.PushBlockShortcuts();
  2325. if (!Node->Init(ctx, FakeSource.Get())) {
  2326. return false;
  2327. }
  2328. Node = ctx.GroundBlockShortcutsForExpr(Node);
  2329. return true;
  2330. }
  2331. TAstNode* Translate(TContext& ctx) const final {
  2332. return Node->Translate(ctx);
  2333. }
  2334. TPtr DoClone() const final {
  2335. return new TDoCall(Pos, Node->Clone());
  2336. }
  2337. private:
  2338. TNodePtr Node;
  2339. TSourcePtr FakeSource;
  2340. };
  2341. TNodePtr BuildDoCall(TPosition pos, const TNodePtr& node) {
  2342. return new TDoCall(pos, node);
  2343. }
  2344. bool Parseui32(TNodePtr from, ui32& to) {
  2345. const TString* val;
  2346. if (!(val = from->GetLiteral("Int32"))) {
  2347. if (!(val = from->GetLiteral("Uint32"))) {
  2348. return false;
  2349. }
  2350. }
  2351. return TryFromString(*val, to);
  2352. }
  2353. TNodePtr GroundWithExpr(const TNodePtr& ground, const TNodePtr& expr) {
  2354. return ground ? expr->Y("block", expr->Q(expr->L(ground, expr->Y("return", expr)))) : expr;
  2355. }
  2356. TSourcePtr TryMakeSourceFromExpression(TContext& ctx, TNodePtr node, const TString& view) {
  2357. if (auto literal = node->GetLiteral("String")) {
  2358. if (ctx.CurrCluster.empty()) {
  2359. return nullptr;
  2360. }
  2361. TNodePtr tableKey = BuildTableKey(node->GetPos(), ctx.CurrCluster, TDeferredAtom(node->GetPos(), *literal), view);
  2362. TTableRef table(ctx.MakeName("table"), ctx.CurrCluster, tableKey);
  2363. table.Options = BuildInputOptions(node->GetPos(), GetContextHints(ctx));
  2364. return BuildTableSource(node->GetPos(), table, false);
  2365. }
  2366. if (auto access = dynamic_cast<TAccessNode*>(node.Get())) {
  2367. bool hasErrors;
  2368. auto src = access->TryMakeSource(ctx, view, hasErrors);
  2369. if (src || hasErrors) {
  2370. return src;
  2371. }
  2372. }
  2373. if (dynamic_cast<TLambdaNode*>(node.Get())) {
  2374. ctx.Error() << "Lambda is not allowed to be used as source. Did you forget to call a subquery template?";
  2375. return nullptr;
  2376. }
  2377. if (ctx.CurrCluster.empty()) {
  2378. return nullptr;
  2379. }
  2380. auto wrappedNode = node->Y("EvaluateAtom", node);
  2381. TNodePtr tableKey = BuildTableKey(node->GetPos(), ctx.CurrCluster, TDeferredAtom(wrappedNode, ctx), view);
  2382. TTableRef table(ctx.MakeName("table"), ctx.CurrCluster, tableKey);
  2383. table.Options = BuildInputOptions(node->GetPos(), GetContextHints(ctx));
  2384. return BuildTableSource(node->GetPos(), table, false);
  2385. }
  2386. void MakeTableFromExpression(TContext& ctx, TNodePtr node, TDeferredAtom& table) {
  2387. if (auto literal = node->GetLiteral("String")) {
  2388. table = TDeferredAtom(node->GetPos(), *literal);
  2389. return;
  2390. }
  2391. if (auto access = dynamic_cast<TAccessNode*>(node.Get())) {
  2392. auto ret = access->TryMakeTable();
  2393. if (ret) {
  2394. table = TDeferredAtom(node->GetPos(), *ret);
  2395. return;
  2396. }
  2397. }
  2398. auto wrappedNode = node->Y("EvaluateAtom", node);
  2399. table = TDeferredAtom(wrappedNode, ctx);
  2400. }
  2401. TDeferredAtom MakeAtomFromExpression(TContext& ctx, TNodePtr node) {
  2402. if (auto literal = node->GetLiteral("String")) {
  2403. return TDeferredAtom(node->GetPos(), *literal);
  2404. }
  2405. auto wrappedNode = node->Y("EvaluateAtom", node);
  2406. return TDeferredAtom(wrappedNode, ctx);
  2407. }
  2408. bool TryMakeClusterAndTableFromExpression(TNodePtr node, TString& cluster, TDeferredAtom& table, TContext& ctx) {
  2409. if (auto literal = node->GetLiteral("String")) {
  2410. cluster.clear();
  2411. table = TDeferredAtom(node->GetPos(), *literal);
  2412. return true;
  2413. }
  2414. if (auto access = dynamic_cast<TAccessNode*>(node.Get())) {
  2415. bool hasErrors;
  2416. auto ret = access->TryMakeClusterAndTable(ctx, hasErrors);
  2417. if (ret) {
  2418. cluster = ret->first;
  2419. table = TDeferredAtom(node->GetPos(), ret->second);
  2420. return true;
  2421. }
  2422. if (hasErrors) {
  2423. return false;
  2424. }
  2425. }
  2426. auto wrappedNode = node->Y("EvaluateAtom", node);
  2427. table = TDeferredAtom(wrappedNode, ctx);
  2428. return true;
  2429. }
  2430. class TTupleResultNode: public INode {
  2431. public:
  2432. TTupleResultNode(TNodePtr&& tuple, int ensureTupleSize)
  2433. : INode(tuple->GetPos())
  2434. , Node(std::move(tuple))
  2435. , EnsureTupleSize(ensureTupleSize)
  2436. {
  2437. }
  2438. bool DoInit(TContext& ctx, ISource* src) override {
  2439. ctx.PushBlockShortcuts();
  2440. if (!Node->Init(ctx, src)) {
  2441. return false;
  2442. }
  2443. Node = ctx.GroundBlockShortcutsForExpr(Node);
  2444. Node = Y("EnsureTupleSize", Node, Q(ToString(EnsureTupleSize)));
  2445. return true;
  2446. }
  2447. TAstNode* Translate(TContext& ctx) const override {
  2448. return Node->Translate(ctx);
  2449. }
  2450. TPtr DoClone() const final {
  2451. return {};
  2452. }
  2453. protected:
  2454. TNodePtr Node;
  2455. const int EnsureTupleSize;
  2456. };
  2457. TNodePtr BuildTupleResult(TNodePtr tuple, int ensureTupleSize) {
  2458. return new TTupleResultNode(std::move(tuple), ensureTupleSize);
  2459. }
  2460. } // namespace NSQLTranslationV0