builtin.cpp 129 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277
72778277927802781278227832784278527862787278827892790279127922793279427952796279727982799280028012802280328042805280628072808280928102811281228132814281528162817281828192820282128222823282428252826282728282829283028312832283328342835283628372838283928402841284228432844284528462847284828492850285128522853285428552856285728582859286028612862286328642865286628672868286928702871287228732874287528762877287828792880288128822883288428852886288728882889289028912892289328942895289628972898289929002901290229032904290529062907290829092910291129122913291429152916291729182919292029212922292329242925292629272928292929302931293229332934293529362937293829392940294129422943294429452946294729482949295029512952295329542955295629572958295929602961296229632964296529662967296829692970297129722973297429752976297729782979298029812982298329842985298629872988298929902991299229932994299529962997299829993000300130023003300430053006300730083009301030113012301330143015301630173018301930203021302230233024302530263027302830293030303130323033303430353036303730383039304030413042304330443045304630473048304930503051305230533054305530563057305830593060306130623063306430653066306730683069307030713072307330743075307630773078307930803081308230833084308530863087308830893090309130923093309430953096309730983099310031013102310331043105310631073108310931103111311231133114311531163117311831193120312131223123312431253126312731283129313031313132313331343135313631373138313931403141314231433144314531463147314831493150315131523153315431553156315731583159316031613162316331643165316631673168316931703171317231733174317531763177317831793180318131823183318431853186318731883189319031913192319331943195319631973198319932003201320232033204320532063207320832093210321132123213321432153216321732183219322032213222322332243225322632273228322932303231323232333234323532363237323832393240324132423243324432453246324732483249325032513252325332543255325632573258325932603261326232633264326532663267326832693270327132723273327432753276327
732783279328032813282328332843285328632873288328932903291329232933294329532963297329832993300330133023303330433053306330733083309331033113312331333143315331633173318331933203321332233233324332533263327332833293330333133323333333433353336333733383339334033413342334333443345334633473348
  1. #include "node.h"
  2. #include "context.h"
  3. #include "list_builtin.h"
  4. #include <ydb/library/yql/ast/yql_type_string.h>
  5. #include <ydb/library/yql/core/yql_expr_type_annotation.h>
  6. #include <ydb/library/yql/public/udf/udf_data_type.h>
  7. #include <ydb/library/yql/core/sql_types/simple_types.h>
  8. #include <ydb/library/yql/minikql/mkql_program_builder.h>
  9. #include <ydb/library/yql/minikql/mkql_type_ops.h>
  10. #include <ydb/library/yql/public/issue/yql_issue_id.h>
  11. #include <library/cpp/charset/ci_string.h>
  12. #include <library/cpp/yson/node/node_io.h>
  13. #include <util/string/builder.h>
  14. #include <util/string/cast.h>
  15. #include <util/string/util.h>
  16. #include <util/string/join.h>
  17. #include <util/system/env.h>
  18. #include <unordered_map>
  19. using namespace NYql;
  20. namespace NSQLTranslationV1 {
  // Callable names for the subquery builtins. They are defined with external linkage;
  // NOTE(review): presumably so they can serve as `const char*` template arguments of
  // the subquery node templates in this file — confirm against the instantiation sites.
  extern const char SubqueryExtendFor[] = "SubqueryExtendFor";
  extern const char SubqueryUnionAllFor[] = "SubqueryUnionAllFor";
  extern const char SubqueryMergeFor[] = "SubqueryMergeFor";
  extern const char SubqueryUnionMergeFor[] = "SubqueryUnionMergeFor";
  extern const char SubqueryOrderBy[] = "SubqueryOrderBy";
  extern const char SubqueryAssumeOrderBy[] = "SubqueryAssumeOrderBy";
  27. TMaybe<TString> MakeTypeConfig(const TString& ns, const TVector<TNodePtr>& udfArgs) {
  28. if (ns == "clickhouse") {
  29. auto settings = NYT::TNode::CreateMap();
  30. auto args = NYT::TNode::CreateMap();
  31. for (ui32 i = 0; i < udfArgs.size(); ++i) {
  32. if (!udfArgs[i]->IsNull() && udfArgs[i]->IsLiteral()) {
  33. args[ToString(i)] = NYT::TNode()
  34. ("type", udfArgs[i]->GetLiteralType())
  35. ("value", udfArgs[i]->GetLiteralValue());
  36. }
  37. }
  38. settings["args"] = args;
  39. return NYT::NodeToYsonString(settings);
  40. }
  41. return Nothing();
  42. }
  43. class TGroupingNode final: public TAstListNode {
  44. public:
  45. TGroupingNode(TPosition pos, const TVector<TNodePtr>& args)
  46. : TAstListNode(pos)
  47. , Args(args)
  48. {}
  49. bool DoInit(TContext& ctx, ISource* src) final {
  50. if (!src) {
  51. ctx.Error(Pos) << "Grouping function should have source";
  52. return false;
  53. }
  54. TVector<TString> columns;
  55. columns.reserve(Args.size());
  56. const bool isJoin = src->GetJoin();
  57. ISource* composite = src->GetCompositeSource();
  58. for (const auto& node: Args) {
  59. auto namePtr = node->GetColumnName();
  60. if (!namePtr || !*namePtr) {
  61. ctx.Error(Pos) << "GROUPING function should use columns as arguments";
  62. return false;
  63. }
  64. TString column = *namePtr;
  65. if (isJoin) {
  66. auto sourceNamePtr = node->GetSourceName();
  67. if (sourceNamePtr && !sourceNamePtr->empty()) {
  68. column = DotJoin(*sourceNamePtr, column);
  69. }
  70. }
  71. if (!src->IsGroupByColumn(column) && !src->IsAlias(EExprSeat::GroupBy, *namePtr) && (!composite || !composite->IsGroupByColumn(column))) {
  72. ctx.Error(node->GetPos()) << "Column '" << column << "' is not a grouping column";
  73. return false;
  74. }
  75. columns.emplace_back(column);
  76. }
  77. ui64 hint;
  78. if (!src->CalculateGroupingHint(ctx, columns, hint)) {
  79. return false;
  80. }
  81. Nodes.push_back(BuildAtom(Pos, "Uint64"));
  82. Nodes.push_back(BuildQuotedAtom(Pos, IntToString<10>(hint)));
  83. return TAstListNode::DoInit(ctx, src);
  84. }
  85. TNodePtr DoClone() const final {
  86. return new TGroupingNode(Pos, Args);
  87. }
  88. private:
  89. const TVector<TNodePtr> Args;
  90. };
  91. class TBasicAggrFunc final: public TAstListNode {
  92. public:
  93. TBasicAggrFunc(TPosition pos, const TString& name, TAggregationPtr aggr, const TVector<TNodePtr>& args)
  94. : TAstListNode(pos)
  95. , Name(name)
  96. , Aggr(aggr)
  97. , Args(args)
  98. {}
  99. TCiString GetName() const {
  100. return Name;
  101. }
  102. bool DoInit(TContext& ctx, ISource* src) final {
  103. if (!src) {
  104. ctx.Error(Pos) << "Unable to use aggregation function '" << Name << "' without data source";
  105. return false;
  106. }
  107. if (!DoInitAggregation(ctx, src)) {
  108. return false;
  109. }
  110. return TAstListNode::DoInit(ctx, src);
  111. }
  112. void CollectPreaggregateExprs(TContext& ctx, ISource& src, TVector<INode::TPtr>& exprs) override {
  113. if (Args.empty() || Aggr->GetAggregationMode() != EAggregateMode::Distinct) {
  114. return;
  115. }
  116. auto& expr = Args.front();
  117. // need to initialize expr before checking whether it is a column
  118. auto clone = expr->Clone();
  119. if (!clone->Init(ctx, &src)) {
  120. return;
  121. }
  122. const auto column = clone->GetColumnName();
  123. if (column) {
  124. return;
  125. }
  126. auto tmpColumn = src.MakeLocalName("_yql_preagg_" + Name);
  127. YQL_ENSURE(!expr->GetLabel());
  128. expr->SetLabel(tmpColumn);
  129. PreaggregateExpr = expr;
  130. exprs.push_back(PreaggregateExpr);
  131. expr = BuildColumn(expr->GetPos(), tmpColumn);
  132. Aggr->MarkKeyColumnAsGenerated();
  133. }
  134. TNodePtr DoClone() const final {
  135. TAggregationPtr aggrClone = static_cast<IAggregation*>(Aggr->Clone().Release());
  136. return new TBasicAggrFunc(Pos, Name, aggrClone, CloneContainer(Args));
  137. }
  138. TAggregationPtr GetAggregation() const override {
  139. return Aggr;
  140. }
  141. private:
  142. bool DoInitAggregation(TContext& ctx, ISource* src) {
  143. if (PreaggregateExpr) {
  144. YQL_ENSURE(PreaggregateExpr->HasState(ENodeState::Initialized));
  145. if (PreaggregateExpr->IsAggregated() && !PreaggregateExpr->IsAggregationKey() && !Aggr->IsOverWindow()) {
  146. ctx.Error(Aggr->GetPos()) << "Aggregation of aggregated values is forbidden for no window functions";
  147. return false;
  148. }
  149. }
  150. if (!Aggr->InitAggr(ctx, false, src, *this, Args)) {
  151. return false;
  152. }
  153. return src->AddAggregation(ctx, Aggr);
  154. }
  155. void DoUpdateState() const final {
  156. State.Set(ENodeState::Const, !Args.empty() && AllOf(Args, [](const auto& arg){ return arg->IsConstant(); }));
  157. State.Set(ENodeState::Aggregated);
  158. }
  159. TNodePtr PreaggregateExpr;
  160. protected:
  161. const TString Name;
  162. TAggregationPtr Aggr;
  163. TVector<TNodePtr> Args;
  164. };
  165. class TBasicAggrFactory final : public TAstListNode {
  166. public:
  167. TBasicAggrFactory(TPosition pos, const TString& name, TAggregationPtr aggr, const TVector<TNodePtr>& args)
  168. : TAstListNode(pos)
  169. , Name(name)
  170. , Aggr(aggr)
  171. , Args(args)
  172. {}
  173. TCiString GetName() const {
  174. return Name;
  175. }
  176. bool DoInit(TContext& ctx, ISource* src) final {
  177. if (!DoInitAggregation(ctx)) {
  178. return false;
  179. }
  180. auto factory = Aggr->AggregationTraitsFactory();
  181. auto apply = Y("Apply", factory, Y("ListType", "type"));
  182. auto columnIndices = Aggr->GetFactoryColumnIndices();
  183. if (columnIndices.size() == 1) {
  184. apply = L(apply, "extractor");
  185. } else {
  186. // make several extractors from main that returns a tuple
  187. for (ui32 arg = 0; arg < columnIndices.size(); ++arg) {
  188. auto partial = BuildLambda(Pos, Y("row"), Y("Nth", Y("Apply", "extractor", "row"), Q(ToString(columnIndices[arg]))));
  189. apply = L(apply, partial);
  190. }
  191. }
  192. Aggr->AddFactoryArguments(apply);
  193. Lambda = BuildLambda(Pos, Y("type", "extractor"), apply);
  194. return TAstListNode::DoInit(ctx, src);
  195. }
  196. TAstNode* Translate(TContext& ctx) const override {
  197. return Lambda->Translate(ctx);
  198. }
  199. TNodePtr DoClone() const final {
  200. TAggregationPtr aggrClone = static_cast<IAggregation*>(Aggr->Clone().Release());
  201. return new TBasicAggrFactory(Pos, Name, aggrClone, CloneContainer(Args));
  202. }
  203. TAggregationPtr GetAggregation() const override {
  204. return Aggr;
  205. }
  206. private:
  207. bool DoInitAggregation(TContext& ctx) {
  208. return Aggr->InitAggr(ctx, true, nullptr, *this, Args);
  209. }
  210. protected:
  211. const TString Name;
  212. TAggregationPtr Aggr;
  213. TVector<TNodePtr> Args;
  214. TNodePtr Lambda;
  215. };
  216. typedef THolder<TBasicAggrFunc> TAggrFuncPtr;
  217. class TLiteralStringAtom: public INode {
  218. public:
  219. TLiteralStringAtom(TPosition pos, TNodePtr node, const TString& info)
  220. : INode(pos)
  221. , Node(node)
  222. , Info(info)
  223. {
  224. }
  225. bool DoInit(TContext& ctx, ISource* src) override {
  226. Y_UNUSED(src);
  227. if (!Node) {
  228. ctx.Error(Pos) << Info;
  229. return false;
  230. }
  231. if (!Node->Init(ctx, src)) {
  232. return false;
  233. }
  234. Atom = MakeAtomFromExpression(ctx, Node).Build();
  235. return true;
  236. }
  237. bool IsLiteral() const override {
  238. return Atom ? Atom->IsLiteral() : false;
  239. }
  240. TString GetLiteralType() const override {
  241. return Atom ? Atom->GetLiteralType() : "";
  242. }
  243. TString GetLiteralValue() const override {
  244. return Atom ? Atom->GetLiteralValue() : "";
  245. }
  246. TAstNode* Translate(TContext& ctx) const override {
  247. return Atom->Translate(ctx);
  248. }
  249. TPtr DoClone() const final {
  250. return {};
  251. }
  252. void DoUpdateState() const override {
  253. YQL_ENSURE(Atom);
  254. State.Set(ENodeState::Const, Atom->IsConstant());
  255. State.Set(ENodeState::Aggregated, Atom->IsAggregated());
  256. State.Set(ENodeState::OverWindow, Atom->IsOverWindow());
  257. }
  258. private:
  259. TNodePtr Node;
  260. TNodePtr Atom;
  261. TString Info;
  262. };
  263. class TYqlAsAtom: public TLiteralStringAtom {
  264. public:
  265. TYqlAsAtom(TPosition pos, const TVector<TNodePtr>& args)
  266. : TLiteralStringAtom(pos, args.size() == 1 ? args[0] : nullptr, "Literal string is required as argument")
  267. {
  268. }
  269. };
  270. class TYqlData: public TCallNode {
  271. public:
  272. TYqlData(TPosition pos, const TString& type, const TVector<TNodePtr>& args)
  273. : TCallNode(pos, type, 1, 1, args)
  274. {
  275. }
  276. bool DoInit(TContext& ctx, ISource* src) override {
  277. auto slot = NUdf::FindDataSlot(GetOpName());
  278. if (!slot) {
  279. ctx.Error(Pos) << "Unexpected type " << GetOpName();
  280. return false;
  281. }
  282. if (*slot == NUdf::EDataSlot::Decimal) {
  283. MinArgs = MaxArgs = 3;
  284. }
  285. if (!ValidateArguments(ctx)) {
  286. return false;
  287. }
  288. auto stringNode = Args[0];
  289. auto atom = stringNode->GetLiteral("String");
  290. if (!atom) {
  291. ctx.Error(Pos) << "Expected literal string as argument in " << GetOpName() << " function";
  292. return false;
  293. }
  294. TString value;
  295. if (*slot == NUdf::EDataSlot::Decimal) {
  296. const auto precision = Args[1]->GetLiteral("Int32");
  297. const auto scale = Args[2]->GetLiteral("Int32");
  298. if (!NKikimr::NMiniKQL::IsValidDecimal(*atom)) {
  299. ctx.Error(Pos) << "Invalid value " << atom->Quote() << " for type " << GetOpName();
  300. return false;
  301. }
  302. ui8 stub;
  303. if (!(precision && TryFromString<ui8>(*precision, stub))) {
  304. ctx.Error(Pos) << "Invalid precision " << (precision ? precision->Quote() : "") << " for type " << GetOpName();
  305. return false;
  306. }
  307. if (!(scale && TryFromString<ui8>(*scale, stub))) {
  308. ctx.Error(Pos) << "Invalid scale " << (scale ? scale->Quote() : "") << " for type " << GetOpName();
  309. return false;
  310. }
  311. Args[0] = BuildQuotedAtom(GetPos(), *atom);
  312. Args[1] = BuildQuotedAtom(GetPos(), *precision);
  313. Args[2] = BuildQuotedAtom(GetPos(), *scale);
  314. return TCallNode::DoInit(ctx, src);
  315. } else if (NUdf::GetDataTypeInfo(*slot).Features & (NUdf::DateType | NUdf::TzDateType | NUdf::TimeIntervalType)) {
  316. const auto out = NKikimr::NMiniKQL::ValueFromString(*slot, *atom);
  317. if (!out) {
  318. ctx.Error(Pos) << "Invalid value " << atom->Quote() << " for type " << GetOpName();
  319. return false;
  320. }
  321. switch (*slot) {
  322. case NUdf::EDataSlot::Date:
  323. case NUdf::EDataSlot::TzDate:
  324. value = ToString(out.Get<ui16>());
  325. break;
  326. case NUdf::EDataSlot::Datetime:
  327. case NUdf::EDataSlot::TzDatetime:
  328. value = ToString(out.Get<ui32>());
  329. break;
  330. case NUdf::EDataSlot::Timestamp:
  331. case NUdf::EDataSlot::TzTimestamp:
  332. value = ToString(out.Get<ui64>());
  333. break;
  334. case NUdf::EDataSlot::Interval:
  335. value = ToString(out.Get<i64>());
  336. if ('T' == atom->back()) {
  337. ctx.Error(Pos) << "Time prefix 'T' at end of interval constant. The designator 'T' shall be absent if all of the time components are absent.";
  338. return false;
  339. }
  340. break;
  341. default:
  342. Y_FAIL("Unexpected data slot");
  343. }
  344. if (NUdf::GetDataTypeInfo(*slot).Features & NUdf::TzDateType) {
  345. value += ",";
  346. value += NKikimr::NMiniKQL::GetTimezoneIANAName(out.GetTimezoneId());
  347. }
  348. } else if (NUdf::EDataSlot::Uuid == *slot) {
  349. char out[0x10];
  350. if (!NKikimr::NMiniKQL::ParseUuid(*atom, out)) {
  351. ctx.Error(Pos) << "Invalid value " << atom->Quote() << " for type " << GetOpName();
  352. return false;
  353. }
  354. value.assign(out, sizeof(out));
  355. } else {
  356. if (!NKikimr::NMiniKQL::IsValidStringValue(*slot, *atom)) {
  357. ctx.Error(Pos) << "Invalid value " << atom->Quote() << " for type " << GetOpName();
  358. return false;
  359. }
  360. value = *atom;
  361. }
  362. Args[0] = BuildQuotedAtom(GetPos(), value);
  363. return TCallNode::DoInit(ctx, src);
  364. }
  365. TPtr DoClone() const final {
  366. return new TYqlData(GetPos(), OpName, CloneContainer(Args));
  367. }
  368. };
  369. class TTableName : public TCallNode {
  370. public:
  371. TTableName(TPosition pos, const TVector<TNodePtr>& args, const TString& service)
  372. : TCallNode(pos, "TableName", 0, 2, args)
  373. , Service(service)
  374. , EmptyArgs(args.empty())
  375. {
  376. }
  377. bool DoInit(TContext& ctx, ISource* src) override {
  378. if (!ValidateArguments(ctx)) {
  379. return false;
  380. }
  381. if (Args.empty()) {
  382. if (!src) {
  383. ctx.Error(Pos) << "Unable to use TableName() without source";
  384. return false;
  385. }
  386. // TODO: TablePath() and TableRecordIndex() have more strict limitations
  387. if (src->GetJoin()) {
  388. ctx.Warning(Pos,
  389. TIssuesIds::YQL_EMPTY_TABLENAME_RESULT) << "TableName() may produce empty result when used in ambiguous context (with JOIN)";
  390. }
  391. if (src->HasAggregations()) {
  392. ctx.Warning(Pos,
  393. TIssuesIds::YQL_EMPTY_TABLENAME_RESULT) << "TableName() will produce empty result when used with aggregation.\n"
  394. "Please consult https://yql.yandex-team.ru/docs/yt/builtins/basic/#tablepath for possible workaround";
  395. }
  396. Args.push_back(Y("TablePath", Y("DependsOn", "row")));
  397. }
  398. if (Args.size() == 2) {
  399. auto literal = Args[1]->GetLiteral("String");
  400. if (!literal) {
  401. ctx.Error(Args[1]->GetPos()) << "Expected literal string as second argument in TableName function";
  402. return false;
  403. }
  404. Args[1] = BuildQuotedAtom(Args[1]->GetPos(), *literal);
  405. } else {
  406. if (Service.empty()) {
  407. ctx.Error(GetPos()) << GetOpName() << " requires either one of \"yt\"/\"kikimr\"/\"rtmr\" as second argument or current cluster name";
  408. return false;
  409. }
  410. Args.push_back(BuildQuotedAtom(GetPos(), Service));
  411. }
  412. return TCallNode::DoInit(ctx, src);
  413. }
  414. TPtr DoClone() const final {
  415. return new TTableName(GetPos(), CloneContainer(Args), Service);
  416. }
  417. void DoUpdateState() const override {
  418. if (EmptyArgs) {
  419. State.Set(ENodeState::Const, false);
  420. } else {
  421. TCallNode::DoUpdateState();
  422. }
  423. }
  424. private:
  425. TString Service;
  426. const bool EmptyArgs;
  427. };
  428. class TYqlParseType final : public INode {
  429. public:
  430. TYqlParseType(TPosition pos, const TVector<TNodePtr>& args)
  431. : INode(pos)
  432. , Args(args)
  433. {}
  434. TAstNode* Translate(TContext& ctx) const override {
  435. if (Args.size() != 1) {
  436. ctx.Error(Pos) << "Expected 1 argument in ParseType function";
  437. return nullptr;
  438. }
  439. auto literal = Args[0]->GetLiteral("String");
  440. if (!literal) {
  441. ctx.Error(Args[0]->GetPos()) << "Expected literal string as argument in ParseType function";
  442. return nullptr;
  443. }
  444. auto parsed = ParseType(*literal, *ctx.Pool, ctx.Issues, Args[0]->GetPos());
  445. if (!parsed) {
  446. ctx.Error(Args[0]->GetPos()) << "Failed to parse type";
  447. return nullptr;
  448. }
  449. return parsed;
  450. }
  451. TNodePtr DoClone() const final {
  452. return new TYqlParseType(Pos, Args);
  453. }
  454. void DoUpdateState() const final {
  455. State.Set(ENodeState::Const);
  456. }
  457. private:
  458. TVector<TNodePtr> Args;
  459. };
  460. class TYqlAddTimezone: public TCallNode {
  461. public:
  462. TYqlAddTimezone(TPosition pos, const TVector<TNodePtr>& args)
  463. : TCallNode(pos, "AddTimezone", 2, 2, args)
  464. {
  465. }
  466. bool DoInit(TContext& ctx, ISource* src) override {
  467. if (!ValidateArguments(ctx)) {
  468. return false;
  469. }
  470. Args[1] = Y("TimezoneId", Args[1]);
  471. return TCallNode::DoInit(ctx, src);
  472. }
  473. TNodePtr DoClone() const final {
  474. return new TYqlAddTimezone(Pos, CloneContainer(Args));
  475. }
  476. };
  477. class TYqlPgType: public TCallNode {
  478. public:
  479. TYqlPgType(TPosition pos, const TVector<TNodePtr>& args)
  480. : TCallNode(pos, "PgType", 1, 1, args)
  481. {
  482. }
  483. TNodePtr DoClone() const final {
  484. return new TYqlPgType(Pos, CloneContainer(Args));
  485. }
  486. TAstNode* Translate(TContext& ctx) const override {
  487. // argument is already a proper PgType callable - here we just return argument
  488. YQL_ENSURE(Nodes.size() == 2);
  489. return Nodes.back()->Translate(ctx);
  490. }
  491. };
  492. class TYqlPgConst : public TCallNode {
  493. public:
  494. TYqlPgConst(TPosition pos, const TVector<TNodePtr>& args)
  495. : TCallNode(pos, "PgConst", 2, 2, args)
  496. {
  497. }
  498. bool DoInit(TContext& ctx, ISource* src) override {
  499. if (!ValidateArguments(ctx)) {
  500. return false;
  501. }
  502. if (!Args[1]->Init(ctx, src)) {
  503. return false;
  504. }
  505. auto value = MakeAtomFromExpression(ctx, Args[1]).Build();
  506. Args[1] = value;
  507. return TCallNode::DoInit(ctx, src);
  508. }
  509. TNodePtr DoClone() const final {
  510. return new TYqlPgConst(Pos, Args);
  511. }
  512. };
  513. template <const char* Name>
  514. class TYqlSubqueryFor : public TCallNode {
  515. public:
  516. TYqlSubqueryFor(TPosition pos, const TVector<TNodePtr>& args)
  517. : TCallNode(pos, Name, 2, 2, args)
  518. {
  519. }
  520. bool DoInit(TContext& ctx, ISource* src) override {
  521. if (!ValidateArguments(ctx)) {
  522. return false;
  523. }
  524. Args[0] = Y("EvaluateExpr", Args[0]);
  525. return TCallNode::DoInit(ctx, src);
  526. }
  527. TNodePtr DoClone() const final {
  528. return new TYqlSubqueryFor<Name>(Pos, Args);
  529. }
  530. };
  531. template <const char* Name>
  532. class TYqlSubqueryOrderBy : public TCallNode {
  533. public:
  534. TYqlSubqueryOrderBy(TPosition pos, const TVector<TNodePtr>& args)
  535. : TCallNode(pos, Name, 2, 2, args)
  536. {
  537. }
  538. bool DoInit(TContext& ctx, ISource* src) override {
  539. if (!ValidateArguments(ctx)) {
  540. return false;
  541. }
  542. Args[1] = Y("EvaluateExpr", Args[1]);
  543. return TCallNode::DoInit(ctx, src);
  544. }
  545. TNodePtr DoClone() const final {
  546. return new TYqlSubqueryOrderBy<Name>(Pos, Args);
  547. }
  548. };
  549. template <bool Strict>
  550. class TYqlTypeAssert : public TCallNode {
  551. public:
  552. TYqlTypeAssert(TPosition pos, const TVector<TNodePtr>& args)
  553. : TCallNode(pos, Strict ? "EnsureType" : "EnsureConvertibleTo", 2, 3, args)
  554. {
  555. }
  556. bool DoInit(TContext& ctx, ISource* src) override {
  557. if (!ValidateArguments(ctx)) {
  558. return false;
  559. }
  560. if (!Args[1]->Init(ctx, src)) {
  561. return false;
  562. }
  563. if (Args.size() == 3) {
  564. if (!Args[2]->Init(ctx, src)) {
  565. return false;
  566. }
  567. auto message = MakeAtomFromExpression(ctx, Args[2]).Build();
  568. Args[2] = message;
  569. }
  570. return TCallNode::DoInit(ctx, src);
  571. }
  572. TNodePtr DoClone() const final {
  573. return new TYqlTypeAssert<Strict>(Pos, Args);
  574. }
  575. };
  576. class TFromBytes final : public TCallNode {
  577. public:
  578. TFromBytes(TPosition pos, const TVector<TNodePtr>& args)
  579. : TCallNode(pos, "FromBytes", 2, 2, args)
  580. {}
  581. bool DoInit(TContext& ctx, ISource* src) override {
  582. if (!ValidateArguments(ctx)) {
  583. return false;
  584. }
  585. if (!Args[1]->Init(ctx, src)) {
  586. return false;
  587. }
  588. Args[1] = MakeAtomFromExpression(ctx, Y("FormatType", Args[1])).Build();
  589. return TCallNode::DoInit(ctx, src);
  590. }
  591. TNodePtr DoClone() const final {
  592. return new TFromBytes(Pos, Args);
  593. }
  594. };
  595. class TYqlTaggedBase : public TCallNode {
  596. public:
  597. TYqlTaggedBase(TPosition pos, const TString& opName, const TVector<TNodePtr>& args)
  598. : TCallNode(pos, opName, 2, 2, args)
  599. {}
  600. bool DoInit(TContext& ctx, ISource* src) override {
  601. if (!ValidateArguments(ctx)) {
  602. return false;
  603. }
  604. if (!Args[1]->Init(ctx, src)) {
  605. return false;
  606. }
  607. Args[1] = MakeAtomFromExpression(ctx, Args[1]).Build();
  608. return TCallNode::DoInit(ctx, src);
  609. }
  610. };
  611. class TYqlAsTagged final : public TYqlTaggedBase {
  612. public:
  613. TYqlAsTagged(TPosition pos, const TVector<TNodePtr>& args)
  614. : TYqlTaggedBase(pos, "AsTagged", args)
  615. {}
  616. TNodePtr DoClone() const final {
  617. return new TYqlAsTagged(Pos, Args);
  618. }
  619. };
  620. class TYqlUntag final : public TYqlTaggedBase {
  621. public:
  622. TYqlUntag(TPosition pos, const TVector<TNodePtr>& args)
  623. : TYqlTaggedBase(pos, "Untag", args)
  624. {}
  625. TNodePtr DoClone() const final {
  626. return new TYqlUntag(Pos, Args);
  627. }
  628. };
  629. class TYqlVariant final : public TCallNode {
  630. public:
  631. TYqlVariant(TPosition pos, const TVector<TNodePtr>& args)
  632. : TCallNode(pos, "Variant", 3, 3, args)
  633. {}
  634. bool DoInit(TContext& ctx, ISource* src) override {
  635. if (!ValidateArguments(ctx)) {
  636. return false;
  637. }
  638. if (!Args[1]->Init(ctx, src)) {
  639. return false;
  640. }
  641. Args[1] = MakeAtomFromExpression(ctx, Args[1]).Build();
  642. return TCallNode::DoInit(ctx, src);
  643. }
  644. TNodePtr DoClone() const final {
  645. return new TYqlVariant(Pos, Args);
  646. }
  647. };
  648. class TYqlEnum final : public TCallNode {
  649. public:
  650. TYqlEnum(TPosition pos, const TVector<TNodePtr>& args)
  651. : TCallNode(pos, "Enum", 2, 2, args)
  652. {}
  653. bool DoInit(TContext& ctx, ISource* src) override {
  654. if (!ValidateArguments(ctx)) {
  655. return false;
  656. }
  657. if (!Args[0]->Init(ctx, src)) {
  658. return false;
  659. }
  660. Args[0] = MakeAtomFromExpression(ctx, Args[0]).Build();
  661. return TCallNode::DoInit(ctx, src);
  662. }
  663. TNodePtr DoClone() const final {
  664. return new TYqlEnum(Pos, Args);
  665. }
  666. };
  667. class TYqlAsVariant final : public TCallNode {
  668. public:
  669. TYqlAsVariant(TPosition pos, const TVector<TNodePtr>& args)
  670. : TCallNode(pos, "AsVariant", 2, 2, args)
  671. {}
  672. bool DoInit(TContext& ctx, ISource* src) override {
  673. if (!ValidateArguments(ctx)) {
  674. return false;
  675. }
  676. if (!Args[1]->Init(ctx, src)) {
  677. return false;
  678. }
  679. Args[1] = MakeAtomFromExpression(ctx, Args[1]).Build();
  680. return TCallNode::DoInit(ctx, src);
  681. }
  682. TNodePtr DoClone() const final {
  683. return new TYqlAsVariant(Pos, Args);
  684. }
  685. };
  686. class TYqlAsEnum final : public TCallNode {
  687. public:
  688. TYqlAsEnum(TPosition pos, const TVector<TNodePtr>& args)
  689. : TCallNode(pos, "AsEnum", 1, 1, args)
  690. {}
  691. bool DoInit(TContext& ctx, ISource* src) override {
  692. if (!ValidateArguments(ctx)) {
  693. return false;
  694. }
  695. if (!Args[0]->Init(ctx, src)) {
  696. return false;
  697. }
  698. Args[0] = MakeAtomFromExpression(ctx, Args[0]).Build();
  699. return TCallNode::DoInit(ctx, src);
  700. }
  701. TNodePtr DoClone() const final {
  702. return new TYqlAsEnum(Pos, Args);
  703. }
  704. };
  705. TNodePtr BuildFileNameArgument(TPosition pos, const TNodePtr& argument) {
  706. return new TLiteralStringAtom(pos, argument, "FilePath requires string literal as parameter");
  707. }
  708. class TYqlAtom final: public TCallNode {
  709. public:
  710. TYqlAtom(TPosition pos, const TString& opName, const TVector<TNodePtr>& args)
  711. : TCallNode(pos, opName, 1, 1, args)
  712. {}
  713. bool DoInit(TContext& ctx, ISource* src) override {
  714. if (!Args.empty()) {
  715. Args[0] = BuildFileNameArgument(ctx.Pos(), Args[0]);
  716. }
  717. return TCallNode::DoInit(ctx, src);
  718. }
  719. TNodePtr DoClone() const final {
  720. return new TYqlAtom(Pos, OpName, Args);
  721. }
  722. bool IsLiteral() const override {
  723. return !Args.empty() ? Args[0]->IsLiteral() : false;
  724. }
  725. TString GetLiteralType() const override {
  726. return !Args.empty() ? Args[0]->GetLiteralType() : "";
  727. }
  728. TString GetLiteralValue() const override {
  729. return !Args.empty() ? Args[0]->GetLiteralValue() : "";
  730. }
  731. };
  732. class TTryMember final: public TCallNode {
  733. public:
  734. TTryMember(TPosition pos, const TString& opName, const TVector<TNodePtr>& args)
  735. : TCallNode(pos, opName, 3, 3, args)
  736. {}
  737. bool DoInit(TContext& ctx, ISource* src) override {
  738. if (Args.size() != 3) {
  739. ctx.Error(Pos) << OpName << " requires exactly three arguments";
  740. return false;
  741. }
  742. for (const auto& arg : Args) {
  743. if (!arg->Init(ctx, src)) {
  744. return false;
  745. }
  746. }
  747. Args[1] = MakeAtomFromExpression(ctx, Args[1]).Build();
  748. return TCallNode::DoInit(ctx, src);
  749. }
  750. TNodePtr DoClone() const final {
  751. return new TTryMember(Pos, OpName, Args);
  752. }
  753. };
  754. class TAddMember final: public TCallNode {
  755. public:
  756. TAddMember(TPosition pos, const TString& opName, const TVector<TNodePtr>& args)
  757. : TCallNode(pos, opName, 3, 3, args)
  758. {}
  759. bool DoInit(TContext& ctx, ISource* src) override {
  760. if (Args.size() != 3) {
  761. ctx.Error(Pos) << OpName << " requires exactly three arguments";
  762. return false;
  763. }
  764. for (const auto& arg : Args) {
  765. if (!arg->Init(ctx, src)) {
  766. return false;
  767. }
  768. }
  769. Args[1] = MakeAtomFromExpression(ctx, Args[1]).Build();
  770. return TCallNode::DoInit(ctx, src);
  771. }
  772. TNodePtr DoClone() const final {
  773. return new TAddMember(Pos, OpName, Args);
  774. }
  775. };
  776. class TRemoveMember final: public TCallNode {
  777. public:
  778. TRemoveMember(TPosition pos, const TString& opName, const TVector<TNodePtr>& args)
  779. : TCallNode(pos, opName, 2, 2, args)
  780. {}
  781. bool DoInit(TContext& ctx, ISource* src) override {
  782. if (Args.size() != 2) {
  783. ctx.Error(Pos) << OpName << " requires exactly two arguments";
  784. return false;
  785. }
  786. for (const auto& arg : Args) {
  787. if (!arg->Init(ctx, src)) {
  788. return false;
  789. }
  790. }
  791. Args[1] = MakeAtomFromExpression(ctx, Args[1]).Build();
  792. return TCallNode::DoInit(ctx, src);
  793. }
  794. TNodePtr DoClone() const final {
  795. return new TRemoveMember(Pos, OpName, Args);
  796. }
  797. };
  798. class TCombineMembers final: public TCallNode {
  799. public:
  800. TCombineMembers(TPosition pos, const TString& opName, const TVector<TNodePtr>& args)
  801. : TCallNode(pos, opName, 1, -1, args)
  802. {}
  803. bool DoInit(TContext& ctx, ISource* src) override {
  804. if (Args.empty()) {
  805. ctx.Error(Pos) << "CombineMembers requires at least one argument";
  806. return false;
  807. }
  808. for (size_t i = 0; i < Args.size(); ++i) {
  809. Args[i] = Q(Y(Q(""), Args[i])); // flatten without prefix
  810. }
  811. return TCallNode::DoInit(ctx, src);
  812. }
  813. TNodePtr DoClone() const final {
  814. return new TCombineMembers(Pos, OpName, Args);
  815. }
  816. };
  817. class TFlattenMembers final: public TCallNode {
  818. public:
  819. TFlattenMembers(TPosition pos, const TString& opName, const TVector<TNodePtr>& args)
  820. : TCallNode(pos, opName, 1, -1, args)
  821. {}
  822. bool DoInit(TContext& ctx, ISource* src) override {
  823. if (Args.empty()) {
  824. ctx.Error(Pos) << OpName << " requires at least one argument";
  825. return false;
  826. }
  827. for (size_t i = 0; i < Args.size(); ++i) {
  828. if (!Args[i]->Init(ctx, src)) {
  829. return false;
  830. }
  831. if (Args[i]->GetTupleSize() == 2) {
  832. // flatten with prefix
  833. Args[i] = Q(Y(
  834. MakeAtomFromExpression(ctx, Args[i]->GetTupleElement(0)).Build(),
  835. Args[i]->GetTupleElement(1)
  836. ));
  837. } else {
  838. ctx.Error(Pos) << OpName << " requires arguments to be tuples of size 2: prefix and struct";
  839. return false;
  840. }
  841. }
  842. return TCallNode::DoInit(ctx, src);
  843. }
  844. TNodePtr DoClone() const final {
  845. return new TFlattenMembers(Pos, OpName, Args);
  846. }
  847. };
  848. TString NormalizeTypeString(const TString& str) {
  849. auto ret = to_title(str);
  850. if (ret.StartsWith("Tz")) {
  851. ret = "Tz" + to_title(ret.substr(2));
  852. }
  853. if (ret.StartsWith("Json")) {
  854. ret = "Json" + to_title(ret.substr(4));
  855. }
  856. if (ret.StartsWith("Dy")) {
  857. ret = "Dy" + to_title(ret.substr(2));
  858. }
  859. return ret;
  860. }
  861. static const TSet<TString> AvailableDataTypes = {"Bool", "String", "Uint32", "Uint64", "Int32", "Int64", "Float", "Double", "Utf8", "Yson", "Json", "JsonDocument",
  862. "Date", "Datetime", "Timestamp", "Interval", "Uint8", "Int8", "Uint16", "Int16", "TzDate", "TzDatetime", "TzTimestamp", "Uuid", "Decimal", "DyNumber"};
  863. TNodePtr GetDataTypeStringNode(TContext& ctx, TCallNode& node, unsigned argNum, TString* outTypeStrPtr = nullptr) {
  864. auto errMsgFunc = [&node, argNum]() {
  865. static std::array<TString, 2> numToName = {{"first", "second"}};
  866. TStringBuilder sb;
  867. sb << "At " << numToName.at(argNum) << " argument of " << node.GetOpName() << " expected type string, available one of: "
  868. << JoinRange(", ", AvailableDataTypes.begin(), AvailableDataTypes.end()) << ";";
  869. return TString(sb);
  870. };
  871. auto typeStringNode = node.GetArgs().at(argNum);
  872. auto typeStringPtr = typeStringNode->GetLiteral("String");
  873. TNodePtr dataTypeNode;
  874. if (typeStringPtr) {
  875. TString typeString = NormalizeTypeString(*typeStringPtr);
  876. if (!AvailableDataTypes.contains(typeString)) {
  877. ctx.Error(typeStringNode->GetPos()) << "Bad type string: '" << typeString << "'. " << errMsgFunc();
  878. return {};
  879. }
  880. if (outTypeStrPtr) {
  881. *outTypeStrPtr = typeString;
  882. }
  883. dataTypeNode = typeStringNode->Q(typeString);
  884. } else {
  885. ctx.Error(typeStringNode->GetPos()) << errMsgFunc();
  886. return {};
  887. }
  888. return dataTypeNode;
  889. }
  890. class TYqlParseFileOp final: public TCallNode {
  891. public:
  892. TYqlParseFileOp(TPosition pos, const TVector<TNodePtr>& args)
  893. : TCallNode(pos, "ParseFile", 2, 2, args)
  894. {}
  895. bool DoInit(TContext& ctx, ISource* src) override {
  896. if (!ValidateArguments(ctx)) {
  897. return false;
  898. }
  899. auto dataTypeStringNode = GetDataTypeStringNode(ctx, *this, 0);
  900. if (!dataTypeStringNode) {
  901. return false;
  902. }
  903. auto aliasNode = BuildFileNameArgument(Args[1]->GetPos(), Args[1]);
  904. OpName = "Apply";
  905. Args[0] = Y("Udf", Q("File.ByLines"), Y("Void"),
  906. Y("TupleType",
  907. Y("TupleType", Y("DataType", dataTypeStringNode)),
  908. Y("StructType"),
  909. Y("TupleType")));
  910. Args[1] = Y("FilePath", aliasNode);
  911. return TCallNode::DoInit(ctx, src);
  912. }
  913. TString GetOpName() const override {
  914. return "ParseFile";
  915. }
  916. TNodePtr DoClone() const final {
  917. return new TYqlParseFileOp(Pos, Args);
  918. }
  919. };
  920. class TYqlDataType final : public TCallNode {
  921. public:
  922. TYqlDataType(TPosition pos, const TVector<TNodePtr>& args)
  923. : TCallNode(pos, "DataType", 1, 3, args)
  924. {
  925. FakeSource = BuildFakeSource(pos);
  926. }
  927. bool DoInit(TContext& ctx, ISource* src) override {
  928. if (!ValidateArguments(ctx)) {
  929. return false;
  930. }
  931. for (ui32 i = 0; i < Args.size(); ++i) {
  932. if (!Args[i]->Init(ctx, FakeSource.Get())) {
  933. return false;
  934. }
  935. Args[i] = MakeAtomFromExpression(ctx, Args[i]).Build();
  936. }
  937. return TCallNode::DoInit(ctx, src);
  938. }
  939. TNodePtr DoClone() const final {
  940. return new TYqlDataType(Pos, Args);
  941. }
  942. private:
  943. TSourcePtr FakeSource;
  944. };
  945. class TYqlResourceType final : public TCallNode {
  946. public:
  947. TYqlResourceType(TPosition pos, const TVector<TNodePtr>& args)
  948. : TCallNode(pos, "ResourceType", 1, 1, args)
  949. {}
  950. bool DoInit(TContext& ctx, ISource* src) override {
  951. if (!ValidateArguments(ctx)) {
  952. return false;
  953. }
  954. if (!Args[0]->Init(ctx, src)) {
  955. return false;
  956. }
  957. Args[0] = MakeAtomFromExpression(ctx, Args[0]).Build();
  958. return TCallNode::DoInit(ctx, src);
  959. }
  960. TNodePtr DoClone() const final {
  961. return new TYqlResourceType(Pos, Args);
  962. }
  963. };
  964. class TYqlTaggedType final : public TCallNode {
  965. public:
  966. TYqlTaggedType(TPosition pos, const TVector<TNodePtr>& args)
  967. : TCallNode(pos, "TaggedType", 2, 2, args)
  968. {}
  969. bool DoInit(TContext& ctx, ISource* src) override {
  970. if (!ValidateArguments(ctx)) {
  971. return false;
  972. }
  973. if (!Args[1]->Init(ctx, src)) {
  974. return false;
  975. }
  976. Args[1] = MakeAtomFromExpression(ctx, Args[1]).Build();
  977. return TCallNode::DoInit(ctx, src);
  978. }
  979. TNodePtr DoClone() const final {
  980. return new TYqlTaggedType(Pos, Args);
  981. }
  982. };
  983. class TYqlCallableType final : public TCallNode {
  984. public:
  985. TYqlCallableType(TPosition pos, const TVector<TNodePtr>& args)
  986. : TCallNode(pos, "CallableType", 2, -1, args)
  987. {}
  988. bool DoInit(TContext& ctx, ISource* src) override {
  989. if (!ValidateArguments(ctx)) {
  990. return false;
  991. }
  992. if (!dynamic_cast<TTupleNode*>(Args[0].Get())) {
  993. ui32 numOptArgs;
  994. if (!Parseui32(Args[0], numOptArgs)) {
  995. ctx.Error(Args[0]->GetPos()) << "Expected either tuple or number of optional arguments";
  996. return false;
  997. }
  998. Args[0] = Q(Y(BuildQuotedAtom(Args[0]->GetPos(), ToString(numOptArgs))));
  999. }
  1000. if (!dynamic_cast<TTupleNode*>(Args[1].Get())) {
  1001. Args[1] = Q(Y(Args[1]));
  1002. }
  1003. for (ui32 index = 2; index < Args.size(); ++index) {
  1004. if (!dynamic_cast<TTupleNode*>(Args[index].Get())) {
  1005. Args[index] = Q(Y(Args[index]));
  1006. }
  1007. }
  1008. return TCallNode::DoInit(ctx, src);
  1009. }
  1010. TNodePtr DoClone() const final {
  1011. return new TYqlCallableType(Pos, Args);
  1012. }
  1013. };
  1014. class TYqlTupleElementType final : public TCallNode {
  1015. public:
  1016. TYqlTupleElementType(TPosition pos, const TVector<TNodePtr>& args)
  1017. : TCallNode(pos, "TupleElementType", 2, 2, args)
  1018. {}
  1019. bool DoInit(TContext& ctx, ISource* src) override {
  1020. if (!ValidateArguments(ctx)) {
  1021. return false;
  1022. }
  1023. if (!Args[1]->Init(ctx, src)) {
  1024. return false;
  1025. }
  1026. Args[1] = MakeAtomFromExpression(ctx, Args[1]).Build();
  1027. return TCallNode::DoInit(ctx, src);
  1028. }
  1029. TNodePtr DoClone() const final {
  1030. return new TYqlTupleElementType(Pos, Args);
  1031. }
  1032. };
  1033. class TYqlStructMemberType final : public TCallNode {
  1034. public:
  1035. TYqlStructMemberType(TPosition pos, const TVector<TNodePtr>& args)
  1036. : TCallNode(pos, "StructMemberType", 2, 2, args)
  1037. {}
  1038. bool DoInit(TContext& ctx, ISource* src) override {
  1039. if (!ValidateArguments(ctx)) {
  1040. return false;
  1041. }
  1042. if (!Args[1]->Init(ctx, src)) {
  1043. return false;
  1044. }
  1045. Args[1] = MakeAtomFromExpression(ctx, Args[1]).Build();
  1046. return TCallNode::DoInit(ctx, src);
  1047. }
  1048. TNodePtr DoClone() const final {
  1049. return new TYqlStructMemberType(Pos, Args);
  1050. }
  1051. };
  1052. class TYqlCallableArgumentType final : public TCallNode {
  1053. public:
  1054. TYqlCallableArgumentType(TPosition pos, const TVector<TNodePtr>& args)
  1055. : TCallNode(pos, "CallableArgumentType", 2, 2, args)
  1056. {}
  1057. bool DoInit(TContext& ctx, ISource* src) override {
  1058. if (!ValidateArguments(ctx)) {
  1059. return false;
  1060. }
  1061. ui32 index;
  1062. if (!Parseui32(Args[1], index)) {
  1063. ctx.Error(Args[1]->GetPos()) << "Expected index of the callable argument";
  1064. return false;
  1065. }
  1066. Args[1] = BuildQuotedAtom(Args[1]->GetPos(), ToString(index));
  1067. return TCallNode::DoInit(ctx, src);
  1068. }
  1069. TNodePtr DoClone() const final {
  1070. return new TYqlCallableArgumentType(Pos, Args);
  1071. }
  1072. };
  1073. class TStructTypeNode : public TAstListNode {
  1074. public:
  1075. TStructTypeNode(TPosition pos, const TVector<TNodePtr>& exprs)
  1076. : TAstListNode(pos)
  1077. , Exprs(exprs)
  1078. {}
  1079. bool DoInit(TContext& ctx, ISource* src) override {
  1080. Nodes.push_back(BuildAtom(Pos, "StructType", TNodeFlags::Default));
  1081. for (const auto& expr : Exprs) {
  1082. const auto& label = expr->GetLabel();
  1083. if (!label) {
  1084. ctx.Error(expr->GetPos()) << "Structure does not allow anonymous members";
  1085. return false;
  1086. }
  1087. Nodes.push_back(Q(Y(Q(label), expr)));
  1088. }
  1089. return TAstListNode::DoInit(ctx, src);
  1090. }
  1091. TNodePtr DoClone() const final {
  1092. return new TStructTypeNode(Pos, CloneContainer(Exprs));
  1093. }
  1094. private:
  1095. const TVector<TNodePtr> Exprs;
  1096. };
  1097. template <bool IsStrict>
  1098. class TYqlIf final: public TCallNode {
  1099. public:
  1100. TYqlIf(TPosition pos, const TVector<TNodePtr>& args)
  1101. : TCallNode(pos, IsStrict ? "IfStrict" : "If", 2, 3, args)
  1102. {}
  1103. private:
  1104. TCallNode::TPtr DoClone() const override {
  1105. return new TYqlIf(GetPos(), CloneContainer(Args));
  1106. }
  1107. bool DoInit(TContext& ctx, ISource* src) override {
  1108. if (!ValidateArguments(ctx)) {
  1109. return false;
  1110. }
  1111. Args[0] = Y("Coalesce", Args[0], Y("Bool", Q("false")));
  1112. if (Args.size() == 2) {
  1113. Args.push_back(Y("Null"));
  1114. }
  1115. return TCallNode::DoInit(ctx, src);
  1116. }
  1117. };
  1118. class TYqlSubstring final: public TCallNode {
  1119. public:
  1120. TYqlSubstring(TPosition pos, const TString& name, const TVector<TNodePtr>& args)
  1121. : TCallNode(pos, name, 2, 3, args)
  1122. {}
  1123. private:
  1124. TCallNode::TPtr DoClone() const override {
  1125. return new TYqlSubstring(GetPos(), OpName, CloneContainer(Args));
  1126. }
  1127. bool DoInit(TContext& ctx, ISource* src) override {
  1128. if (Args.size() == 2) {
  1129. Args.push_back(Y("Null"));
  1130. }
  1131. return TCallNode::DoInit(ctx, src);
  1132. }
  1133. };
  1134. class TYqlIn final: public TCallNode {
  1135. public:
  1136. TYqlIn(TPosition pos, const TVector<TNodePtr>& args)
  1137. : TCallNode(pos, "IN", 3, 3, args)
  1138. {}
  1139. private:
  1140. TNodePtr DoClone() const final {
  1141. return new TYqlIn(Pos, CloneContainer(Args));
  1142. }
  1143. bool DoInit(TContext& ctx, ISource* src) override {
  1144. if (!ValidateArguments(ctx)) {
  1145. return false;
  1146. }
  1147. auto key = Args[0];
  1148. auto inNode = Args[1];
  1149. auto hints = Args[2];
  1150. const auto pos = inNode->GetPos();
  1151. if (!key->Init(ctx, src)) {
  1152. return false;
  1153. }
  1154. if (!inNode->Init(ctx, inNode->GetSource() ? nullptr : src)) {
  1155. return false;
  1156. }
  1157. if (inNode->GetLiteral("String")) {
  1158. ctx.Error(pos) << "Unable to use IN predicate with string argument, it won't search substring - "
  1159. "expecting tuple, list, dict or single column table source";
  1160. return false;
  1161. }
  1162. if (inNode->GetTupleSize() == 1) {
  1163. auto singleElement = inNode->GetTupleElement(0);
  1164. // TODO: 'IN ((select ...))' is parsed exactly like 'IN (select ...)' instead of a single element tuple
  1165. if (singleElement->GetSource() || singleElement->IsSelect()) {
  1166. TStringBuf parenKind = singleElement->GetSource() ? "" : "external ";
  1167. ctx.Warning(pos,
  1168. TIssuesIds::YQL_CONST_SUBREQUEST_IN_LIST) << "Using subrequest in scalar context after IN, "
  1169. << "perhaps you should remove "
  1170. << parenKind << "parenthesis here";
  1171. }
  1172. }
  1173. TVector<TNodePtr> hintElements;
  1174. for (size_t i = 0; i < hints->GetTupleSize(); ++i) {
  1175. hintElements.push_back(hints->GetTupleElement(i));
  1176. }
  1177. if (inNode->GetSource() || inNode->IsSelect()) {
  1178. hintElements.push_back(BuildHint(pos, "tableSource"));
  1179. }
  1180. if (!ctx.AnsiInForEmptyOrNullableItemsCollections.Defined()) {
  1181. hintElements.push_back(BuildHint(pos, "warnNoAnsi"));
  1182. } else if (*ctx.AnsiInForEmptyOrNullableItemsCollections) {
  1183. hintElements.push_back(BuildHint(pos, "ansi"));
  1184. }
  1185. OpName = "SqlIn";
  1186. MinArgs = MaxArgs = 3;
  1187. Args = {
  1188. inNode->GetSource() ? inNode->GetSource() : inNode,
  1189. key,
  1190. BuildTuple(pos, hintElements)
  1191. };
  1192. return TCallNode::DoInit(ctx, src);
  1193. }
  1194. static TNodePtr BuildHint(TPosition pos, const TString& name) {
  1195. return BuildTuple(pos, { BuildQuotedAtom(pos, name, NYql::TNodeFlags::Default) });
  1196. }
  1197. TString GetOpName() const override {
  1198. return "IN predicate";
  1199. }
  1200. };
  1201. class TYqlUdfBase : public TCallNode {
  1202. public:
  1203. TYqlUdfBase(TPosition pos, const TString& name)
  1204. : TCallNode(pos, "Udf", 1, 1, UdfArgs(pos, name))
  1205. {}
  1206. TYqlUdfBase(TPosition pos, const TString& name, const TVector<TNodePtr>& args, ui32 argsCount = 2)
  1207. : TCallNode(pos, "Udf", argsCount, argsCount, UdfArgs(pos, name, &args))
  1208. {}
  1209. protected:
  1210. TYqlUdfBase(TPosition pos, const TString& opName, ui32 minArgs, ui32 maxArgs, const TVector<TNodePtr>& args)
  1211. : TCallNode(pos, opName, minArgs, maxArgs, args)
  1212. {}
  1213. private:
  1214. static TVector<TNodePtr> UdfArgs(TPosition pos, const TString& name, const TVector<TNodePtr>* args = nullptr) {
  1215. TVector<TNodePtr> res = { BuildQuotedAtom(pos, name) };
  1216. if (args) {
  1217. res.insert(res.end(), args->begin(), args->end());
  1218. }
  1219. return res;
  1220. }
  1221. void DoUpdateState() const override {
  1222. TCallNode::DoUpdateState();
  1223. State.Set(ENodeState::Aggregated, false/*!RunConfig || RunConfig->IsAggregated()*/);
  1224. State.Set(ENodeState::Const, true /* FIXME: To avoid CheckAggregationLevel issue for non-const TypeOf. */);
  1225. }
  1226. private:
  1227. TNodePtr RunConfig;
  1228. };
  1229. class TYqlUdf final : public TYqlUdfBase {
  1230. public:
  1231. TYqlUdf(TPosition pos, const TString& name)
  1232. : TYqlUdfBase(pos, name)
  1233. {}
  1234. TYqlUdf(TPosition pos, const TString& name, const TVector<TNodePtr>& args, ui32 argsCount = 2)
  1235. : TYqlUdfBase(pos, name, args, argsCount)
  1236. {}
  1237. private:
  1238. TYqlUdf(const TYqlUdf& other)
  1239. : TYqlUdfBase(other.GetPos(), "Udf", other.MinArgs, other.MaxArgs, CloneContainer(other.Args))
  1240. {}
  1241. TNodePtr DoClone() const final {
  1242. return new TYqlUdf(*this);
  1243. }
  1244. };
  1245. class TYqlTypeConfigUdf final : public TYqlUdfBase {
  1246. public:
  1247. TYqlTypeConfigUdf(TPosition pos, const TString& name)
  1248. : TYqlUdfBase(pos, name)
  1249. {}
  1250. TYqlTypeConfigUdf(TPosition pos, const TString& name, const TVector<TNodePtr>& args, ui32 argsCount = 2)
  1251. : TYqlUdfBase(pos, name, args, argsCount)
  1252. {}
  1253. private:
  1254. TYqlTypeConfigUdf(const TYqlTypeConfigUdf& other)
  1255. : TYqlUdfBase(other.GetPos(), "Udf", other.MinArgs, other.MaxArgs, CloneContainer(other.Args))
  1256. {}
  1257. bool DoInit(TContext& ctx, ISource* src) override {
  1258. if (!ValidateArguments(ctx)) {
  1259. return false;
  1260. }
  1261. if (!Args[3]->Init(ctx, src)) {
  1262. return false;
  1263. }
  1264. Args[3] = MakeAtomFromExpression(ctx, Args[3]).Build();
  1265. return TYqlUdfBase::DoInit(ctx, src);
  1266. }
  1267. TNodePtr DoClone() const final {
  1268. return new TYqlTypeConfigUdf(*this);
  1269. }
  1270. };
  1271. class TWeakFieldOp final: public TCallNode {
  1272. public:
  1273. TWeakFieldOp(TPosition pos, const TVector<TNodePtr>& args)
  1274. : TCallNode(pos, "WeakField", 2, 3, args)
  1275. {}
  1276. bool DoInit(TContext& ctx, ISource* src) override {
  1277. if (!src) {
  1278. ctx.Error(Pos) << GetCallExplain() << " unable use without source";
  1279. return false;
  1280. }
  1281. src->AllColumns();
  1282. if (!ValidateArguments(ctx)) {
  1283. return false;
  1284. }
  1285. bool hasError = false;
  1286. for (auto& arg: Args) {
  1287. if (!arg->Init(ctx, src)) {
  1288. hasError = true;
  1289. continue;
  1290. }
  1291. }
  1292. if (hasError) {
  1293. return false;
  1294. }
  1295. PrecacheState();
  1296. const auto memberPos = Args[0]->GetPos();
  1297. TVector<TNodePtr> repackArgs = {BuildAtom(memberPos, "row", NYql::TNodeFlags::Default)};
  1298. if (auto literal = Args[1]->GetLiteral("String")) {
  1299. TString targetType;
  1300. if (!GetDataTypeStringNode(ctx, *this, 1, &targetType)) {
  1301. return false;
  1302. }
  1303. repackArgs.push_back(Args[1]->Q(targetType));
  1304. } else {
  1305. repackArgs.push_back(Args[1]);
  1306. }
  1307. TVector<TNodePtr> column;
  1308. auto namePtr = Args[0]->GetColumnName();
  1309. if (!namePtr || !*namePtr) {
  1310. ctx.Error(Pos) << GetCallExplain() << " expects column name as first argument";
  1311. return false;
  1312. }
  1313. auto memberName = *namePtr;
  1314. column.push_back(Args[0]->Q(*namePtr));
  1315. if (src->GetJoin() && !src->IsJoinKeysInitializing()) {
  1316. const auto sourcePtr = Args[0]->GetSourceName();
  1317. if (!sourcePtr || !*sourcePtr) {
  1318. ctx.Error(Pos) << GetOpName() << " required to have correlation name in case of JOIN for column at first parameter";
  1319. return false;
  1320. }
  1321. column.push_back(Args[0]->Q(*sourcePtr));
  1322. memberName = DotJoin(*sourcePtr, memberName);
  1323. }
  1324. if (!GetLabel()) {
  1325. SetLabel(memberName);
  1326. }
  1327. repackArgs.push_back(BuildTuple(memberPos, column));
  1328. if (Args.size() == 3) {
  1329. repackArgs.push_back(Args[2]);
  1330. }
  1331. ++MinArgs;
  1332. ++MaxArgs;
  1333. Args.swap(repackArgs);
  1334. return TCallNode::DoInit(ctx, src);
  1335. }
  1336. TNodePtr DoClone() const final {
  1337. return new TWeakFieldOp(Pos, Args);
  1338. }
  1339. };
  1340. template <bool Join>
  1341. class TTableRow final : public INode {
  1342. public:
  1343. TTableRow(TPosition pos, const TVector<TNodePtr>& args)
  1344. : TTableRow(pos, args.size())
  1345. {}
  1346. TTableRow(TPosition pos, ui32 argsCount)
  1347. : INode(pos)
  1348. , ArgsCount(argsCount)
  1349. {}
  1350. bool DoInit(TContext& ctx, ISource* src) override {
  1351. if (!src || src->IsFake()) {
  1352. ctx.Error(Pos) << "TableRow requires data source";
  1353. return false;
  1354. }
  1355. if (ArgsCount > 0) {
  1356. ctx.Error(Pos) << "TableRow requires exactly 0 arguments";
  1357. return false;
  1358. }
  1359. src->AllColumns();
  1360. const bool isJoin = src->GetJoin();
  1361. if (!Join && ctx.SimpleColumns && isJoin) {
  1362. TNodePtr block = Y();
  1363. const auto& sameKeyMap = src->GetJoin()->GetSameKeysMap();
  1364. if (sameKeyMap) {
  1365. block = L(block, Y("let", "flatSameKeys", "row"));
  1366. for (const auto& sameKeysPair: sameKeyMap) {
  1367. const auto& column = sameKeysPair.first;
  1368. auto keys = Y("Coalesce");
  1369. auto sameSourceIter = sameKeysPair.second.begin();
  1370. for (auto end = sameKeysPair.second.end(); sameSourceIter != end; ++sameSourceIter) {
  1371. auto addKeyNode = Q(DotJoin(*sameSourceIter, column));
  1372. keys = L(keys, Y("TryMember", "row", addKeyNode, Y("Null")));
  1373. }
  1374. block = L(block, Y("let", "flatSameKeys", Y("AddMember", "flatSameKeys", Q(column), keys)));
  1375. sameSourceIter = sameKeysPair.second.begin();
  1376. for (auto end = sameKeysPair.second.end(); sameSourceIter != end; ++sameSourceIter) {
  1377. auto removeKeyNode = Q(DotJoin(*sameSourceIter, column));
  1378. block = L(block, Y("let", "flatSameKeys", Y("ForceRemoveMember", "flatSameKeys", removeKeyNode)));
  1379. }
  1380. }
  1381. block = L(block, Y("let", "row", "flatSameKeys"));
  1382. }
  1383. auto members = Y();
  1384. for (auto& joinLabel: src->GetJoin()->GetJoinLabels()) {
  1385. members = L(members, BuildQuotedAtom(Pos, joinLabel + "."));
  1386. }
  1387. block = L(block, Y("let", "res", Y("DivePrefixMembers", "row", Q(members))));
  1388. for (const auto& sameKeysPair: src->GetJoin()->GetSameKeysMap()) {
  1389. const auto& column = sameKeysPair.first;
  1390. auto addMemberKeyNode = Y("Member", "row", Q(column));
  1391. block = L(block, Y("let", "res", Y("AddMember", "res", Q(column), addMemberKeyNode)));
  1392. }
  1393. Node = Y("block", Q(L(block, Y("return", "res"))));
  1394. } else {
  1395. Node = ctx.EnableSystemColumns ? Y("RemoveSystemMembers", "row") : BuildAtom(Pos, "row", 0);
  1396. }
  1397. return true;
  1398. }
  1399. TAstNode* Translate(TContext& ctx) const override {
  1400. Y_VERIFY_DEBUG(Node);
  1401. return Node->Translate(ctx);
  1402. }
  1403. void DoUpdateState() const override {
  1404. State.Set(ENodeState::Const, false);
  1405. }
  1406. TNodePtr DoClone() const final {
  1407. return new TTableRow<Join>(Pos, ArgsCount);
  1408. }
  1409. private:
  1410. const size_t ArgsCount;
  1411. TNodePtr Node;
  1412. };
  1413. TTableRows::TTableRows(TPosition pos, const TVector<TNodePtr>& args)
  1414. : TTableRows(pos, args.size())
  1415. {}
  1416. TTableRows::TTableRows(TPosition pos, ui32 argsCount)
  1417. : INode(pos)
  1418. , ArgsCount(argsCount)
  1419. {}
  1420. bool TTableRows::DoInit(TContext& ctx, ISource* /*src*/) {
  1421. if (ArgsCount > 0) {
  1422. ctx.Error(Pos) << "TableRows requires exactly 0 arguments";
  1423. return false;
  1424. }
  1425. Node = ctx.EnableSystemColumns ? Y("RemoveSystemMembers", "inputRowsList") : BuildAtom(Pos, "inputRowsList", 0);
  1426. return true;
  1427. }
// Translates the node prepared by DoInit. Node is only set by DoInit, so the
// debug check guards against translating an uninitialized TableRows.
TAstNode* TTableRows::Translate(TContext& ctx) const {
    Y_VERIFY_DEBUG(Node);
    return Node->Translate(ctx);
}
// Marks the node as non-constant.
void TTableRows::DoUpdateState() const {
    State.Set(ENodeState::Const, false);
}
  1435. TNodePtr TTableRows::DoClone() const {
  1436. return MakeIntrusive<TTableRows>(Pos, ArgsCount);
  1437. }
// Stores the SessionWindow call arguments and creates a fake source used by
// DoInit to initialize every argument except the first. The node starts out
// not valid; MarkValid() must be called before initialization.
TSessionWindow::TSessionWindow(TPosition pos, const TVector<TNodePtr>& args)
    : INode(pos)
    , Args(args)
    , FakeSource(BuildFakeSource(pos))
    , Valid(false)
{}
// Marks this SessionWindow as allowed in its current position. DoInit reports
// an error for nodes that were never marked. Must be called before the node
// is initialized (enforced by the YQL_ENSURE below).
void TSessionWindow::MarkValid() {
    YQL_ENSURE(!HasState(ENodeState::Initialized));
    Valid = true;
}
  1448. TNodePtr TSessionWindow::BuildTraits(const TString& label) const {
  1449. YQL_ENSURE(HasState(ENodeState::Initialized));
  1450. auto trueNode = Y("Bool", Q("true"));
  1451. if (Args.size() == 2) {
  1452. auto timeExpr = Args[0];
  1453. auto timeoutExpr = Args[1];
  1454. auto coalesceLess = [&](auto first, auto second) {
  1455. // first < second ?? true
  1456. return Y("Coalesce", Y("<", first, second), trueNode);
  1457. };
  1458. auto absDelta = Y("If",
  1459. coalesceLess("prev", "curr"),
  1460. Y("-", "curr", "prev"),
  1461. Y("-", "prev", "curr"));
  1462. auto newSessionPred = Y("And", Y("AggrNotEquals", "curr", "prev"), coalesceLess(timeoutExpr, absDelta));
  1463. auto timeoutLambda = BuildLambda(timeoutExpr->GetPos(), Y("prev", "curr"), newSessionPred);
  1464. auto sortSpec = Y("SortTraits", Y("TypeOf", label), trueNode, BuildLambda(Pos, Y("row"), Y("PersistableRepr", timeExpr)));
  1465. return Y("SessionWindowTraits",
  1466. Y("TypeOf", label),
  1467. sortSpec,
  1468. BuildLambda(Pos, Y("row"), timeExpr),
  1469. timeoutLambda);
  1470. }
  1471. auto orderExpr = Args[0];
  1472. auto initLambda = Args[1];
  1473. auto updateLambda = Args[2];
  1474. auto calculateLambda = Args[3];
  1475. auto sortSpec = Y("SortTraits", Y("TypeOf", label), trueNode, BuildLambda(Pos, Y("row"), Y("PersistableRepr", orderExpr)));
  1476. return Y("SessionWindowTraits",
  1477. Y("TypeOf", label),
  1478. sortSpec,
  1479. initLambda,
  1480. updateLambda,
  1481. calculateLambda);
  1482. }
  1483. bool TSessionWindow::DoInit(TContext& ctx, ISource* src) {
  1484. if (!src || src->IsFake()) {
  1485. ctx.Error(Pos) << "SessionWindow requires data source";
  1486. return false;
  1487. }
  1488. if (!(Args.size() == 2 || Args.size() == 4)) {
  1489. ctx.Error(Pos) << "SessionWindow requires either two or four arguments";
  1490. return false;
  1491. }
  1492. if (!Valid) {
  1493. ctx.Error(Pos) << "SessionWindow can only be used as a top-level GROUP BY / PARTITION BY expression";
  1494. return false;
  1495. }
  1496. if (Args.size() == 2) {
  1497. auto timeExpr = Args[0];
  1498. auto timeoutExpr = Args[1];
  1499. return timeExpr->Init(ctx, src) && timeoutExpr->Init(ctx, FakeSource.Get());
  1500. }
  1501. auto orderExpr = Args[0];
  1502. auto initLambda = Args[1];
  1503. auto updateLambda = Args[2];
  1504. auto calculateLambda = Args[3];
  1505. return orderExpr->Init(ctx, src) && initLambda->Init(ctx, FakeSource.Get()) &&
  1506. updateLambda->Init(ctx, FakeSource.Get()) && calculateLambda->Init(ctx, FakeSource.Get());
  1507. }
// SessionWindow is never translated directly; its AST is produced through
// BuildTraits(). Reaching this method is treated as an internal error.
TAstNode* TSessionWindow::Translate(TContext&) const {
    YQL_ENSURE(false, "Translate is called for SessionWindow");
    return nullptr;
}
// Marks the node as non-constant.
void TSessionWindow::DoUpdateState() const {
    State.Set(ENodeState::Const, false);
}
// Creates a new SessionWindow with cloned arguments. The clone is constructed
// from scratch, so it starts with Valid == false and its own fake source.
TNodePtr TSessionWindow::DoClone() const {
    return new TSessionWindow(Pos, CloneContainer(Args));
}
// Name of this operation.
TString TSessionWindow::GetOpName() const {
    return "SessionWindow";
}
  1521. template<bool IsStart>
  1522. class TSessionStart final : public INode {
  1523. public:
  1524. TSessionStart(TPosition pos, const TVector<TNodePtr>& args)
  1525. : INode(pos)
  1526. , ArgsCount(args.size())
  1527. {
  1528. }
  1529. private:
  1530. TSessionStart(TPosition pos, size_t argsCount)
  1531. : INode(pos)
  1532. , ArgsCount(argsCount)
  1533. {}
  1534. bool DoInit(TContext& ctx, ISource* src) override {
  1535. if (!src || src->IsFake()) {
  1536. ctx.Error(Pos) << GetOpName() << " requires data source";
  1537. return false;
  1538. }
  1539. if (ArgsCount > 0) {
  1540. ctx.Error(Pos) << GetOpName() << " requires exactly 0 arguments";
  1541. return false;
  1542. }
  1543. auto windowName = src->GetWindowName();
  1544. OverWindow = windowName != nullptr;
  1545. TNodePtr sessionWindow;
  1546. if (windowName) {
  1547. auto spec = src->FindWindowSpecification(ctx, *windowName);
  1548. if (!spec) {
  1549. return false;
  1550. }
  1551. sessionWindow = spec->Session;
  1552. if (!sessionWindow) {
  1553. ctx.Error(Pos) << GetOpName() << " can not be used with window " << *windowName << ": SessionWindow specification is missing in PARTITION BY";
  1554. return false;
  1555. }
  1556. } else {
  1557. sessionWindow = src->GetSessionWindowSpec();
  1558. if (!sessionWindow) {
  1559. TString extra;
  1560. if (src->IsOverWindowSource()) {
  1561. extra = ". Maybe you forgot to add OVER `window_name`?";
  1562. }
  1563. if (src->HasAggregations()) {
  1564. ctx.Error(Pos) << GetOpName() << " can not be used here: SessionWindow specification is missing in GROUP BY" << extra;
  1565. } else {
  1566. ctx.Error(Pos) << GetOpName() << " can not be used without aggregation by SessionWindow" << extra;
  1567. }
  1568. return false;
  1569. }
  1570. if (!IsStart) {
  1571. ctx.Error(Pos) << GetOpName() << " with GROUP BY is not supported yet";
  1572. return false;
  1573. }
  1574. }
  1575. YQL_ENSURE(sessionWindow->GetLabel());
  1576. Node = Y("Member", "row", BuildQuotedAtom(Pos, sessionWindow->GetLabel()));
  1577. if (OverWindow) {
  1578. Node = Y("Member", Node, BuildQuotedAtom(Pos, IsStart ? "start" : "state"));
  1579. }
  1580. return true;
  1581. }
  1582. TAstNode* Translate(TContext& ctx) const override {
  1583. Y_VERIFY_DEBUG(Node);
  1584. return Node->Translate(ctx);
  1585. }
  1586. void DoUpdateState() const override {
  1587. State.Set(ENodeState::Const, false);
  1588. if (OverWindow) {
  1589. State.Set(ENodeState::OverWindow, true);
  1590. } else if (IsStart) {
  1591. State.Set(ENodeState::Aggregated, true);
  1592. }
  1593. }
  1594. TNodePtr DoClone() const override {
  1595. return new TSessionStart<IsStart>(Pos, ArgsCount);
  1596. }
  1597. TString GetOpName() const override {
  1598. return IsStart ? "SessionStart" : "SessionState";
  1599. }
  1600. const size_t ArgsCount;
  1601. bool OverWindow = false;
  1602. TNodePtr Node;
  1603. };
  1604. TNodePtr BuildUdfUserTypeArg(TPosition pos, const TVector<TNodePtr>& args, TNodePtr customUserType) {
  1605. TVector<TNodePtr> argsTypeItems;
  1606. for (auto& arg : args) {
  1607. argsTypeItems.push_back(new TCallNodeImpl(pos, "TypeOf", TVector<TNodePtr>(1, arg)));
  1608. }
  1609. TVector<TNodePtr> userTypeItems;
  1610. userTypeItems.push_back(new TCallNodeImpl(pos, "TupleType", argsTypeItems));
  1611. userTypeItems.push_back(new TCallNodeImpl(pos, "StructType", {}));
  1612. if (customUserType) {
  1613. userTypeItems.push_back(customUserType);
  1614. } else {
  1615. userTypeItems.push_back(new TCallNodeImpl(pos, "TupleType", {}));
  1616. }
  1617. return new TCallNodeImpl(pos, "TupleType", userTypeItems);
  1618. }
  1619. TNodePtr BuildUdfUserTypeArg(TPosition pos, TNodePtr positionalArgs, TNodePtr namedArgs, TNodePtr customUserType) {
  1620. TVector<TNodePtr> userTypeItems;
  1621. userTypeItems.reserve(3);
  1622. userTypeItems.push_back(positionalArgs->Y("TypeOf", positionalArgs));
  1623. userTypeItems.push_back(positionalArgs->Y("TypeOf", namedArgs));
  1624. if (customUserType) {
  1625. userTypeItems.push_back(customUserType);
  1626. } else {
  1627. userTypeItems.push_back(new TCallNodeImpl(pos, "TupleType", {}));
  1628. }
  1629. return new TCallNodeImpl(pos, "TupleType", userTypeItems);
  1630. }
  1631. TVector<TNodePtr> BuildUdfArgs(const TContext& ctx, TPosition pos, const TVector<TNodePtr>& args,
  1632. TNodePtr positionalArgs, TNodePtr namedArgs, TNodePtr customUserType, TMaybe<TString> typeConfig) {
  1633. if (!ctx.Settings.EnableGenericUdfs) {
  1634. return {};
  1635. }
  1636. TVector<TNodePtr> udfArgs;
  1637. udfArgs.push_back(new TAstListNodeImpl(pos));
  1638. udfArgs[0]->Add(new TAstAtomNodeImpl(pos, "Void", 0));
  1639. if (namedArgs) {
  1640. udfArgs.push_back(BuildUdfUserTypeArg(pos, positionalArgs, namedArgs, customUserType));
  1641. } else {
  1642. udfArgs.push_back(BuildUdfUserTypeArg(pos, args, customUserType));
  1643. }
  1644. if (typeConfig) {
  1645. udfArgs.push_back(BuildQuotedAtom(pos, *typeConfig));
  1646. }
  1647. return udfArgs;
  1648. }
  1649. TNodePtr BuildSqlCall(TContext& ctx, TPosition pos, const TString& module, const TString& name, const TVector<TNodePtr>& args,
  1650. TNodePtr positionalArgs, TNodePtr namedArgs, TNodePtr customUserType, TMaybe<TString> typeConfig)
  1651. {
  1652. const TString fullName = module + "." + name;
  1653. TNodePtr callable;
  1654. if (to_lower(module) == "@yql") {
  1655. callable = BuildCallable(pos, module, name, {});
  1656. } else if (!ctx.Settings.EnableGenericUdfs) {
  1657. auto varName = ctx.AddSimpleUdf(fullName);
  1658. callable = new TAstAtomNodeImpl(pos, varName, TNodeFlags::ArbitraryContent);
  1659. }
  1660. if (callable) {
  1661. TVector<TNodePtr> applyArgs = { callable };
  1662. applyArgs.insert(applyArgs.end(), args.begin(), args.end());
  1663. return new TCallNodeImpl(pos, namedArgs ? "NamedApply" : "Apply", applyArgs);
  1664. }
  1665. TVector<TNodePtr> sqlCallArgs;
  1666. sqlCallArgs.push_back(BuildQuotedAtom(pos, fullName));
  1667. if (namedArgs) {
  1668. auto tupleNodePtr = dynamic_cast<const TTupleNode*>(positionalArgs.Get());
  1669. YQL_ENSURE(tupleNodePtr);
  1670. TNodePtr positionalArgsNode = new TCallNodeImpl(pos, "PositionalArgs", tupleNodePtr->Elements());
  1671. sqlCallArgs.push_back(BuildTuple(pos, { positionalArgsNode, namedArgs }));
  1672. } else {
  1673. TNodePtr positionalArgsNode = new TCallNodeImpl(pos, "PositionalArgs", args);
  1674. sqlCallArgs.push_back(BuildTuple(pos, { positionalArgsNode }));
  1675. }
  1676. // optional arguments
  1677. if (customUserType) {
  1678. sqlCallArgs.push_back(customUserType);
  1679. } else if (typeConfig) {
  1680. sqlCallArgs.push_back(new TCallNodeImpl(pos, "TupleType", {}));
  1681. }
  1682. if (typeConfig) {
  1683. sqlCallArgs.push_back(BuildQuotedAtom(pos, *typeConfig));
  1684. }
  1685. return new TCallNodeImpl(pos, "SqlCall", sqlCallArgs);
  1686. }
  1687. class TCallableNode final: public INode {
  1688. public:
  1689. TCallableNode(TPosition pos, const TString& module, const TString& name, const TVector<TNodePtr>& args, bool forReduce)
  1690. : INode(pos)
  1691. , Module(module)
  1692. , Name(name)
  1693. , Args(args)
  1694. , ForReduce(forReduce)
  1695. {}
  1696. bool DoInit(TContext& ctx, ISource* src) override {
  1697. if (Module == "yql") {
  1698. Node = new TFuncNodeImpl(Pos, Name);
  1699. } else if (Module == "@yql") {
  1700. auto parsedName = StringContent(ctx, Pos, Name);
  1701. if (!parsedName) {
  1702. return false;
  1703. }
  1704. const TString yql("(" + parsedName->Content + ")");
  1705. TAstParseResult ast = ParseAst(yql, ctx.Pool.get());
  1706. /// TODO: do not drop warnings
  1707. if (ast.IsOk()) {
  1708. const auto rootCount = ast.Root->GetChildrenCount();
  1709. if (rootCount != 1) {
  1710. ctx.Error(Pos) << "Failed to parse YQL: expecting AST root node with single child, but got " << rootCount;
  1711. return false;
  1712. }
  1713. Node = AstNode(ast.Root->GetChild(0));
  1714. } else {
  1715. ctx.Error(Pos) << "Failed to parse YQL: " << ast.Issues.ToString();
  1716. return false;
  1717. }
  1718. if (src) {
  1719. src->AllColumns();
  1720. }
  1721. } else if (ctx.Settings.ModuleMapping.contains(Module)) {
  1722. Node = Y("bind", Module + "_module", Q(Name));
  1723. if (src) {
  1724. src->AllColumns();
  1725. }
  1726. } else {
  1727. TNodePtr customUserType = nullptr;
  1728. if (Module == "Tensorflow" && Name == "RunBatch") {
  1729. if (Args.size() > 2) {
  1730. auto passThroughAtom = Q("PassThrough");
  1731. auto passThroughType = Y("StructMemberType", Y("ListItemType", Y("TypeOf", Args[1])), passThroughAtom);
  1732. customUserType = Y("AddMemberType", Args[2], passThroughAtom, passThroughType);
  1733. Args.erase(Args.begin() + 2);
  1734. }
  1735. }
  1736. if ("Datetime" == Module || ("Yson" == Module && ctx.PragmaYsonFast))
  1737. Module.append('2');
  1738. TMaybe<TString> typeConfig = MakeTypeConfig(to_lower(Module), Args);
  1739. if (ForReduce) {
  1740. TVector<TNodePtr> udfArgs;
  1741. udfArgs.push_back(BuildQuotedAtom(Pos, TString(Module) + "." + Name));
  1742. udfArgs.push_back(customUserType ? customUserType : new TCallNodeImpl(Pos, "TupleType", {}));
  1743. if (typeConfig) {
  1744. udfArgs.push_back(BuildQuotedAtom(Pos, *typeConfig));
  1745. }
  1746. Node = new TCallNodeImpl(Pos, "SqlReduceUdf", udfArgs);
  1747. } else {
  1748. auto udfArgs = BuildUdfArgs(ctx, Pos, Args, nullptr, nullptr, customUserType, typeConfig);
  1749. Node = BuildUdf(ctx, Pos, Module, Name, udfArgs);
  1750. }
  1751. }
  1752. return Node->Init(ctx, src);
  1753. }
  1754. TAstNode* Translate(TContext& ctx) const override {
  1755. Y_VERIFY_DEBUG(Node);
  1756. return Node->Translate(ctx);
  1757. }
  1758. const TString* FuncName() const override {
  1759. return &Name;
  1760. }
  1761. const TString* ModuleName() const override {
  1762. return &Module;
  1763. }
  1764. void DoUpdateState() const override {
  1765. State.Set(ENodeState::Const, Node->IsConstant());
  1766. State.Set(ENodeState::Aggregated, Node->IsAggregated());
  1767. }
  1768. TNodePtr DoClone() const override {
  1769. return new TCallableNode(Pos, Module, Name, Args, ForReduce);
  1770. }
  1771. void DoVisitChildren(const TVisitFunc& func, TVisitNodeSet& visited) const final {
  1772. Y_VERIFY_DEBUG(Node);
  1773. Node->VisitTree(func, visited);
  1774. }
  1775. private:
  1776. TCiString Module;
  1777. TString Name;
  1778. TVector<TNodePtr> Args;
  1779. TNodePtr Node;
  1780. const bool ForReduce;
  1781. };
// Creates a TCallableNode for module.name; resolution into a concrete node is
// deferred until the node is initialized.
TNodePtr BuildCallable(TPosition pos, const TString& module, const TString& name, const TVector<TNodePtr>& args, bool forReduce) {
    return new TCallableNode(pos, module, name, args, forReduce);
}
  1785. TNodePtr BuildUdf(TContext& ctx, TPosition pos, const TString& module, const TString& name, const TVector<TNodePtr>& args) {
  1786. if (to_lower(module) == "@yql") {
  1787. return BuildCallable(pos, module, name, args);
  1788. }
  1789. auto fullName = module + "." + name;
  1790. if (!args.empty()) {
  1791. return new TYqlUdf(pos, fullName, args, args.size() + 1);
  1792. } else {
  1793. auto varName = ctx.AddSimpleUdf(fullName);
  1794. return new TAstAtomNodeImpl(pos, varName, TNodeFlags::ArbitraryContent);
  1795. }
  1796. }
  1797. class TScriptUdf final: public INode {
  1798. public:
  1799. TScriptUdf(TPosition pos, const TString& moduleName, const TString& funcName, const TVector<TNodePtr>& args)
  1800. : INode(pos)
  1801. , ModuleName(moduleName)
  1802. , FuncName(funcName)
  1803. , Args(args)
  1804. {}
  1805. bool DoInit(TContext& ctx, ISource* src) override {
  1806. const bool isPython = ModuleName.find(TStringBuf("Python")) != TString::npos;
  1807. if (!isPython) {
  1808. if (Args.size() != 2) {
  1809. ctx.Error(Pos) << ModuleName << " script declaration requires exactly two parameters";
  1810. return false;
  1811. }
  1812. } else {
  1813. if (Args.size() < 1 || Args.size() > 2) {
  1814. ctx.Error(Pos) << ModuleName << " script declaration requires one or two parameters";
  1815. return false;
  1816. }
  1817. }
  1818. auto nameAtom = BuildQuotedAtom(Pos, FuncName);
  1819. auto scriptNode = Args.back();
  1820. if (!scriptNode->Init(ctx, src)) {
  1821. return false;
  1822. }
  1823. auto scriptStrPtr = Args.back()->GetLiteral("String");
  1824. if (scriptStrPtr && scriptStrPtr->size() > SQL_MAX_INLINE_SCRIPT_LEN) {
  1825. scriptNode = ctx.UniversalAlias("scriptudf", std::move(scriptNode));
  1826. }
  1827. INode::TPtr type;
  1828. if (Args.size() == 2) {
  1829. type = Args[0];
  1830. } else {
  1831. // Python supports getting functions signatures right from docstrings
  1832. type = Y("EvaluateType", Y("ParseTypeHandle", Y("Apply",
  1833. Y("bind", "core_module", Q("PythonFuncSignature")),
  1834. Q(ModuleName),
  1835. scriptNode,
  1836. Y("String", nameAtom)
  1837. )));
  1838. }
  1839. if (!type->Init(ctx, src)) {
  1840. return false;
  1841. }
  1842. Node = Y("ScriptUdf", Q(ModuleName), nameAtom, type, scriptNode);
  1843. return true;
  1844. }
  1845. TAstNode* Translate(TContext& ctx) const override {
  1846. Y_UNUSED(ctx);
  1847. Y_VERIFY_DEBUG(Node);
  1848. return Node->Translate(ctx);
  1849. }
  1850. void DoUpdateState() const override {
  1851. State.Set(ENodeState::Const, true);
  1852. }
  1853. TNodePtr DoClone() const final {
  1854. return new TScriptUdf(GetPos(), ModuleName, FuncName, CloneContainer(Args));
  1855. }
  1856. void DoVisitChildren(const TVisitFunc& func, TVisitNodeSet& visited) const final {
  1857. Y_VERIFY_DEBUG(Node);
  1858. Node->VisitTree(func, visited);
  1859. }
  1860. private:
  1861. TString ModuleName;
  1862. TString FuncName;
  1863. TVector<TNodePtr> Args;
  1864. TNodePtr Node;
  1865. };
  1866. template <bool Sorted>
  1867. class TYqlToDict final: public TCallNode {
  1868. public:
  1869. TYqlToDict(TPosition pos, const TString& mode, const TVector<TNodePtr>& args)
  1870. : TCallNode(pos, "ToDict", 4, 4, args)
  1871. , Mode(mode)
  1872. {}
  1873. private:
  1874. TCallNode::TPtr DoClone() const override {
  1875. return new TYqlToDict<Sorted>(GetPos(), Mode, CloneContainer(Args));
  1876. }
  1877. bool DoInit(TContext& ctx, ISource* src) override {
  1878. if (Args.size() != 1) {
  1879. ctx.Error(Pos) << "ToDict required exactly one argument";
  1880. return false;
  1881. }
  1882. Args.push_back(BuildLambda(Pos, Y("val"), Y("Nth", "val", Q("0"))));
  1883. Args.push_back(BuildLambda(Pos, Y("val"), Y("Nth", "val", Q("1"))));
  1884. Args.push_back(Q(Y(Q(Sorted ? "Sorted" : "Hashed"), Q(Mode))));
  1885. return TCallNode::DoInit(ctx, src);
  1886. }
  1887. private:
  1888. TString Mode;
  1889. };
  1890. template <bool IsStart>
  1891. class THoppingTime final: public TAstListNode {
  1892. public:
  1893. THoppingTime(TPosition pos, const TVector<TNodePtr>& args = {})
  1894. : TAstListNode(pos)
  1895. {
  1896. Y_UNUSED(args);
  1897. }
  1898. private:
  1899. TNodePtr DoClone() const override {
  1900. return new THoppingTime(GetPos());
  1901. }
  1902. bool DoInit(TContext& ctx, ISource* src) override {
  1903. Y_UNUSED(ctx);
  1904. auto window = src->GetHoppingWindowSpec();
  1905. if (!window) {
  1906. ctx.Error(Pos) << "No hopping window parameters in aggregation";
  1907. return false;
  1908. }
  1909. Nodes.clear();
  1910. if (!IsStart) {
  1911. Add("Member", "row", Q("_yql_time"));
  1912. return true;
  1913. }
  1914. Add("Sub",
  1915. Y("Member", "row", Q("_yql_time")),
  1916. window->Interval);
  1917. return true;
  1918. }
  1919. void DoUpdateState() const override {
  1920. State.Set(ENodeState::Aggregated, true);
  1921. }
  1922. };
  1923. class TInvalidBuiltin final: public INode {
  1924. public:
  1925. TInvalidBuiltin(TPosition pos, const TString& info)
  1926. : INode(pos)
  1927. , Info(info)
  1928. {
  1929. }
  1930. bool DoInit(TContext& ctx, ISource*) override {
  1931. ctx.Error(Pos) << Info;
  1932. return false;
  1933. }
  1934. TAstNode* Translate(TContext&) const override {
  1935. return nullptr;
  1936. }
  1937. TPtr DoClone() const override {
  1938. return {};
  1939. }
  1940. private:
  1941. TString Info;
  1942. };
// Selects which aggregation-factory builder BuildAggrFuncFactoryCallback
// invokes for a registered aggregate function.
enum EAggrFuncTypeCallback {
    NORMAL,                  // BuildFactoryAggregation
    KEY_PAYLOAD,             // BuildKeyPayloadFactoryAggregation
    PAYLOAD_PREDICATE,       // BuildPayloadPredicateFactoryAggregation
    TWO_ARGS,                // BuildTwoArgsFactoryAggregation
    COUNT,                   // BuildCountAggregation
    HISTOGRAM,               // BuildHistogramFactoryAggregation
    LINEAR_HISTOGRAM,        // BuildLinearHistogramFactoryAggregation
    PERCENTILE,              // BuildPercentileFactoryAggregation
    TOPFREQ,                 // BuildTopFreqFactoryAggregation
    TOP,                     // BuildTopFactoryAggregation<false>
    TOP_BY,                  // BuildTopFactoryAggregation<true>
    COUNT_DISTINCT_ESTIMATE, // BuildCountDistinctEstimateFactoryAggregation
    LIST,                    // BuildListFactoryAggregation
    UDAF                     // BuildUserDefinedFactoryAggregation
};
// Entry of TCoreFuncMap: a name plus an argument-count range.
// NOTE(review): presumably Name is the callable name a SQL function maps to
// and MinArgs/MaxArgs are inclusive bounds - confirm against MakeCoreFuncs.
struct TCoreFuncInfo {
    TString Name;
    ui32 MinArgs;
    ui32 MaxArgs;
};
// Creates the node for an aggregate function call from its position,
// arguments, aggregation mode and whether it is used as a factory.
using TAggrFuncFactoryCallback = std::function<INode::TPtr(TPosition pos, const TVector<TNodePtr>& args, EAggregateMode aggMode, bool isFactory)>;
// Aggregate function factories keyed by name.
using TAggrFuncFactoryCallbackMap = std::unordered_map<TString, TAggrFuncFactoryCallback, THash<TString>>;
// Creates the node for a builtin function call from its position and arguments.
using TBuiltinFactoryCallback = std::function<TNodePtr(TPosition pos, const TVector<TNodePtr>& args)>;
// Builtin function factories keyed by name.
using TBuiltinFactoryCallbackMap = std::unordered_map<TString, TBuiltinFactoryCallback, THash<TString>>;
// TCoreFuncInfo entries keyed by name.
using TCoreFuncMap = std::unordered_map<TString, TCoreFuncInfo, THash<TString>>;
  1969. TAggrFuncFactoryCallback BuildAggrFuncFactoryCallback(
  1970. const TString& functionName,
  1971. const TString& factoryName,
  1972. EAggrFuncTypeCallback type = NORMAL,
  1973. const TString& functionNameOverride = TString(),
  1974. const TVector<EAggregateMode>& validModes = {}) {
  1975. const TString realFunctionName = functionNameOverride.empty() ? functionName : functionNameOverride;
  1976. return [functionName, realFunctionName, factoryName, type, validModes] (TPosition pos, const TVector<TNodePtr>& args, EAggregateMode aggMode, bool isFactory) -> INode::TPtr {
  1977. if (!validModes.empty()) {
  1978. if (!IsIn(validModes, aggMode)) {
  1979. TString errorText;
  1980. if (TVector{EAggregateMode::OverWindow} == validModes) {
  1981. errorText = TStringBuilder()
  1982. << "Can't use window function " << functionName << " without window specification (OVER keyword is missing)";
  1983. } else {
  1984. errorText = TStringBuilder()
  1985. << "Can't use " << functionName << " in " << ToString(aggMode) << " aggregation mode";
  1986. }
  1987. return INode::TPtr(new TInvalidBuiltin(pos, errorText));
  1988. }
  1989. }
  1990. TAggregationPtr factory = nullptr;
  1991. switch (type) {
  1992. case NORMAL:
  1993. factory = BuildFactoryAggregation(pos, realFunctionName, factoryName, aggMode);
  1994. break;
  1995. case KEY_PAYLOAD:
  1996. factory = BuildKeyPayloadFactoryAggregation(pos, realFunctionName, factoryName, aggMode);
  1997. break;
  1998. case PAYLOAD_PREDICATE:
  1999. factory = BuildPayloadPredicateFactoryAggregation(pos, realFunctionName, factoryName, aggMode);
  2000. break;
  2001. case TWO_ARGS:
  2002. factory = BuildTwoArgsFactoryAggregation(pos, realFunctionName, factoryName, aggMode);
  2003. break;
  2004. case COUNT:
  2005. factory = BuildCountAggregation(pos, realFunctionName, factoryName, aggMode);
  2006. break;
  2007. case HISTOGRAM:
  2008. factory = BuildHistogramFactoryAggregation(pos, realFunctionName, factoryName, aggMode);
  2009. break;
  2010. case LINEAR_HISTOGRAM:
  2011. factory = BuildLinearHistogramFactoryAggregation(pos, realFunctionName, factoryName, aggMode);
  2012. break;
  2013. case PERCENTILE:
  2014. factory = BuildPercentileFactoryAggregation(pos, realFunctionName, factoryName, aggMode);
  2015. break;
  2016. case TOPFREQ:
  2017. factory = BuildTopFreqFactoryAggregation(pos, realFunctionName, factoryName, aggMode);
  2018. break;
  2019. case TOP:
  2020. factory = BuildTopFactoryAggregation<false>(pos, realFunctionName, factoryName, aggMode);
  2021. break;
  2022. case TOP_BY:
  2023. factory = BuildTopFactoryAggregation<true>(pos, realFunctionName, factoryName, aggMode);
  2024. break;
  2025. case COUNT_DISTINCT_ESTIMATE:
  2026. factory = BuildCountDistinctEstimateFactoryAggregation(pos, realFunctionName, factoryName, aggMode);
  2027. break;
  2028. case LIST:
  2029. factory = BuildListFactoryAggregation(pos, realFunctionName, factoryName, aggMode);
  2030. break;
  2031. case UDAF:
  2032. factory = BuildUserDefinedFactoryAggregation(pos, realFunctionName, factoryName, aggMode);
  2033. break;
  2034. }
  2035. if (isFactory) {
  2036. auto realArgs = args;
  2037. realArgs.erase(realArgs.begin()); // skip function name
  2038. return new TBasicAggrFactory(pos, functionName, factory, realArgs);
  2039. } else {
  2040. return new TBasicAggrFunc(pos, functionName, factory, args);
  2041. }
  2042. };
  2043. }
// Convenience overload taking validModes as the third parameter; forwards to
// the primary overload with the arguments reordered.
TAggrFuncFactoryCallback BuildAggrFuncFactoryCallback(
    const TString& functionName,
    const TString& factoryName,
    const TVector<EAggregateMode>& validModes,
    EAggrFuncTypeCallback type = NORMAL,
    const TString& functionNameOverride = TString()) {
    return BuildAggrFuncFactoryCallback(functionName, factoryName, type, functionNameOverride, validModes);
}
  2052. template<typename TType>
  2053. TBuiltinFactoryCallback BuildSimpleBuiltinFactoryCallback() {
  2054. return [] (TPosition pos, const TVector<TNodePtr>& args) -> TNodePtr {
  2055. return new TType(pos, args);
  2056. };
  2057. }
  2058. template<typename TType>
  2059. TBuiltinFactoryCallback BuildNamedBuiltinFactoryCallback(const TString& name) {
  2060. return [name] (TPosition pos, const TVector<TNodePtr>& args) -> TNodePtr {
  2061. return new TType(pos, name, args);
  2062. };
  2063. }
  2064. template<typename TType>
  2065. TBuiltinFactoryCallback BuildArgcBuiltinFactoryCallback(i32 minArgs, i32 maxArgs) {
  2066. return [minArgs, maxArgs] (TPosition pos, const TVector<TNodePtr>& args) -> TNodePtr {
  2067. return new TType(pos, minArgs, maxArgs, args);
  2068. };
  2069. }
  2070. template<typename TType>
  2071. TBuiltinFactoryCallback BuildNamedArgcBuiltinFactoryCallback(const TString& name, i32 minArgs, i32 maxArgs) {
  2072. return [name, minArgs, maxArgs] (TPosition pos, const TVector<TNodePtr>& args) -> TNodePtr {
  2073. return new TType(pos, name, minArgs, maxArgs, args);
  2074. };
  2075. }
  2076. template<typename TType>
  2077. TBuiltinFactoryCallback BuildNamedDepsArgcBuiltinFactoryCallback(ui32 reqArgsCount, const TString& name, i32 minArgs, i32 maxArgs) {
  2078. return [reqArgsCount, name, minArgs, maxArgs](TPosition pos, const TVector<TNodePtr>& args) -> TNodePtr {
  2079. return new TType(reqArgsCount, pos, name, minArgs, maxArgs, args);
  2080. };
  2081. }
  2082. template<typename TType>
  2083. TBuiltinFactoryCallback BuildBoolBuiltinFactoryCallback(bool arg) {
  2084. return [arg] (TPosition pos, const TVector<TNodePtr>& args) -> TNodePtr {
  2085. return new TType(pos, args, arg);
  2086. };
  2087. }
  2088. template<typename TType>
  2089. TBuiltinFactoryCallback BuildFoldBuiltinFactoryCallback(const TString& name, const TString& defaultValue) {
  2090. return [name, defaultValue] (TPosition pos, const TVector<TNodePtr>& args) -> TNodePtr {
  2091. return new TType(pos, name, "Bool", defaultValue, 1, args);
  2092. };
  2093. }
  2094. TNodePtr MakePair(TPosition pos, const TVector<TNodePtr>& args) {
  2095. TNodePtr list = new TAstListNodeImpl(pos, {
  2096. args[0],
  2097. args.size() > 1 ? args[1] : new TAstListNodeImpl(pos,{ new TAstAtomNodeImpl(pos, "Null", TNodeFlags::Default) })
  2098. });
  2099. return new TAstListNodeImpl(pos, {
  2100. new TAstAtomNodeImpl(pos, "quote", TNodeFlags::Default),
  2101. list
  2102. });
  2103. }
  2104. struct TBuiltinFuncData {
  2105. const TBuiltinFactoryCallbackMap BuiltinFuncs;
  2106. const TAggrFuncFactoryCallbackMap AggrFuncs;
  2107. const TCoreFuncMap CoreFuncs;
  2108. TBuiltinFuncData():
  2109. BuiltinFuncs(MakeBuiltinFuncs()),
  2110. AggrFuncs(MakeAggrFuncs()),
  2111. CoreFuncs(MakeCoreFuncs())
  2112. {
  2113. }
  2114. TBuiltinFactoryCallbackMap MakeBuiltinFuncs() {
  2115. TBuiltinFactoryCallbackMap builtinFuncs = {
  2116. // Branching
  2117. {"if", BuildSimpleBuiltinFactoryCallback<TYqlIf<false>>()},
  2118. {"ifstrict", BuildSimpleBuiltinFactoryCallback<TYqlIf<true>>() },
  2119. // String builtins
  2120. {"len", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("Size", 1, 1)},
  2121. {"length", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("Size", 1, 1)},
  2122. {"charlength", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("Size", 1, 1)},
  2123. {"characterlength", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("Size", 1, 1)},
  2124. {"substring", BuildNamedBuiltinFactoryCallback<TYqlSubstring>("Substring")},
  2125. {"find", BuildNamedBuiltinFactoryCallback<TYqlSubstring>("Find")},
  2126. {"rfind", BuildNamedBuiltinFactoryCallback<TYqlSubstring>("RFind")},
  2127. {"byteat", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("ByteAt", 2, 2) },
  2128. {"startswith", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("StartsWith", 2, 2)},
  2129. {"endswith", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("EndsWith", 2, 2)},
  2130. // Numeric builtins
  2131. {"abs", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("Abs", 1, 1) },
  2132. {"tobytes", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("ToBytes", 1, 1) },
  2133. {"frombytes", BuildSimpleBuiltinFactoryCallback<TFromBytes>() },
  2134. // Compare builtins
  2135. {"minof", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("Min", 1, -1)},
  2136. {"maxof", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("Max", 1, -1)},
  2137. {"greatest", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("Max", 1, -1)},
  2138. {"least", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("Min", 1, -1)},
  2139. {"in", BuildSimpleBuiltinFactoryCallback<TYqlIn>()},
  2140. // List builtins
  2141. {"aslist", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("AsList", 0, -1)},
  2142. {"asliststrict", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("AsListStrict", 0, -1) },
  2143. {"listlength", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("Length", 1, 1)},
  2144. {"listhasitems", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("HasItems", 1, 1)},
  2145. {"listextend", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("ListExtend", 0, -1)},
  2146. {"listextendstrict", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("ListExtendStrict", 0, -1)},
  2147. {"listunionall", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("ListUnionAll", 0, -1) },
  2148. {"listzip", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("ListZip", -1, -1)},
  2149. {"listzipall", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("ListZipAll", -1, -1)},
  2150. {"listenumerate", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("ListEnumerate", 1, 3)},
  2151. {"listreverse", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("ListReverse", 1, 1)},
  2152. {"listskip", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("ListSkip", 2, 2)},
  2153. {"listtake", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("ListTake", 2, 2)},
  2154. {"listhead", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("ListHead", 1, 1)},
  2155. {"listlast", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("ListLast", 1, 1)},
  2156. {"listsort", BuildBoolBuiltinFactoryCallback<TListSortBuiltin>(true)},
  2157. {"listsortasc", BuildBoolBuiltinFactoryCallback<TListSortBuiltin>(true)},
  2158. {"listsortdesc", BuildBoolBuiltinFactoryCallback<TListSortBuiltin>(false)},
  2159. {"listmap", BuildBoolBuiltinFactoryCallback<TListMapBuiltin>(false)},
  2160. {"listflatmap", BuildBoolBuiltinFactoryCallback<TListMapBuiltin>(true)},
  2161. {"listfilter", BuildNamedBuiltinFactoryCallback<TListFilterBuiltin>("ListFilter")},
  2162. {"listany", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("ListAny", 1, 1)},
  2163. {"listall", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("ListAll", 1, 1)},
  2164. {"listhas", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("ListHas", 2, 2)},
  2165. {"listmax", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("ListMax", 1, 1)},
  2166. {"listmin", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("ListMin", 1, 1)},
  2167. {"listsum", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("ListSum", 1, 1)},
  2168. {"listavg", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("ListAvg", 1, 1)},
  2169. {"listconcat", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("ListConcat", 1, 2)},
  2170. {"listextract", BuildSimpleBuiltinFactoryCallback<TListExtractBuiltin>()},
  2171. {"listuniq", BuildSimpleBuiltinFactoryCallback<TListUniqBuiltin>()},
  2172. {"listcreate", BuildSimpleBuiltinFactoryCallback<TListCreateBuiltin>()},
  2173. {"listfromrange", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("ListFromRange", 2, 3) },
  2174. {"listreplicate", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("Replicate", 2, 2) },
  2175. {"listtakewhile", BuildNamedBuiltinFactoryCallback<TListFilterBuiltin>("ListTakeWhile") },
  2176. {"listskipwhile", BuildNamedBuiltinFactoryCallback<TListFilterBuiltin>("ListSkipWhile") },
  2177. {"listtakewhileinclusive", BuildNamedBuiltinFactoryCallback<TListFilterBuiltin>("ListTakeWhileInclusive") },
  2178. {"listskipwhileinclusive", BuildNamedBuiltinFactoryCallback<TListFilterBuiltin>("ListSkipWhileInclusive") },
  2179. {"listcollect", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("ListCollect", 1, 1) },
  2180. {"listnotnull", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("ListNotNull", 1, 1)},
  2181. {"listflatten", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("ListFlatten", 1, 1)},
  2182. // Dict builtins
  2183. {"dictlength", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("Length", 1, 1)},
  2184. {"dicthasitems", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("HasItems", 1, 1)},
  2185. {"dictcreate", BuildSimpleBuiltinFactoryCallback<TDictCreateBuiltin>()},
  2186. {"setcreate", BuildSimpleBuiltinFactoryCallback<TSetCreateBuiltin>()},
  2187. {"asdict", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("AsDict", 0, -1)},
  2188. {"asdictstrict", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("AsDictStrict", 0, -1)},
  2189. {"asset", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("AsSet", 0, -1)},
  2190. {"assetstrict", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("AsSetStrict", 0, -1)},
  2191. {"todict", BuildNamedBuiltinFactoryCallback<TYqlToDict<false>>("One")},
  2192. {"tomultidict", BuildNamedBuiltinFactoryCallback<TYqlToDict<false>>("Many")},
  2193. {"tosorteddict", BuildNamedBuiltinFactoryCallback<TYqlToDict<true>>("One")},
  2194. {"tosortedmultidict", BuildNamedBuiltinFactoryCallback<TYqlToDict<true>>("Many")},
  2195. {"dictkeys", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("DictKeys", 1, 1) },
  2196. {"dictpayloads", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("DictPayloads", 1, 1) },
  2197. {"dictitems", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("DictItems", 1, 1) },
  2198. {"dictlookup", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("Lookup", 2, 2) },
  2199. {"dictcontains", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("Contains", 2, 2) },
  2200. // Atom builtins
  2201. {"asatom", BuildSimpleBuiltinFactoryCallback<TYqlAsAtom>()},
  2202. {"secureparam", BuildNamedBuiltinFactoryCallback<TYqlAtom>("SecureParam")},
  2203. {"void", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("Void", 0, 0)},
  2204. {"emptylist", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("EmptyList", 0, 0)},
  2205. {"emptydict", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("EmptyDict", 0, 0)},
  2206. {"callable", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("Callable", 2, 2)},
  2207. {"way", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("Way", 1, 1) },
  2208. {"variant", BuildSimpleBuiltinFactoryCallback<TYqlVariant>() },
  2209. {"enum", BuildSimpleBuiltinFactoryCallback<TYqlEnum>() },
  2210. {"asvariant", BuildSimpleBuiltinFactoryCallback<TYqlAsVariant>() },
  2211. {"asenum", BuildSimpleBuiltinFactoryCallback<TYqlAsEnum>() },
  2212. {"astagged", BuildSimpleBuiltinFactoryCallback<TYqlAsTagged>() },
  2213. {"untag", BuildSimpleBuiltinFactoryCallback<TYqlUntag>() },
  2214. {"parsetype", BuildSimpleBuiltinFactoryCallback<TYqlParseType>() },
  2215. {"ensuretype", BuildSimpleBuiltinFactoryCallback<TYqlTypeAssert<true>>() },
  2216. {"ensureconvertibleto", BuildSimpleBuiltinFactoryCallback<TYqlTypeAssert<false>>() },
  2217. {"ensure", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("Ensure", 2, 3) },
  2218. {"evaluateexpr", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("EvaluateExpr", 1, 1) },
  2219. {"evaluateatom", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("EvaluateAtom", 1, 1) },
  2220. {"evaluatetype", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("EvaluateType", 1, 1) },
  2221. {"unwrap", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("Unwrap", 1, 2) },
  2222. {"just", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("Just", 1, 1) },
  2223. {"nothing", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("Nothing", 1, 1) },
  2224. {"formattype", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("FormatType", 1, 1) },
  2225. {"pgtype", BuildSimpleBuiltinFactoryCallback<TYqlPgType>() },
  2226. {"pgconst", BuildSimpleBuiltinFactoryCallback<TYqlPgConst>() },
  2227. {"typeof", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("TypeOf", 1, 1) },
  2228. {"instanceof", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("InstanceOf", 1, 1) },
  2229. {"datatype", BuildSimpleBuiltinFactoryCallback<TYqlDataType>() },
  2230. {"optionaltype", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("OptionalType", 1, 1) },
  2231. {"listtype", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("ListType", 1, 1) },
  2232. {"streamtype", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("StreamType", 1, 1) },
  2233. {"dicttype", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("DictType", 2, 2) },
  2234. {"tupletype", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("TupleType", 0, -1) },
  2235. {"generictype", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("GenericType", 0, 0) },
  2236. {"unittype", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("UnitType", 0, 0) },
  2237. {"voidtype", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("VoidType", 0, 0) },
  2238. {"resourcetype", BuildSimpleBuiltinFactoryCallback<TYqlResourceType>() },
  2239. {"taggedtype", BuildSimpleBuiltinFactoryCallback<TYqlTaggedType>() },
  2240. {"varianttype", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("VariantType", 1, 1) },
  2241. {"callabletype", BuildSimpleBuiltinFactoryCallback<TYqlCallableType>() },
  2242. {"optionalitemtype", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("OptionalItemType", 1, 1) },
  2243. {"listitemtype", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("ListItemType", 1, 1) },
  2244. {"streamitemtype", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("StreamItemType", 1, 1) },
  2245. {"dictkeytype", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("DictKeyType", 1, 1) },
  2246. {"dictpayloadtype", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("DictPayloadType", 1, 1) },
  2247. {"tupleelementtype", BuildSimpleBuiltinFactoryCallback<TYqlTupleElementType>() },
  2248. {"structmembertype", BuildSimpleBuiltinFactoryCallback<TYqlStructMemberType>() },
  2249. {"callableresulttype", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("CallableResultType", 1, 1) },
  2250. {"callableargumenttype", BuildSimpleBuiltinFactoryCallback<TYqlCallableArgumentType>() },
  2251. {"variantunderlyingtype", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("VariantUnderlyingType", 1, 1) },
  2252. {"fromysonsimpletype", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("FromYsonSimpleType", 2, 2) },
  2253. {"currentutcdate", BuildNamedDepsArgcBuiltinFactoryCallback<TCallNodeDepArgs>(0, "CurrentUtcDate", 0, -1) },
  2254. {"currentutcdatetime", BuildNamedDepsArgcBuiltinFactoryCallback<TCallNodeDepArgs>(0, "CurrentUtcDatetime", 0, -1) },
  2255. {"currentutctimestamp", BuildNamedDepsArgcBuiltinFactoryCallback<TCallNodeDepArgs>(0, "CurrentUtcTimestamp", 0, -1) },
  2256. { "currenttzdate", BuildNamedDepsArgcBuiltinFactoryCallback<TCallNodeDepArgs>(1, "CurrentTzDate", 1, -1) },
  2257. { "currenttzdatetime", BuildNamedDepsArgcBuiltinFactoryCallback<TCallNodeDepArgs>(1, "CurrentTzDatetime", 1, -1) },
  2258. { "currenttztimestamp", BuildNamedDepsArgcBuiltinFactoryCallback<TCallNodeDepArgs>(1, "CurrentTzTimestamp", 1, -1) },
  2259. {"currentoperationid", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("CurrentOperationId", 0, 0) },
  2260. {"currentoperationsharedid", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("CurrentOperationSharedId", 0, 0) },
  2261. {"currentauthenticateduser", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("CurrentAuthenticatedUser", 0, 0) },
  2262. {"addtimezone", BuildSimpleBuiltinFactoryCallback<TYqlAddTimezone>() },
  2263. {"removetimezone", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("RemoveTimezone", 1, 1) },
  2264. {"pickle", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("Pickle", 1, 1) },
  2265. {"stablepickle", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("StablePickle", 1, 1) },
  2266. {"unpickle", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("Unpickle", 2, 2) },
  2267. {"typehandle", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("TypeHandle", 1, 1) },
  2268. {"parsetypehandle", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("ParseTypeHandle", 1, 1) },
  2269. {"typekind", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("TypeKind", 1, 1) },
  2270. {"datatypecomponents", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("DataTypeComponents", 1, 1) },
  2271. {"datatypehandle", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("DataTypeHandle", 1, 1) },
  2272. {"optionaltypehandle", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("OptionalTypeHandle", 1, 1) },
  2273. {"listtypehandle", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("ListTypeHandle", 1, 1) },
  2274. {"streamtypehandle", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("StreamTypeHandle", 1, 1) },
  2275. {"tupletypecomponents", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("TupleTypeComponents", 1, 1) },
  2276. {"tupletypehandle", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("TupleTypeHandle", 1, 1) },
  2277. {"structtypecomponents", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("StructTypeComponents", 1, 1) },
  2278. {"structtypehandle", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("StructTypeHandle", 1, 1) },
  2279. {"dicttypecomponents", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("DictTypeComponents", 1, 1) },
  2280. {"dicttypehandle", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("DictTypeHandle", 2, 2) },
  2281. {"resourcetypetag", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("ResourceTypeTag", 1, 1) },
  2282. {"resourcetypehandle", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("ResourceTypeHandle", 1, 1) },
  2283. {"taggedtypecomponents", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("TaggedTypeComponents", 1, 1) },
  2284. {"taggedtypehandle", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("TaggedTypeHandle", 2, 2) },
  2285. {"varianttypehandle", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("VariantTypeHandle", 1, 1) },
  2286. {"voidtypehandle", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("VoidTypeHandle", 0, 0) },
  2287. {"nulltypehandle", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("NullTypeHandle", 0, 0) },
  2288. {"emptylisttypehandle", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("EmptyListTypeHandle", 0, 0) },
  2289. {"emptydicttypehandle", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("EmptyDictTypeHandle", 0, 0) },
  2290. {"callabletypecomponents", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("CallableTypeComponents", 1, 1) },
  2291. {"callableargument", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("CallableArgument", 1, 3) },
  2292. {"callabletypehandle", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("CallableTypeHandle", 2, 4) },
  2293. {"formatcode", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("FormatCode", 1, 1) },
  2294. {"worldcode", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("WorldCode", 0, 0) },
  2295. {"atomcode", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("AtomCode", 1, 1) },
  2296. {"listcode", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("ListCode", 0, -1) },
  2297. {"funccode", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("FuncCode", 1, -1) },
  2298. {"lambdacode", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("LambdaCode", 1, 2) },
  2299. {"evaluatecode", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("EvaluateCode", 1, 1) },
  2300. {"reprcode", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("ReprCode", 1, 1) },
  2301. {"quotecode", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("QuoteCode", 1, 1) },
  2302. {"lambdaargumentscount", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("LambdaArgumentsCount", 1, 1) },
  2303. {"subqueryextend", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("SubqueryExtend", 1, -1) },
  2304. {"subqueryunionall", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("SubqueryUnionAll", 1, -1) },
  2305. {"subquerymerge", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("SubqueryMerge", 1, -1) },
  2306. {"subqueryunionmerge", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("SubqueryUnionMerge", 1, -1) },
  2307. {"subqueryextendfor", BuildSimpleBuiltinFactoryCallback<TYqlSubqueryFor<SubqueryExtendFor>>() },
  2308. {"subqueryunionallfor", BuildSimpleBuiltinFactoryCallback<TYqlSubqueryFor<SubqueryUnionAllFor>>() },
  2309. {"subquerymergefor", BuildSimpleBuiltinFactoryCallback<TYqlSubqueryFor<SubqueryMergeFor>>() },
  2310. {"subqueryunionmergefor", BuildSimpleBuiltinFactoryCallback<TYqlSubqueryFor<SubqueryUnionMergeFor>>() },
  2311. {"subqueryorderby", BuildSimpleBuiltinFactoryCallback<TYqlSubqueryOrderBy<SubqueryOrderBy>>() },
  2312. {"subqueryassumeorderby", BuildSimpleBuiltinFactoryCallback<TYqlSubqueryOrderBy<SubqueryAssumeOrderBy>>() },
  2313. // Tuple builtins
  2314. {"astuple", BuildSimpleBuiltinFactoryCallback<TTupleNode>()},
  2315. // Struct builtins
  2316. {"trymember", BuildNamedBuiltinFactoryCallback<TTryMember>("TryMember")},
  2317. {"addmember", BuildNamedBuiltinFactoryCallback<TAddMember>("AddMember")},
  2318. {"replacemember", BuildNamedBuiltinFactoryCallback<TAddMember>("ReplaceMember")},
  2319. {"removemember", BuildNamedBuiltinFactoryCallback<TRemoveMember>("RemoveMember")},
  2320. {"forceremovemember", BuildNamedBuiltinFactoryCallback<TRemoveMember>("ForceRemoveMember")},
  2321. {"combinemembers", BuildNamedBuiltinFactoryCallback<TCombineMembers>("FlattenMembers")},
  2322. {"flattenmembers", BuildNamedBuiltinFactoryCallback<TFlattenMembers>("FlattenMembers")},
  2323. {"staticmap", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("StaticMap", 2, 2) },
  2324. {"staticzip", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("StaticZip", 1, -1) },
  2325. // File builtins
  2326. {"filepath", BuildNamedBuiltinFactoryCallback<TYqlAtom>("FilePath")},
  2327. {"filecontent", BuildNamedBuiltinFactoryCallback<TYqlAtom>("FileContent")},
  2328. {"folderpath", BuildNamedBuiltinFactoryCallback<TYqlAtom>("FolderPath") },
  2329. {"files", BuildNamedBuiltinFactoryCallback<TYqlAtom>("Files")},
  2330. {"parsefile", BuildSimpleBuiltinFactoryCallback<TYqlParseFileOp>()},
  2331. // Misc builtins
  2332. {"coalesce", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("Coalesce", 1, -1)},
  2333. {"nvl", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("Coalesce", 1, -1) },
  2334. {"nanvl", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("Nanvl", 2, 2) },
  2335. {"likely", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("Likely", 1, -1)},
  2336. {"random", BuildNamedDepsArgcBuiltinFactoryCallback<TCallNodeDepArgs>(0, "Random", 1, -1)},
  2337. {"randomnumber", BuildNamedDepsArgcBuiltinFactoryCallback<TCallNodeDepArgs>(0, "RandomNumber", 1, -1)},
  2338. {"randomuuid", BuildNamedDepsArgcBuiltinFactoryCallback<TCallNodeDepArgs>(0, "RandomUuid", 1, -1) },
  2339. {"tablepath", BuildNamedBuiltinFactoryCallback<TCallDirectRow>("TablePath") },
  2340. {"tablerecordindex", BuildNamedBuiltinFactoryCallback<TCallDirectRow>("TableRecord") },
  2341. {"tablerow", BuildSimpleBuiltinFactoryCallback<TTableRow<false>>() },
  2342. {"jointablerow", BuildSimpleBuiltinFactoryCallback<TTableRow<true>>() },
  2343. {"tablerows", BuildSimpleBuiltinFactoryCallback<TTableRows>() },
  2344. {"weakfield", BuildSimpleBuiltinFactoryCallback<TWeakFieldOp>()},
  2345. // Hint builtins
  2346. {"grouping", BuildSimpleBuiltinFactoryCallback<TGroupingNode>()},
  2347. // Window functions
  2348. {"rownumber", BuildNamedArgcBuiltinFactoryCallback<TWinRowNumber>("RowNumber", 0, 0)},
  2349. {"rank", BuildNamedArgcBuiltinFactoryCallback<TWinRank>("Rank", 0, 1)},
  2350. {"denserank", BuildNamedArgcBuiltinFactoryCallback<TWinRank>("DenseRank", 0, 1)},
  2351. {"lead", BuildNamedArgcBuiltinFactoryCallback<TWinLeadLag>("Lead", 1, 2)},
  2352. {"lag", BuildNamedArgcBuiltinFactoryCallback<TWinLeadLag>("Lag", 1, 2)},
  2353. // Session window
  2354. {"sessionwindow", BuildSimpleBuiltinFactoryCallback<TSessionWindow>()},
  2355. {"sessionstart", BuildSimpleBuiltinFactoryCallback<TSessionStart<true>>()},
  2356. {"sessionstate", BuildSimpleBuiltinFactoryCallback<TSessionStart<false>>()},
  2357. // Hopping intervals time functions
  2358. {"hopstart", BuildSimpleBuiltinFactoryCallback<THoppingTime<true>>()},
  2359. {"hopend", BuildSimpleBuiltinFactoryCallback<THoppingTime<false>>()},
  2360. };
  2361. return builtinFuncs;
  2362. }
  2363. TAggrFuncFactoryCallbackMap MakeAggrFuncs() {
  2364. constexpr auto OverWindow = EAggregateMode::OverWindow;
  2365. TAggrFuncFactoryCallbackMap aggrFuncs = {
  2366. {"min", BuildAggrFuncFactoryCallback("Min", "min_traits_factory")},
  2367. {"max", BuildAggrFuncFactoryCallback("Max", "max_traits_factory")},
  2368. {"minby", BuildAggrFuncFactoryCallback("MinBy", "min_by_traits_factory", KEY_PAYLOAD)},
  2369. {"maxby", BuildAggrFuncFactoryCallback("MaxBy", "max_by_traits_factory", KEY_PAYLOAD)},
  2370. {"sum", BuildAggrFuncFactoryCallback("Sum", "sum_traits_factory")},
  2371. {"sumif", BuildAggrFuncFactoryCallback("SumIf", "sum_if_traits_factory", PAYLOAD_PREDICATE) },
  2372. {"some", BuildAggrFuncFactoryCallback("Some", "some_traits_factory")},
  2373. {"somevalue", BuildAggrFuncFactoryCallback("SomeValue", "some_traits_factory")},
  2374. {"count", BuildAggrFuncFactoryCallback("Count", "count_traits_factory", COUNT)},
  2375. {"countif", BuildAggrFuncFactoryCallback("CountIf", "count_if_traits_factory")},
  2376. {"every", BuildAggrFuncFactoryCallback("Every", "and_traits_factory")},
  2377. {"booland", BuildAggrFuncFactoryCallback("BoolAnd", "and_traits_factory")},
  2378. {"boolor", BuildAggrFuncFactoryCallback("BoolOr", "or_traits_factory")},
  2379. {"boolxor", BuildAggrFuncFactoryCallback("BoolXor", "xor_traits_factory")},
  2380. {"bitand", BuildAggrFuncFactoryCallback("BitAnd", "bit_and_traits_factory")},
  2381. {"bitor", BuildAggrFuncFactoryCallback("BitOr", "bit_or_traits_factory")},
  2382. {"bitxor", BuildAggrFuncFactoryCallback("BitXor", "bit_xor_traits_factory")},
  2383. {"avg", BuildAggrFuncFactoryCallback("Avg", "avg_traits_factory")},
  2384. {"avgif", BuildAggrFuncFactoryCallback("AvgIf", "avg_if_traits_factory", PAYLOAD_PREDICATE) },
  2385. {"agglist", BuildAggrFuncFactoryCallback("AggregateList", "list2_traits_factory", LIST)},
  2386. {"aggrlist", BuildAggrFuncFactoryCallback("AggregateList", "list2_traits_factory", LIST)},
  2387. {"aggregatelist", BuildAggrFuncFactoryCallback("AggregateList", "list2_traits_factory", LIST)},
  2388. {"agglistdistinct", BuildAggrFuncFactoryCallback("AggregateListDistinct", "set_traits_factory", LIST)},
  2389. {"aggrlistdistinct", BuildAggrFuncFactoryCallback("AggregateListDistinct", "set_traits_factory", LIST)},
  2390. {"aggregatelistdistinct", BuildAggrFuncFactoryCallback("AggregateListDistinct", "set_traits_factory", LIST)},
  2391. {"median", BuildAggrFuncFactoryCallback("Median", "percentile_traits_factory", PERCENTILE)},
  2392. {"percentile", BuildAggrFuncFactoryCallback("Percentile", "percentile_traits_factory", PERCENTILE)},
  2393. {"mode", BuildAggrFuncFactoryCallback("Mode", "topfreq_traits_factory", TOPFREQ) },
  2394. {"topfreq", BuildAggrFuncFactoryCallback("TopFreq", "topfreq_traits_factory", TOPFREQ) },
  2395. {"top", BuildAggrFuncFactoryCallback("Top", "top_traits_factory", TOP)},
  2396. {"bottom", BuildAggrFuncFactoryCallback("Bottom", "bottom_traits_factory", TOP)},
  2397. {"topby", BuildAggrFuncFactoryCallback("TopBy", "top_by_traits_factory", TOP_BY)},
  2398. {"bottomby", BuildAggrFuncFactoryCallback("BottomBy", "bottom_by_traits_factory", TOP_BY)},
  2399. {"histogram", BuildAggrFuncFactoryCallback("AdaptiveWardHistogram", "histogram_adaptive_ward_traits_factory", HISTOGRAM, "Histogram")},
  2400. {"adaptivewardhistogram", BuildAggrFuncFactoryCallback("AdaptiveWardHistogram", "histogram_adaptive_ward_traits_factory", HISTOGRAM)},
  2401. {"adaptiveweighthistogram", BuildAggrFuncFactoryCallback("AdaptiveWeightHistogram", "histogram_adaptive_weight_traits_factory", HISTOGRAM)},
  2402. {"adaptivedistancehistogram", BuildAggrFuncFactoryCallback("AdaptiveDistanceHistogram", "histogram_adaptive_distance_traits_factory", HISTOGRAM)},
  2403. {"blockwardhistogram", BuildAggrFuncFactoryCallback("BlockWardHistogram", "histogram_block_ward_traits_factory", HISTOGRAM)},
  2404. {"blockweighthistogram", BuildAggrFuncFactoryCallback("BlockWeightHistogram", "histogram_block_weight_traits_factory", HISTOGRAM)},
  2405. {"linearhistogram", BuildAggrFuncFactoryCallback("LinearHistogram", "histogram_linear_traits_factory", LINEAR_HISTOGRAM)},
  2406. {"logarithmichistogram", BuildAggrFuncFactoryCallback("LogarithmicHistogram", "histogram_logarithmic_traits_factory", LINEAR_HISTOGRAM)},
  2407. {"loghistogram", BuildAggrFuncFactoryCallback("LogarithmicHistogram", "histogram_logarithmic_traits_factory", LINEAR_HISTOGRAM, "LogHistogram")},
  2408. {"hyperloglog", BuildAggrFuncFactoryCallback("HyperLogLog", "hyperloglog_traits_factory", COUNT_DISTINCT_ESTIMATE)},
  2409. {"hll", BuildAggrFuncFactoryCallback("HyperLogLog", "hyperloglog_traits_factory", COUNT_DISTINCT_ESTIMATE, "HLL")},
  2410. {"countdistinctestimate", BuildAggrFuncFactoryCallback("HyperLogLog", "hyperloglog_traits_factory", COUNT_DISTINCT_ESTIMATE, "CountDistinctEstimate")},
  2411. {"variance", BuildAggrFuncFactoryCallback("Variance", "variance_0_1_traits_factory")},
  2412. {"stddev", BuildAggrFuncFactoryCallback("StdDev", "variance_1_1_traits_factory")},
  2413. {"populationvariance", BuildAggrFuncFactoryCallback("VariancePopulation", "variance_0_0_traits_factory")},
  2414. {"variancepopulation", BuildAggrFuncFactoryCallback("VariancePopulation", "variance_0_0_traits_factory")},
  2415. {"populationstddev", BuildAggrFuncFactoryCallback("StdDevPopulation", "variance_1_0_traits_factory")},
  2416. {"stddevpopulation", BuildAggrFuncFactoryCallback("StdDevPopulation", "variance_1_0_traits_factory")},
  2417. {"varpop", BuildAggrFuncFactoryCallback("VariancePopulation", "variance_0_0_traits_factory")},
  2418. {"stddevpop", BuildAggrFuncFactoryCallback("StdDevPopulation", "variance_1_0_traits_factory")},
  2419. {"varp", BuildAggrFuncFactoryCallback("VariancePopulation", "variance_0_0_traits_factory")},
  2420. {"stddevp", BuildAggrFuncFactoryCallback("StdDevPopulation", "variance_1_0_traits_factory")},
  2421. {"variancesample", BuildAggrFuncFactoryCallback("VarianceSample", "variance_0_1_traits_factory")},
  2422. {"stddevsample", BuildAggrFuncFactoryCallback("StdDevSample", "variance_1_1_traits_factory")},
  2423. {"varsamp", BuildAggrFuncFactoryCallback("VarianceSample", "variance_0_1_traits_factory")},
  2424. {"stddevsamp", BuildAggrFuncFactoryCallback("StdDevSample", "variance_1_1_traits_factory")},
  2425. {"vars", BuildAggrFuncFactoryCallback("VarianceSample", "variance_0_1_traits_factory")},
  2426. {"stddevs", BuildAggrFuncFactoryCallback("StdDevSample", "variance_1_1_traits_factory")},
  2427. {"correlation", BuildAggrFuncFactoryCallback("Correlation", "correlation_traits_factory", TWO_ARGS)},
  2428. {"corr", BuildAggrFuncFactoryCallback("Correlation", "correlation_traits_factory", TWO_ARGS, "Corr")},
  2429. {"covariance", BuildAggrFuncFactoryCallback("CovarianceSample", "covariance_sample_traits_factory", TWO_ARGS, "Covariance")},
  2430. {"covariancesample", BuildAggrFuncFactoryCallback("CovarianceSample", "covariance_sample_traits_factory", TWO_ARGS)},
  2431. {"covarsamp", BuildAggrFuncFactoryCallback("CovarianceSample", "covariance_sample_traits_factory", TWO_ARGS, "CovarSamp")},
  2432. {"covar", BuildAggrFuncFactoryCallback("CovarianceSample", "covariance_sample_traits_factory", TWO_ARGS, "Covar")},
  2433. {"covars", BuildAggrFuncFactoryCallback("CovarianceSample", "covariance_sample_traits_factory", TWO_ARGS, "CovarS")},
  2434. {"covariancepopulation", BuildAggrFuncFactoryCallback("CovariancePopulation", "covariance_population_traits_factory", TWO_ARGS)},
  2435. {"covarpop", BuildAggrFuncFactoryCallback("CovariancePopulation", "covariance_population_traits_factory", TWO_ARGS, "CovarPop")},
  2436. {"covarp", BuildAggrFuncFactoryCallback("CovariancePopulation", "covariance_population_traits_factory", TWO_ARGS, "CovarP")},
  2437. {"udaf", BuildAggrFuncFactoryCallback("UDAF", "udaf_traits_factory", UDAF)},
  2438. // Window functions
  2439. {"firstvalue", BuildAggrFuncFactoryCallback("FirstValue", "first_value_traits_factory", {OverWindow})},
  2440. {"lastvalue", BuildAggrFuncFactoryCallback("LastValue", "last_value_traits_factory", {OverWindow})},
  2441. {"firstvalueignorenulls", BuildAggrFuncFactoryCallback("FirstValueIgnoreNulls", "first_value_ignore_nulls_traits_factory", {OverWindow})},
  2442. {"lastvalueignorenulls", BuildAggrFuncFactoryCallback("LastValueIgnoreNulls", "last_value_ignore_nulls_traits_factory", {OverWindow})},
  2443. };
  2444. return aggrFuncs;
  2445. }
  2446. TCoreFuncMap MakeCoreFuncs() {
  2447. TCoreFuncMap coreFuncs = {
  2448. {"listindexof", { "IndexOf", 2, 2}},
  2449. {"testbit", { "TestBit", 2, 2}},
  2450. {"setbit", { "SetBit", 2, 2}},
  2451. {"clearbit", { "ClearBit", 2, 2}},
  2452. {"flipbit", { "FlipBit", 2, 2 }},
  2453. {"toset", { "ToSet", 1, 1 }},
  2454. {"setisdisjoint", { "SetIsDisjoint", 2, 2}},
  2455. {"setintersection", { "SetIntersection", 2, 3}},
  2456. {"setincludes", { "SetIncludes", 2, 2}},
  2457. {"setunion", { "SetUnion", 2, 3}},
  2458. {"setdifference", { "SetDifference", 2, 2}},
  2459. {"setsymmetricdifference", { "SetSymmetricDifference", 2, 3}},
  2460. {"listaggregate", { "ListAggregate", 2, 2}},
  2461. {"dictaggregate", { "DictAggregate", 2, 2}},
  2462. {"aggregatetransforminput", { "AggregateTransformInput", 2, 2}},
  2463. {"aggregatetransformoutput", { "AggregateTransformOutput", 2, 2}},
  2464. {"aggregateflatten", { "AggregateFlatten", 1, 1}},
  2465. {"choosemembers", { "ChooseMembers", 2, 2}},
  2466. {"removemembers", { "RemoveMembers", 2, 2}},
  2467. {"forceremovemembers", { "ForceRemoveMembers", 2, 2}},
  2468. {"structmembers", { "StructMembers", 1, 1}},
  2469. {"gathermembers", { "GatherMembers", 1, 1}},
  2470. {"renamemembers", { "RenameMembers", 2, 2}},
  2471. {"forcerenamemembers", { "ForceRenameMembers", 2, 2}},
  2472. {"spreadmembers", { "SpreadMembers", 2, 2}},
  2473. {"forcespreadmembers", { "ForceSpreadMembers", 2, 2}},
  2474. };
  2475. return coreFuncs;
  2476. }
  2477. };
  2478. TNodePtr BuildBuiltinFunc(TContext& ctx, TPosition pos, TString name, const TVector<TNodePtr>& args,
  2479. const TString& originalNameSpace, EAggregateMode aggMode, bool* mustUseNamed, bool warnOnYqlNameSpace) {
  2480. const TBuiltinFuncData* funcData = Singleton<TBuiltinFuncData>();
  2481. const TBuiltinFactoryCallbackMap& builtinFuncs = funcData->BuiltinFuncs;
  2482. const TAggrFuncFactoryCallbackMap& aggrFuncs = funcData->AggrFuncs;
  2483. const TCoreFuncMap& coreFuncs = funcData->CoreFuncs;
  2484. for (auto& arg: args) {
  2485. if (!arg) {
  2486. return nullptr;
  2487. }
  2488. }
  2489. TString normalizedName(name);
  2490. TString nameSpace(originalNameSpace);
  2491. TString ns = to_lower(nameSpace);
  2492. if (ns.empty()) {
  2493. TMaybe<TIssue> error = NormalizeName(pos, normalizedName);
  2494. if (!error.Empty()) {
  2495. return new TInvalidBuiltin(pos, error->Message);
  2496. }
  2497. auto coreFunc = coreFuncs.find(normalizedName);
  2498. if (coreFunc != coreFuncs.end()) {
  2499. ns = "core";
  2500. name = coreFunc->second.Name;
  2501. if (args.size() < coreFunc->second.MinArgs || args.size() > coreFunc->second.MaxArgs) {
  2502. return new TInvalidBuiltin(pos, TStringBuilder() << name << " expected from "
  2503. << coreFunc->second.MinArgs << " to " << coreFunc->second.MaxArgs << " arguments, but got: " << args.size());
  2504. }
  2505. if (coreFunc->second.MinArgs != coreFunc->second.MaxArgs) {
  2506. name += ToString(args.size());
  2507. }
  2508. }
  2509. }
  2510. TString moduleResource;
  2511. if (ctx.Settings.ModuleMapping.contains(ns)) {
  2512. moduleResource = ctx.Settings.ModuleMapping.at(ns);
  2513. }
  2514. if (ns == "js") {
  2515. ns = "javascript";
  2516. nameSpace = "JavaScript";
  2517. }
  2518. if (ns == "datetime2") {
  2519. ctx.Warning(pos, TIssuesIds::YQL_DEPRECATED_DATETIME2) << "DateTime2:: is a temporary alias for DateTime:: which will be removed in the future, use DateTime:: instead";
  2520. }
  2521. if (ns == "datetime") {
  2522. ns = "datetime2";
  2523. nameSpace = "DateTime2";
  2524. }
  2525. auto scriptType = NKikimr::NMiniKQL::ScriptTypeFromStr(ns);
  2526. switch (scriptType) {
  2527. case NKikimr::NMiniKQL::EScriptType::Python:
  2528. case NKikimr::NMiniKQL::EScriptType::Python3:
  2529. case NKikimr::NMiniKQL::EScriptType::ArcPython3:
  2530. scriptType = NKikimr::NMiniKQL::EScriptType::Python3;
  2531. break;
  2532. case NKikimr::NMiniKQL::EScriptType::Python2:
  2533. scriptType = NKikimr::NMiniKQL::EScriptType::ArcPython2;
  2534. break;
  2535. case NKikimr::NMiniKQL::EScriptType::SystemPython2:
  2536. scriptType = NKikimr::NMiniKQL::EScriptType::Python2;
  2537. break;
  2538. default:
  2539. break;
  2540. }
  2541. if (ns == "yql" || ns == "@yql") {
  2542. if (warnOnYqlNameSpace && GetEnv("YQL_DETERMINISTIC_MODE").empty()) {
  2543. ctx.Warning(pos, TIssuesIds::YQL_S_EXPRESSIONS_CALL)
  2544. << "It is not recommended to directly access s-expressions functions via YQL::" << Endl
  2545. << "This mechanism is mostly intended for temporary workarounds or internal testing purposes";
  2546. }
  2547. if (ns == "yql") {
  2548. return new TCallNodeImpl(pos, name, -1, -1, args);
  2549. }
  2550. } else if (moduleResource) {
  2551. auto exportName = ns == "core" ? name : "$" + name;
  2552. TVector<TNodePtr> applyArgs = {
  2553. new TCallNodeImpl(pos, "bind", {
  2554. BuildAtom(pos, ns + "_module", 0), BuildQuotedAtom(pos, exportName)
  2555. })
  2556. };
  2557. applyArgs.insert(applyArgs.end(), args.begin(), args.end());
  2558. return new TCallNodeImpl(pos, "Apply", applyArgs);
  2559. } else if (ns == "hyperscan" || ns == "pcre" || ns == "pire" || ns.StartsWith("re2")) {
  2560. TString moduleName(nameSpace);
  2561. moduleName.to_title();
  2562. if ((args.size() == 1 || args.size() == 2) && (name.StartsWith("Multi") || (ns.StartsWith("re2") && name == "Capture"))) {
  2563. TVector<TNodePtr> multiArgs{
  2564. ns.StartsWith("re2") && name == "Capture" ? MakePair(pos, args) : args[0],
  2565. new TCallNodeImpl(pos, "Void", 0, 0, {}),
  2566. args[0]
  2567. };
  2568. auto fullName = moduleName + "." + name;
  2569. return new TYqlTypeConfigUdf(pos, fullName, multiArgs, multiArgs.size() + 1);
  2570. } else if (!(ns.StartsWith("re2") && name == "Options")) {
  2571. auto newArgs = args;
  2572. if (ns.StartsWith("re2")) {
  2573. // convert the run config into a tuple of a string and optional options
  2574. if (args.size() == 1 || args.size() == 2) {
  2575. newArgs[0] = MakePair(pos, args);
  2576. if (args.size() == 2) {
  2577. newArgs.pop_back();
  2578. }
  2579. } else {
  2580. return new TInvalidBuiltin(pos, TStringBuilder() << ns << "." << name << " expected one or two arguments.");
  2581. }
  2582. }
  2583. return BuildUdf(ctx, pos, moduleName, name, newArgs);
  2584. }
  2585. } else if (ns == "datetime2" && (name == "Format" || name == "Parse")) {
  2586. return BuildUdf(ctx, pos, nameSpace, name, args);
  2587. } else if (name == "MakeLibraPreprocessor") {
  2588. if (args.size() != 1) {
  2589. return new TInvalidBuiltin(pos, TStringBuilder() << name << " requires exactly one argument");
  2590. }
  2591. auto settings = NYT::TNode::CreateMap();
  2592. auto makeUdfArgs = [&args, &pos, &settings]() {
  2593. return TVector<TNodePtr> {
  2594. args[0],
  2595. new TCallNodeImpl(pos, "Void", {}),
  2596. BuildQuotedAtom(pos, NYT::NodeToYsonString(settings))
  2597. };
  2598. };
  2599. auto structNode = dynamic_cast<TStructNode*>(args[0].Get());
  2600. if (!structNode) {
  2601. if (auto callNode = dynamic_cast<TCallNode*>(args[0].Get())) {
  2602. if (callNode->GetOpName() == "AsStruct") {
  2603. return BuildUdf(ctx, pos, nameSpace, name, makeUdfArgs());
  2604. }
  2605. }
  2606. return new TInvalidBuiltin(pos, TStringBuilder() << name << " requires struct as argument");
  2607. }
  2608. for (const auto& item : structNode->GetExprs()) {
  2609. const auto& label = item->GetLabel();
  2610. if (label == "Entities") {
  2611. auto callNode = dynamic_cast<TCallNode*>(item.Get());
  2612. if (!callNode || callNode->GetOpName() != "AsList") {
  2613. return new TInvalidBuiltin(pos, TStringBuilder() << name << " entities must be list of strings");
  2614. }
  2615. auto entities = NYT::TNode::CreateList();
  2616. for (const auto& entity : callNode->GetArgs()) {
  2617. if (!entity->IsLiteral() || entity->GetLiteralType() != "String") {
  2618. return new TInvalidBuiltin(pos, TStringBuilder() << name << " entity must be string literal");
  2619. }
  2620. entities.Add(entity->GetLiteralValue());
  2621. }
  2622. settings(label, std::move(entities));
  2623. } else if (label == "EntitiesStrategy") {
  2624. if (!item->IsLiteral() || item->GetLiteralType() != "String") {
  2625. return new TInvalidBuiltin(
  2626. pos, TStringBuilder() << name << " entities strategy must be string literal"
  2627. );
  2628. }
  2629. if (!EqualToOneOf(item->GetLiteralValue(), "whitelist", "blacklist")) {
  2630. return new TInvalidBuiltin(
  2631. pos,
  2632. TStringBuilder() << name << " got invalid entities strategy: expected 'whitelist' or 'blacklist'"
  2633. );
  2634. }
  2635. settings(label, item->GetLiteralValue());
  2636. } else if (label == "Mode") {
  2637. if (!item->IsLiteral() || item->GetLiteralType() != "String") {
  2638. return new TInvalidBuiltin(
  2639. pos, TStringBuilder() << name << " mode must be string literal"
  2640. );
  2641. }
  2642. settings(label, item->GetLiteralValue());
  2643. } else if (EqualToOneOf(label, "BlockstatDict", "ParseWithFat")) {
  2644. continue;
  2645. } else {
  2646. return new TInvalidBuiltin(
  2647. pos,
  2648. TStringBuilder()
  2649. << name << " got unsupported setting: " << label
  2650. << "; supported: Entities, EntitiesStrategy, BlockstatDict, ParseWithFat" );
  2651. }
  2652. }
  2653. return BuildUdf(ctx, pos, nameSpace, name, makeUdfArgs());
  2654. } else if (scriptType != NKikimr::NMiniKQL::EScriptType::Unknown) {
  2655. auto scriptName = NKikimr::NMiniKQL::ScriptTypeAsStr(scriptType);
  2656. return new TScriptUdf(pos, TString(scriptName), name, args);
  2657. } else if (ns.empty()) {
  2658. if (auto simpleType = LookupSimpleTypeBySqlAlias(normalizedName, ctx.FlexibleTypes)) {
  2659. const auto type = ToString(*simpleType);
  2660. if (NUdf::FindDataSlot(type)) {
  2661. YQL_ENSURE(type != "Decimal");
  2662. return new TYqlData(pos, type, args);
  2663. }
  2664. if (type == "Void" || type == "EmptyList" || type == "EmptyDict") {
  2665. return new TCallNodeImpl(pos, type, 0, 0, args);
  2666. } else {
  2667. return new TInvalidBuiltin(pos, TStringBuilder() << "Can not create objects of type " << type);
  2668. }
  2669. }
  2670. if (normalizedName == "decimal") {
  2671. if (args.size() == 2) {
  2672. TVector<TNodePtr> dataTypeArgs = { BuildQuotedAtom(pos, "Decimal", TNodeFlags::Default) };
  2673. for (auto& arg : args) {
  2674. if (auto literal = arg->GetLiteral("Int32")) {
  2675. dataTypeArgs.push_back(BuildQuotedAtom(pos, *literal, TNodeFlags::Default));
  2676. } else {
  2677. dataTypeArgs.push_back(MakeAtomFromExpression(ctx, arg).Build());
  2678. }
  2679. }
  2680. return new TCallNodeImpl(pos, "DataType", dataTypeArgs);
  2681. }
  2682. return new TYqlData(pos, "Decimal", args);
  2683. }
  2684. if (normalizedName == "tablename") {
  2685. return new TTableName(pos, args, ctx.Scoped->CurrService);
  2686. }
  2687. if (normalizedName == "aggregationfactory") {
  2688. if (args.size() < 1 || !args[0]->GetLiteral("String")) {
  2689. return new TInvalidBuiltin(pos, "AGGREGATION_FACTORY requries a function name");
  2690. }
  2691. auto aggNormalizedName = *args[0]->GetLiteral("String");
  2692. auto error = NormalizeName(pos, aggNormalizedName);
  2693. if (!error.Empty()) {
  2694. return new TInvalidBuiltin(pos, error->Message);
  2695. }
  2696. if (aggNormalizedName == "aggregateby") {
  2697. return new TInvalidBuiltin(pos, "AGGREGATE_BY is not allowed to use with AGGREGATION_FACTORY");
  2698. }
  2699. if (aggNormalizedName == "multiaggregateby") {
  2700. return new TInvalidBuiltin(pos, "MULTI_AGGREGATE_BY is not allowed to use with AGGREGATION_FACTORY");
  2701. }
  2702. auto aggrCallback = aggrFuncs.find(aggNormalizedName);
  2703. if (aggrCallback == aggrFuncs.end()) {
  2704. return new TInvalidBuiltin(pos, TStringBuilder() << "Unknown aggregation function: " << *args[0]->GetLiteral("String"));
  2705. }
  2706. if (aggMode == EAggregateMode::Distinct) {
  2707. return new TInvalidBuiltin(pos, "DISTINCT can only be used in aggregation functions");
  2708. }
  2709. return (*aggrCallback).second(pos, args, aggMode, true).Release();
  2710. }
  2711. if (normalizedName == "aggregateby" || normalizedName == "multiaggregateby") {
  2712. const bool multi = (normalizedName == "multiaggregateby");
  2713. if (args.size() != 2) {
  2714. return new TInvalidBuiltin(pos, TStringBuilder() << (multi ? "MULTI_AGGREGATE_BY" : "AGGREGATE_BY") << " requries two arguments");
  2715. }
  2716. auto name = multi ? "MultiAggregateBy" : "AggregateBy";
  2717. auto aggr = BuildFactoryAggregation(pos, name, "", aggMode, multi);
  2718. return new TBasicAggrFunc(pos, name, aggr, args);
  2719. }
  2720. auto aggrCallback = aggrFuncs.find(normalizedName);
  2721. if (aggrCallback != aggrFuncs.end()) {
  2722. return (*aggrCallback).second(pos, args, aggMode, false).Release();
  2723. }
  2724. if (aggMode == EAggregateMode::Distinct) {
  2725. return new TInvalidBuiltin(pos, "DISTINCT can only be used in aggregation functions");
  2726. }
  2727. auto builtinCallback = builtinFuncs.find(normalizedName);
  2728. if (builtinCallback != builtinFuncs.end()) {
  2729. return (*builtinCallback).second(pos, args);
  2730. } else if (normalizedName == "asstruct" || normalizedName == "structtype") {
  2731. if (args.empty()) {
  2732. return new TCallNodeImpl(pos, normalizedName == "asstruct" ? "AsStruct" : "StructType", 0, 0, args);
  2733. }
  2734. if (mustUseNamed && *mustUseNamed) {
  2735. *mustUseNamed = false;
  2736. YQL_ENSURE(args.size() == 2);
  2737. Y_VERIFY_DEBUG(dynamic_cast<TTupleNode*>(args[0].Get()));
  2738. auto posArgs = static_cast<TTupleNode*>(args[0].Get());
  2739. if (posArgs->IsEmpty()) {
  2740. if (normalizedName == "asstruct") {
  2741. return args[1];
  2742. } else {
  2743. Y_VERIFY_DEBUG(dynamic_cast<TStructNode*>(args[1].Get()));
  2744. auto namedArgs = static_cast<TStructNode*>(args[1].Get());
  2745. return new TStructTypeNode(pos, namedArgs->GetExprs());
  2746. }
  2747. }
  2748. }
  2749. return new TInvalidBuiltin(pos, TStringBuilder() <<
  2750. (normalizedName == "asstruct" ? "AsStruct" : "StructType") <<
  2751. " requires all argument to be named");
  2752. } else if (normalizedName == "expandstruct") {
  2753. if (mustUseNamed) {
  2754. if (!*mustUseNamed) {
  2755. return new TInvalidBuiltin(pos, TStringBuilder() << "ExpandStruct requires at least one named argument");
  2756. }
  2757. *mustUseNamed = false;
  2758. }
  2759. YQL_ENSURE(args.size() == 2);
  2760. auto posArgs = static_cast<TTupleNode*>(args[0].Get());
  2761. Y_VERIFY_DEBUG(dynamic_cast<TTupleNode*>(args[0].Get()));
  2762. Y_VERIFY_DEBUG(dynamic_cast<TStructNode*>(args[1].Get()));
  2763. if (posArgs->GetTupleSize() != 1) {
  2764. return new TInvalidBuiltin(pos, TStringBuilder() << "ExpandStruct requires all arguments except first to be named");
  2765. }
  2766. TVector<TNodePtr> flattenMembersArgs = {
  2767. BuildTuple(pos, {BuildQuotedAtom(pos, ""), posArgs->GetTupleElement(0)}),
  2768. BuildTuple(pos, {BuildQuotedAtom(pos, ""), args[1]}),
  2769. };
  2770. return new TCallNodeImpl(pos, "FlattenMembers", 2, 2, flattenMembersArgs);
  2771. } else if (normalizedName == "sqlexternalfunction") {
  2772. return new TCallNodeImpl(pos, "SqlExternalFunction", args);
  2773. } else {
  2774. return new TInvalidBuiltin(pos, TStringBuilder() << "Unknown builtin: " << name);
  2775. }
  2776. }
  2777. TNodePtr positionalArgs;
  2778. TNodePtr namedArgs;
  2779. if (mustUseNamed && *mustUseNamed) {
  2780. YQL_ENSURE(args.size() == 2);
  2781. positionalArgs = args[0];
  2782. namedArgs = args[1];
  2783. *mustUseNamed = false;
  2784. }
  2785. TVector<TNodePtr> usedArgs = args;
  2786. TNodePtr customUserType = nullptr;
  2787. if (ns == "json") {
  2788. ctx.Warning(pos, TIssuesIds::YQL_DEPRECATED_JSON_UDF) << "Json UDF is deprecated. Please use JSON API instead";
  2789. ns = "yson";
  2790. nameSpace = "Yson";
  2791. if (name == "Serialize") {
  2792. name = "SerializeJson";
  2793. }
  2794. else if (name == "Parse") {
  2795. name = "ParseJson";
  2796. }
  2797. }
  2798. if (ctx.PragmaYsonFast && ns == "yson") {
  2799. ns.append('2');
  2800. nameSpace.append('2');
  2801. }
  2802. if (ns.StartsWith("yson")) {
  2803. if (name == "ConvertTo" && usedArgs.size() > 1) {
  2804. customUserType = usedArgs[1];
  2805. usedArgs.erase(usedArgs.begin() + 1);
  2806. }
  2807. if (name == "Serialize") {
  2808. if (usedArgs) {
  2809. usedArgs.resize(1U);
  2810. }
  2811. } else if (ctx.PragmaYsonFast && name == "SerializeJsonEncodeUtf8") {
  2812. name = "SerializeJson";
  2813. if (usedArgs.size() < 2U) {
  2814. usedArgs.emplace_back(BuildYsonOptionsNode(pos, ctx.PragmaYsonAutoConvert, ctx.PragmaYsonStrict, ctx.PragmaYsonFast));
  2815. }
  2816. positionalArgs = BuildTuple(pos, usedArgs);
  2817. auto encodeUtf8 = BuildLiteralBool(pos, true);
  2818. encodeUtf8->SetLabel("EncodeUtf8");
  2819. namedArgs = BuildStructure(pos, {encodeUtf8});
  2820. usedArgs = {positionalArgs, namedArgs};
  2821. } else if (name.StartsWith("From")) {
  2822. if (usedArgs) {
  2823. usedArgs.resize(1U);
  2824. }
  2825. name = "From";
  2826. } else if (name == "GetLength" || name.StartsWith("ConvertTo") || name.StartsWith("Parse") || name.StartsWith("SerializeJson")) {
  2827. if (usedArgs.size() < 2U) {
  2828. usedArgs.emplace_back(BuildYsonOptionsNode(pos, ctx.PragmaYsonAutoConvert, ctx.PragmaYsonStrict, ctx.PragmaYsonFast));
  2829. }
  2830. } else if (name == "Contains" || name.StartsWith("Lookup") || name.StartsWith("YPath")) {
  2831. if (usedArgs.size() < 3U) {
  2832. usedArgs.push_back(BuildYsonOptionsNode(pos, ctx.PragmaYsonAutoConvert, ctx.PragmaYsonStrict, ctx.PragmaYsonFast));
  2833. }
  2834. }
  2835. }
  2836. if (ns == "datetime2" && name == "Update") {
  2837. if (namedArgs) {
  2838. TStructNode* castedNamedArgs = dynamic_cast<TStructNode*>(namedArgs.Get());
  2839. Y_VERIFY_DEBUG(castedNamedArgs);
  2840. auto exprs = castedNamedArgs->GetExprs();
  2841. for (auto& arg : exprs) {
  2842. if (arg->GetLabel() == "Timezone") {
  2843. arg = new TCallNodeImpl(pos, "TimezoneId", 1, 1, { arg });
  2844. arg->SetLabel("TimezoneId");
  2845. }
  2846. }
  2847. namedArgs = BuildStructure(pos, exprs);
  2848. usedArgs.pop_back();
  2849. usedArgs.push_back(namedArgs);
  2850. };
  2851. }
  2852. TMaybe<TString> typeConfig = MakeTypeConfig(ns, usedArgs);
  2853. return BuildSqlCall(ctx, pos, nameSpace, name, usedArgs, positionalArgs, namedArgs, customUserType, typeConfig);
  2854. }
  2855. } // namespace NSQLTranslationV1