builtin.cpp 97 KB


  1. #include "node.h"
  2. #include "context.h"
  3. #include "list_builtin.h"
  4. #include <yql/essentials/ast/yql_type_string.h>
  5. #include <yql/essentials/public/udf/udf_data_type.h>
  6. #include <yql/essentials/minikql/mkql_program_builder.h>
  7. #include <yql/essentials/minikql/mkql_type_ops.h>
  8. #include <yql/essentials/public/issue/yql_issue_id.h>
  9. #include <library/cpp/charset/ci_string.h>
  10. #include <util/string/builder.h>
  11. #include <util/string/cast.h>
  12. #include <util/string/util.h>
  13. #include <util/string/join.h>
  14. #include <unordered_map>
  15. using namespace NYql;
  16. namespace NSQLTranslationV0 {
  17. class TGroupingNode final: public TAstListNode {
  18. public:
  19. TGroupingNode(TPosition pos, const TVector<TNodePtr>& args)
  20. : TAstListNode(pos)
  21. , Args(args)
  22. {}
  23. bool DoInit(TContext& ctx, ISource* src) final {
  24. if (!src) {
  25. ctx.Error(Pos) << "Grouping function should have source";
  26. return false;
  27. }
  28. TVector<TString> columns;
  29. columns.reserve(Args.size());
  30. for (const auto& node: Args) {
  31. auto namePtr = node->GetColumnName();
  32. if (!namePtr || !*namePtr) {
  33. ctx.Error(Pos) << "Grouping function should use columns as arguments";
  34. return false;
  35. }
  36. const auto column = *namePtr;
  37. ISource* composite = src->GetCompositeSource();
  38. if (!src->IsGroupByColumn(column) && !src->IsAlias(EExprSeat::GroupBy, column) && (!composite || !composite->IsGroupByColumn(column))) {
  39. ctx.Error(node->GetPos()) << "Column '" << column << "' not used as grouping column";
  40. return false;
  41. }
  42. columns.emplace_back(column);
  43. }
  44. ui64 hint;
  45. if (!src->CalculateGroupingHint(ctx, columns, hint)) {
  46. return false;
  47. }
  48. Nodes.push_back(BuildAtom(Pos, "Uint64"));
  49. Nodes.push_back(BuildQuotedAtom(Pos, IntToString<10>(hint)));
  50. return TAstListNode::DoInit(ctx, src);
  51. }
  52. TNodePtr DoClone() const final {
  53. return new TGroupingNode(Pos, Args);
  54. }
  55. private:
  56. const TVector<TNodePtr> Args;
  57. };
  58. class TBasicAggrFunc final: public TAstListNode {
  59. public:
  60. TBasicAggrFunc(TPosition pos, const TString& name, TAggregationPtr aggr, const TVector<TNodePtr>& args)
  61. : TAstListNode(pos)
  62. , Name(name)
  63. , Aggr(aggr)
  64. , Args(args)
  65. {}
  66. TCiString GetName() const {
  67. return Name;
  68. }
  69. bool DoInit(TContext& ctx, ISource* src) final {
  70. if (!src) {
  71. ctx.Error(Pos) << "Unable to use aggregation function '" << Name << "' without data source";
  72. return false;
  73. }
  74. if (!DoInitAggregation(ctx, src)) {
  75. return false;
  76. }
  77. return TAstListNode::DoInit(ctx, src);
  78. }
  79. TNodePtr DoClone() const final {
  80. TAggregationPtr aggrClone = static_cast<IAggregation*>(Aggr->Clone().Release());
  81. return new TBasicAggrFunc(Pos, Name, aggrClone, CloneContainer(Args));
  82. }
  83. TAggregationPtr GetAggregation() const override {
  84. return Aggr;
  85. }
  86. private:
  87. bool DoInitAggregation(TContext& ctx, ISource* src) {
  88. if (!Aggr->InitAggr(ctx, false, src, *this, Args)) {
  89. return false;
  90. }
  91. return src->AddAggregation(ctx, Aggr);
  92. }
  93. void DoUpdateState() const final {
  94. State.Set(ENodeState::Const, Args.front()->IsConstant());
  95. State.Set(ENodeState::Aggregated);
  96. }
  97. protected:
  98. const TString Name;
  99. TAggregationPtr Aggr;
  100. TVector<TNodePtr> Args;
  101. };
  102. class TBasicAggrFactory final : public TAstListNode {
  103. public:
  104. TBasicAggrFactory(TPosition pos, const TString& name, TAggregationPtr aggr, const TVector<TNodePtr>& args)
  105. : TAstListNode(pos)
  106. , Name(name)
  107. , Aggr(aggr)
  108. , Args(args)
  109. {}
  110. TCiString GetName() const {
  111. return Name;
  112. }
  113. bool DoInit(TContext& ctx, ISource* src) final {
  114. if (!DoInitAggregation(ctx)) {
  115. return false;
  116. }
  117. auto factory = Aggr->AggregationTraitsFactory();
  118. auto apply = Y("Apply", factory, Y("ListType", "type"));
  119. auto columnIndices = Aggr->GetFactoryColumnIndices();
  120. if (columnIndices.size() == 1) {
  121. apply = L(apply, "extractor");
  122. } else {
  123. // make several extractors from main that returns a tuple
  124. for (ui32 arg = 0; arg < columnIndices.size(); ++arg) {
  125. auto partial = BuildLambda(Pos, Y("row"), Y("Nth", Y("Apply", "extractor", "row"), Q(ToString(columnIndices[arg]))));
  126. apply = L(apply, partial);
  127. }
  128. }
  129. Aggr->AddFactoryArguments(apply);
  130. Lambda = BuildLambda(Pos, Y("type", "extractor"), apply);
  131. return TAstListNode::DoInit(ctx, src);
  132. }
  133. TAstNode* Translate(TContext& ctx) const override {
  134. return Lambda->Translate(ctx);
  135. }
  136. TNodePtr DoClone() const final {
  137. TAggregationPtr aggrClone = static_cast<IAggregation*>(Aggr->Clone().Release());
  138. return new TBasicAggrFactory(Pos, Name, aggrClone, CloneContainer(Args));
  139. }
  140. TAggregationPtr GetAggregation() const override {
  141. return Aggr;
  142. }
  143. private:
  144. bool DoInitAggregation(TContext& ctx) {
  145. return Aggr->InitAggr(ctx, true, nullptr, *this, Args);
  146. }
  147. protected:
  148. const TString Name;
  149. TAggregationPtr Aggr;
  150. TVector<TNodePtr> Args;
  151. TNodePtr Lambda;
  152. };
  153. typedef THolder<TBasicAggrFunc> TAggrFuncPtr;
  154. class TLiteralStringAtom: public INode {
  155. public:
  156. TLiteralStringAtom(TPosition pos, TNodePtr node, const TString& info)
  157. : INode(pos)
  158. , Node(node)
  159. , Info(info)
  160. {
  161. }
  162. bool DoInit(TContext& ctx, ISource* src) override {
  163. Y_UNUSED(src);
  164. if (!Node) {
  165. ctx.Error(Pos) << Info;
  166. return false;
  167. }
  168. if (!Node->Init(ctx, src)) {
  169. return false;
  170. }
  171. Atom = MakeAtomFromExpression(ctx, Node).Build();
  172. return true;
  173. }
  174. TAstNode* Translate(TContext& ctx) const override {
  175. return Atom->Translate(ctx);
  176. }
  177. TPtr DoClone() const final {
  178. return {};
  179. }
  180. private:
  181. TNodePtr Node;
  182. TNodePtr Atom;
  183. TString Info;
  184. };
  185. class TYqlAsAtom: public TLiteralStringAtom {
  186. public:
  187. TYqlAsAtom(TPosition pos, const TVector<TNodePtr>& args)
  188. : TLiteralStringAtom(pos, args.size() == 1 ? args[0] : nullptr, "Literal string is required as argument")
  189. {
  190. }
  191. };
  192. class TYqlData: public TCallNode {
  193. public:
  194. TYqlData(TPosition pos, const TString& type, const TVector<TNodePtr>& args)
  195. : TCallNode(pos, type, 1, 1, args)
  196. {
  197. }
  198. bool DoInit(TContext& ctx, ISource* src) override {
  199. auto slot = NUdf::FindDataSlot(GetOpName());
  200. if (!slot) {
  201. ctx.Error(Pos) << "Unexpected type " << GetOpName();
  202. return false;
  203. }
  204. if (*slot == NUdf::EDataSlot::Decimal) {
  205. MinArgs = MaxArgs = 3;
  206. }
  207. if (!ValidateArguments(ctx)) {
  208. return false;
  209. }
  210. auto stringNode = Args[0];
  211. auto atom = stringNode->GetLiteral("String");
  212. if (!atom) {
  213. ctx.Error(Pos) << "Expected literal string as argument in " << GetOpName() << " function";
  214. return false;
  215. }
  216. TString value;
  217. if (*slot == NUdf::EDataSlot::Decimal) {
  218. const auto precision = Args[1]->GetLiteral("Int32");
  219. const auto scale = Args[2]->GetLiteral("Int32");
  220. if (!NKikimr::NMiniKQL::IsValidDecimal(*atom)) {
  221. ctx.Error(Pos) << "Invalid value " << atom->Quote() << " for type " << GetOpName();
  222. return false;
  223. }
  224. ui8 stub;
  225. if (!(precision && TryFromString<ui8>(*precision, stub))) {
  226. ctx.Error(Pos) << "Invalid precision " << (precision ? precision->Quote() : "") << " for type " << GetOpName();
  227. return false;
  228. }
  229. if (!(scale && TryFromString<ui8>(*scale, stub))) {
  230. ctx.Error(Pos) << "Invalid scale " << (scale ? scale->Quote() : "") << " for type " << GetOpName();
  231. return false;
  232. }
  233. Args[0] = BuildQuotedAtom(GetPos(), *atom);
  234. Args[1] = BuildQuotedAtom(GetPos(), *precision);
  235. Args[2] = BuildQuotedAtom(GetPos(), *scale);
  236. return TCallNode::DoInit(ctx, src);
  237. } else if (NUdf::GetDataTypeInfo(*slot).Features & (NUdf::DateType | NUdf::TzDateType | NUdf::TimeIntervalType)) {
  238. const auto out = NKikimr::NMiniKQL::ValueFromString(*slot, *atom);
  239. if (!out) {
  240. ctx.Error(Pos) << "Invalid value " << atom->Quote() << " for type " << GetOpName();
  241. return false;
  242. }
  243. switch (*slot) {
  244. case NUdf::EDataSlot::Date:
  245. case NUdf::EDataSlot::TzDate:
  246. value = ToString(out.Get<ui16>());
  247. break;
  248. case NUdf::EDataSlot::Datetime:
  249. case NUdf::EDataSlot::TzDatetime:
  250. value = ToString(out.Get<ui32>());
  251. break;
  252. case NUdf::EDataSlot::Timestamp:
  253. case NUdf::EDataSlot::TzTimestamp:
  254. value = ToString(out.Get<ui64>());
  255. break;
  256. case NUdf::EDataSlot::Interval:
  257. value = ToString(out.Get<i64>());
  258. if ('T' == atom->back()) {
  259. ctx.Warning(Pos, TIssuesIds::YQL_DEPRECATED_INTERVAL_CONSTANT) << "Time prefix 'T' at end of interval constant";
  260. }
  261. break;
  262. default:
  263. Y_ABORT("Unexpected data slot");
  264. }
  265. if (NUdf::GetDataTypeInfo(*slot).Features & NUdf::TzDateType) {
  266. value += ",";
  267. value += NKikimr::NMiniKQL::GetTimezoneIANAName(out.GetTimezoneId());
  268. }
  269. } else if (NUdf::EDataSlot::Uuid == *slot) {
  270. char out[0x10];
  271. if (!NKikimr::NMiniKQL::ParseUuid(*atom, out)) {
  272. ctx.Error(Pos) << "Invalid value " << atom->Quote() << " for type " << GetOpName();
  273. return false;
  274. }
  275. value.assign(out, sizeof(out));
  276. } else {
  277. if (!NKikimr::NMiniKQL::IsValidStringValue(*slot, *atom)) {
  278. ctx.Error(Pos) << "Invalid value " << atom->Quote() << " for type " << GetOpName();
  279. return false;
  280. }
  281. value = *atom;
  282. }
  283. Args[0] = BuildQuotedAtom(GetPos(), value);
  284. return TCallNode::DoInit(ctx, src);
  285. }
  286. TPtr DoClone() const final {
  287. return new TYqlData(GetPos(), OpName, CloneContainer(Args));
  288. }
  289. };
  290. class TTableName : public TCallNode {
  291. public:
  292. TTableName(TPosition pos, const TVector<TNodePtr>& args, const TString& cluster)
  293. : TCallNode(pos, "TableName", 0, 2, args)
  294. , Cluster(cluster)
  295. {
  296. }
  297. bool DoInit(TContext& ctx, ISource* src) override {
  298. if (!ValidateArguments(ctx)) {
  299. return false;
  300. }
  301. if (Args.empty()) {
  302. Args.push_back(Y("TablePath", Y("DependsOn", "row")));
  303. }
  304. if (Args.size() == 2) {
  305. auto literal = Args[1]->GetLiteral("String");
  306. if (!literal) {
  307. ctx.Error(Args[1]->GetPos()) << "Expected literal string as second argument in TableName function";
  308. return false;
  309. }
  310. Args[1] = BuildQuotedAtom(Args[1]->GetPos(), to_lower(*literal));
  311. } else {
  312. if (Cluster.empty()) {
  313. ctx.Error(GetPos()) << GetOpName() << " requires either one of \"yt\"/\"kikimr\"/\"rtmr\" as second argument or current cluster name";
  314. return false;
  315. }
  316. auto service = ctx.GetClusterProvider(Cluster);
  317. if (!service) {
  318. ctx.Error() << "Unknown cluster name: " << Cluster;
  319. return false;
  320. }
  321. Args.push_back(BuildQuotedAtom(GetPos(), to_lower(*service)));
  322. }
  323. return TCallNode::DoInit(ctx, src);
  324. }
  325. TPtr DoClone() const final {
  326. return new TTableName(GetPos(), CloneContainer(Args), Cluster);
  327. }
  328. void DoUpdateState() const override {
  329. State.Set(ENodeState::Const, false);
  330. }
  331. private:
  332. TString Cluster;
  333. };
  334. class TYqlParseType final : public INode {
  335. public:
  336. TYqlParseType(TPosition pos, const TVector<TNodePtr>& args)
  337. : INode(pos)
  338. , Args(args)
  339. {}
  340. TAstNode* Translate(TContext& ctx) const override {
  341. if (Args.size() != 1) {
  342. ctx.Error(Pos) << "Expected 1 argument in ParseType function";
  343. return nullptr;
  344. }
  345. auto literal = Args[0]->GetLiteral("String");
  346. if (!literal) {
  347. ctx.Error(Args[0]->GetPos()) << "Expected literal string as argument in ParseType function";
  348. return nullptr;
  349. }
  350. auto parsed = ParseType(*literal, *ctx.Pool, ctx.Issues, Args[0]->GetPos());
  351. if (!parsed) {
  352. ctx.Error(Args[0]->GetPos()) << "Failed to parse type";
  353. return nullptr;
  354. }
  355. return parsed;
  356. }
  357. TNodePtr DoClone() const final {
  358. return new TYqlParseType(Pos, Args);
  359. }
  360. private:
  361. TVector<TNodePtr> Args;
  362. };
  363. class TYqlAddTimezone: public TCallNode {
  364. public:
  365. TYqlAddTimezone(TPosition pos, const TVector<TNodePtr>& args)
  366. : TCallNode(pos, "AddTimezone", 2, 2, args)
  367. {
  368. }
  369. bool DoInit(TContext& ctx, ISource* src) override {
  370. if (!ValidateArguments(ctx)) {
  371. return false;
  372. }
  373. Args[1] = Y("TimezoneId", Args[1]);
  374. return TCallNode::DoInit(ctx, src);
  375. }
  376. TNodePtr DoClone() const final {
  377. return new TYqlAddTimezone(Pos, CloneContainer(Args));
  378. }
  379. };
  380. template <bool Strict>
  381. class TYqlTypeAssert : public TCallNode {
  382. public:
  383. TYqlTypeAssert(TPosition pos, const TVector<TNodePtr>& args)
  384. : TCallNode(pos, Strict ? "EnsureType" : "EnsureConvertibleTo", 2, 3, args)
  385. {
  386. }
  387. bool DoInit(TContext& ctx, ISource* src) override {
  388. if (!ValidateArguments(ctx)) {
  389. return false;
  390. }
  391. auto literal = Args[1]->GetLiteral("String");
  392. INode::TPtr type;
  393. if (literal) {
  394. auto parsed = ParseType(*literal, *ctx.Pool, ctx.Issues, Args[0]->GetPos());
  395. if (!parsed) {
  396. ctx.Error(Args[1]->GetPos()) << "Failed to parse type";
  397. return false;
  398. }
  399. type = AstNode(parsed);
  400. } else {
  401. type = Args[1];
  402. }
  403. if (!type->Init(ctx, src)) {
  404. return false;
  405. }
  406. Args[1] = type;
  407. if (Args.size() == 3) {
  408. if (!Args[2]->Init(ctx, src)) {
  409. return false;
  410. }
  411. auto message = MakeAtomFromExpression(ctx, Args[2]).Build();
  412. Args[2] = message;
  413. }
  414. return TCallNode::DoInit(ctx, src);
  415. }
  416. TNodePtr DoClone() const final {
  417. return new TYqlTypeAssert<Strict>(Pos, Args);
  418. }
  419. };
  420. class TFromBytes final : public TCallNode {
  421. public:
  422. TFromBytes(TPosition pos, const TVector<TNodePtr>& args)
  423. : TCallNode(pos, "FromBytes", 2, 2, args)
  424. {}
  425. bool DoInit(TContext& ctx, ISource* src) override {
  426. if (!ValidateArguments(ctx)) {
  427. return false;
  428. }
  429. if (!Args[1]->Init(ctx, src)) {
  430. return false;
  431. }
  432. Args[1] = MakeAtomFromExpression(ctx, Args[1]).Build();
  433. return TCallNode::DoInit(ctx, src);
  434. }
  435. TNodePtr DoClone() const final {
  436. return new TFromBytes(Pos, Args);
  437. }
  438. };
  439. class TYqlTaggedBase : public TCallNode {
  440. public:
  441. TYqlTaggedBase(TPosition pos, const TString& opName, const TVector<TNodePtr>& args)
  442. : TCallNode(pos, opName, 2, 2, args)
  443. {}
  444. bool DoInit(TContext& ctx, ISource* src) override {
  445. if (!ValidateArguments(ctx)) {
  446. return false;
  447. }
  448. if (!Args[1]->Init(ctx, src)) {
  449. return false;
  450. }
  451. Args[1] = MakeAtomFromExpression(ctx, Args[1]).Build();
  452. return TCallNode::DoInit(ctx, src);
  453. }
  454. };
  455. class TYqlAsTagged final : public TYqlTaggedBase {
  456. public:
  457. TYqlAsTagged(TPosition pos, const TVector<TNodePtr>& args)
  458. : TYqlTaggedBase(pos, "AsTagged", args)
  459. {}
  460. TNodePtr DoClone() const final {
  461. return new TYqlAsTagged(Pos, Args);
  462. }
  463. };
  464. class TYqlUntag final : public TYqlTaggedBase {
  465. public:
  466. TYqlUntag(TPosition pos, const TVector<TNodePtr>& args)
  467. : TYqlTaggedBase(pos, "Untag", args)
  468. {}
  469. TNodePtr DoClone() const final {
  470. return new TYqlUntag(Pos, Args);
  471. }
  472. };
  473. class TYqlVariant final : public TCallNode {
  474. public:
  475. TYqlVariant(TPosition pos, const TVector<TNodePtr>& args)
  476. : TCallNode(pos, "Variant", 3, 3, args)
  477. {}
  478. bool DoInit(TContext& ctx, ISource* src) override {
  479. if (!ValidateArguments(ctx)) {
  480. return false;
  481. }
  482. if (!Args[1]->Init(ctx, src)) {
  483. return false;
  484. }
  485. Args[1] = MakeAtomFromExpression(ctx, Args[1]).Build();
  486. return TCallNode::DoInit(ctx, src);
  487. }
  488. TNodePtr DoClone() const final {
  489. return new TYqlVariant(Pos, Args);
  490. }
  491. };
  492. TNodePtr BuildFileNameArgument(TPosition pos, const TNodePtr& argument) {
  493. return new TLiteralStringAtom(pos, argument, "FilePath requires string literal as parameter");
  494. }
  495. class TYqlAtom final: public TCallNode {
  496. public:
  497. TYqlAtom(TPosition pos, const TString& opName, const TVector<TNodePtr>& args)
  498. : TCallNode(pos, opName, 1, 1, args)
  499. {}
  500. bool DoInit(TContext& ctx, ISource* src) override {
  501. if (!Args.empty()) {
  502. Args[0] = BuildFileNameArgument(ctx.Pos(), Args[0]);
  503. }
  504. return TCallNode::DoInit(ctx, src);
  505. }
  506. TNodePtr DoClone() const final {
  507. return new TYqlAtom(Pos, OpName, Args);
  508. }
  509. };
  510. class TAddMember final: public TCallNode {
  511. public:
  512. TAddMember(TPosition pos, const TString& opName, const TVector<TNodePtr>& args)
  513. : TCallNode(pos, opName, 3, 3, args)
  514. {}
  515. bool DoInit(TContext& ctx, ISource* src) override {
  516. if (Args.size() != 3) {
  517. ctx.Error(Pos) << OpName << " requires exactly three arguments";
  518. return false;
  519. }
  520. for (const auto& arg : Args) {
  521. if (!arg->Init(ctx, src)) {
  522. return false;
  523. }
  524. }
  525. Args[1] = MakeAtomFromExpression(ctx, Args[1]).Build();
  526. return TCallNode::DoInit(ctx, src);
  527. }
  528. TNodePtr DoClone() const final {
  529. return new TAddMember(Pos, OpName, Args);
  530. }
  531. };
  532. class TRemoveMember final: public TCallNode {
  533. public:
  534. TRemoveMember(TPosition pos, const TString& opName, const TVector<TNodePtr>& args)
  535. : TCallNode(pos, opName, 2, 2, args)
  536. {}
  537. bool DoInit(TContext& ctx, ISource* src) override {
  538. if (Args.size() != 2) {
  539. ctx.Error(Pos) << OpName << " requires exactly two arguments";
  540. return false;
  541. }
  542. for (const auto& arg : Args) {
  543. if (!arg->Init(ctx, src)) {
  544. return false;
  545. }
  546. }
  547. Args[1] = MakeAtomFromExpression(ctx, Args[1]).Build();
  548. return TCallNode::DoInit(ctx, src);
  549. }
  550. TNodePtr DoClone() const final {
  551. return new TRemoveMember(Pos, OpName, Args);
  552. }
  553. };
  554. class TCombineMembers final: public TCallNode {
  555. public:
  556. TCombineMembers(TPosition pos, const TString& opName, const TVector<TNodePtr>& args)
  557. : TCallNode(pos, opName, 1, -1, args)
  558. {}
  559. bool DoInit(TContext& ctx, ISource* src) override {
  560. if (Args.empty()) {
  561. ctx.Error(Pos) << "CombineMembers requires at least one argument";
  562. return false;
  563. }
  564. for (size_t i = 0; i < Args.size(); ++i) {
  565. Args[i] = Q(Y(Q(""), Args[i])); // flatten without prefix
  566. }
  567. return TCallNode::DoInit(ctx, src);
  568. }
  569. TNodePtr DoClone() const final {
  570. return new TCombineMembers(Pos, OpName, Args);
  571. }
  572. };
  573. class TFlattenMembers final: public TCallNode {
  574. public:
  575. TFlattenMembers(TPosition pos, const TString& opName, const TVector<TNodePtr>& args)
  576. : TCallNode(pos, opName, 1, -1, args)
  577. {}
  578. bool DoInit(TContext& ctx, ISource* src) override {
  579. if (Args.empty()) {
  580. ctx.Error(Pos) << OpName << " requires at least one argument";
  581. return false;
  582. }
  583. for (size_t i = 0; i < Args.size(); ++i) {
  584. if (!Args[i]->Init(ctx, src)) {
  585. return false;
  586. }
  587. if (Args[i]->GetTupleSize() == 2) {
  588. // flatten with prefix
  589. Args[i] = Q(Y(
  590. MakeAtomFromExpression(ctx, Args[i]->GetTupleElement(0)).Build(),
  591. Args[i]->GetTupleElement(1)
  592. ));
  593. } else {
  594. ctx.Error(Pos) << OpName << " requires arguments to be tuples of size 2: prefix and struct";
  595. return false;
  596. }
  597. }
  598. return TCallNode::DoInit(ctx, src);
  599. }
  600. TNodePtr DoClone() const final {
  601. return new TFlattenMembers(Pos, OpName, Args);
  602. }
  603. };
  604. TString NormalizeTypeString(const TString& str) {
  605. auto ret = to_title(str);
  606. if (ret.StartsWith("Tz")) {
  607. ret = "Tz" + to_title(ret.substr(2));
  608. }
  609. return ret;
  610. }
  611. static const TSet<TString> AvailableDataTypes = {"Bool", "String", "Uint32", "Uint64", "Int32", "Int64", "Float", "Double", "Utf8", "Yson", "Json",
  612. "Date", "Datetime", "Timestamp", "Interval", "Uint8", "Int8", "Uint16", "Int16", "TzDate", "TzDatetime", "TzTimestamp", "Uuid", "Decimal"};
  613. TNodePtr GetDataTypeStringNode(TContext& ctx, TCallNode& node, unsigned argNum, TString* outTypeStrPtr = nullptr) {
  614. auto errMsgFunc = [&node, argNum]() {
  615. static std::array<TString, 2> numToName = {{"first", "second"}};
  616. TStringBuilder sb;
  617. sb << "At " << numToName.at(argNum) << " argument of " << node.GetOpName() << " expected type string, available one of: "
  618. << JoinRange(", ", AvailableDataTypes.begin(), AvailableDataTypes.end()) << ";";
  619. return TString(sb);
  620. };
  621. auto typeStringNode = node.GetArgs().at(argNum);
  622. auto typeStringPtr = typeStringNode->GetLiteral("String");
  623. TNodePtr dataTypeNode;
  624. if (typeStringPtr) {
  625. TString typeString = NormalizeTypeString(*typeStringPtr);
  626. if (!AvailableDataTypes.contains(typeString)) {
  627. ctx.Error(typeStringNode->GetPos()) << "Bad type string: '" << typeString << "'. " << errMsgFunc();
  628. return {};
  629. }
  630. if (outTypeStrPtr) {
  631. *outTypeStrPtr = typeString;
  632. }
  633. dataTypeNode = typeStringNode->Q(typeString);
  634. } else {
  635. ctx.Error(typeStringNode->GetPos()) << errMsgFunc();
  636. return {};
  637. }
  638. return dataTypeNode;
  639. }
  640. class TYqlParseFileOp final: public TCallNode {
  641. public:
  642. TYqlParseFileOp(TPosition pos, const TVector<TNodePtr>& args)
  643. : TCallNode(pos, "ParseFile", 2, 2, args)
  644. {}
  645. bool DoInit(TContext& ctx, ISource* src) override {
  646. if (!ValidateArguments(ctx)) {
  647. return false;
  648. }
  649. auto dataTypeStringNode = GetDataTypeStringNode(ctx, *this, 0);
  650. if (!dataTypeStringNode) {
  651. return false;
  652. }
  653. auto aliasNode = BuildFileNameArgument(Args[1]->GetPos(), Args[1]);
  654. OpName = "Apply";
  655. Args[0] = Y("Udf", Q("File.ByLines"), Y("Void"),
  656. Y("TupleType",
  657. Y("TupleType", Y("DataType", dataTypeStringNode)),
  658. Y("StructType"),
  659. Y("TupleType")));
  660. Args[1] = Y("FilePath", aliasNode);
  661. return TCallNode::DoInit(ctx, src);
  662. }
  663. TString GetOpName() const override {
  664. return "ParseFile";
  665. }
  666. TNodePtr DoClone() const final {
  667. return new TYqlParseFileOp(Pos, Args);
  668. }
  669. };
  670. class TYqlDataType final : public TCallNode {
  671. public:
  672. TYqlDataType(TPosition pos, const TVector<TNodePtr>& args)
  673. : TCallNode(pos, "DataType", 1, 1, args)
  674. {}
  675. bool DoInit(TContext& ctx, ISource* src) override {
  676. if (!ValidateArguments(ctx)) {
  677. return false;
  678. }
  679. auto dataTypeStringNode = GetDataTypeStringNode(ctx, *this, 0);
  680. if (!dataTypeStringNode) {
  681. return false;
  682. }
  683. Args[0] = dataTypeStringNode;
  684. return TCallNode::DoInit(ctx, src);
  685. }
  686. TNodePtr DoClone() const final {
  687. return new TYqlDataType(Pos, Args);
  688. }
  689. };
  690. TNodePtr TryBuildDataType(TPosition pos, const TString& stringType) {
  691. auto normStringType = NormalizeTypeString(stringType);
  692. if (!AvailableDataTypes.contains(normStringType)) {
  693. return {};
  694. }
  695. return new TYqlDataType(pos, {BuildLiteralRawString(pos, normStringType)});
  696. }
  697. class TYqlResourceType final : public TCallNode {
  698. public:
  699. TYqlResourceType(TPosition pos, const TVector<TNodePtr>& args)
  700. : TCallNode(pos, "ResourceType", 1, 1, args)
  701. {}
  702. bool DoInit(TContext& ctx, ISource* src) override {
  703. if (!ValidateArguments(ctx)) {
  704. return false;
  705. }
  706. if (!Args[0]->Init(ctx, src)) {
  707. return false;
  708. }
  709. Args[0] = MakeAtomFromExpression(ctx, Args[0]).Build();
  710. return TCallNode::DoInit(ctx, src);
  711. }
  712. TNodePtr DoClone() const final {
  713. return new TYqlResourceType(Pos, Args);
  714. }
  715. };
  716. class TYqlTaggedType final : public TCallNode {
  717. public:
  718. TYqlTaggedType(TPosition pos, const TVector<TNodePtr>& args)
  719. : TCallNode(pos, "TaggedType", 2, 2, args)
  720. {}
  721. bool DoInit(TContext& ctx, ISource* src) override {
  722. if (!ValidateArguments(ctx)) {
  723. return false;
  724. }
  725. if (!Args[1]->Init(ctx, src)) {
  726. return false;
  727. }
  728. Args[1] = MakeAtomFromExpression(ctx, Args[1]).Build();
  729. return TCallNode::DoInit(ctx, src);
  730. }
  731. TNodePtr DoClone() const final {
  732. return new TYqlTaggedType(Pos, Args);
  733. }
  734. };
  735. class TYqlCallableType final : public TCallNode {
  736. public:
  737. TYqlCallableType(TPosition pos, const TVector<TNodePtr>& args)
  738. : TCallNode(pos, "CallableType", 2, -1, args)
  739. {}
  740. bool DoInit(TContext& ctx, ISource* src) override {
  741. if (!ValidateArguments(ctx)) {
  742. return false;
  743. }
  744. if (!dynamic_cast<TTupleNode*>(Args[0].Get())) {
  745. ui32 numOptArgs;
  746. if (!Parseui32(Args[0], numOptArgs)) {
  747. ctx.Error(Args[0]->GetPos()) << "Expected either tuple or number of optional arguments";
  748. return false;
  749. }
  750. Args[0] = Q(Y(BuildQuotedAtom(Args[0]->GetPos(), ToString(numOptArgs))));
  751. }
  752. if (!dynamic_cast<TTupleNode*>(Args[1].Get())) {
  753. Args[1] = Q(Y(Args[1]));
  754. }
  755. for (ui32 index = 2; index < Args.size(); ++index) {
  756. if (!dynamic_cast<TTupleNode*>(Args[index].Get())) {
  757. Args[index] = Q(Y(Args[index]));
  758. }
  759. }
  760. return TCallNode::DoInit(ctx, src);
  761. }
  762. TNodePtr DoClone() const final {
  763. return new TYqlCallableType(Pos, Args);
  764. }
  765. };
  766. class TYqlTupleElementType final : public TCallNode {
  767. public:
  768. TYqlTupleElementType(TPosition pos, const TVector<TNodePtr>& args)
  769. : TCallNode(pos, "TupleElementType", 2, 2, args)
  770. {}
  771. bool DoInit(TContext& ctx, ISource* src) override {
  772. if (!ValidateArguments(ctx)) {
  773. return false;
  774. }
  775. if (!Args[1]->Init(ctx, src)) {
  776. return false;
  777. }
  778. Args[1] = MakeAtomFromExpression(ctx, Args[1]).Build();
  779. return TCallNode::DoInit(ctx, src);
  780. }
  781. TNodePtr DoClone() const final {
  782. return new TYqlTupleElementType(Pos, Args);
  783. }
  784. };
  785. class TYqlStructMemberType final : public TCallNode {
  786. public:
  787. TYqlStructMemberType(TPosition pos, const TVector<TNodePtr>& args)
  788. : TCallNode(pos, "StructMemberType", 2, 2, args)
  789. {}
  790. bool DoInit(TContext& ctx, ISource* src) override {
  791. if (!ValidateArguments(ctx)) {
  792. return false;
  793. }
  794. if (!Args[1]->Init(ctx, src)) {
  795. return false;
  796. }
  797. Args[1] = MakeAtomFromExpression(ctx, Args[1]).Build();
  798. return TCallNode::DoInit(ctx, src);
  799. }
  800. TNodePtr DoClone() const final {
  801. return new TYqlStructMemberType(Pos, Args);
  802. }
  803. };
  804. class TYqlCallableArgumentType final : public TCallNode {
  805. public:
  806. TYqlCallableArgumentType(TPosition pos, const TVector<TNodePtr>& args)
  807. : TCallNode(pos, "CallableArgumentType", 2, 2, args)
  808. {}
  809. bool DoInit(TContext& ctx, ISource* src) override {
  810. if (!ValidateArguments(ctx)) {
  811. return false;
  812. }
  813. ui32 index;
  814. if (!Parseui32(Args[1], index)) {
  815. ctx.Error(Args[1]->GetPos()) << "Expected index of the callable argument";
  816. return false;
  817. }
  818. Args[1] = BuildQuotedAtom(Args[1]->GetPos(), ToString(index));
  819. return TCallNode::DoInit(ctx, src);
  820. }
  821. TNodePtr DoClone() const final {
  822. return new TYqlCallableArgumentType(Pos, Args);
  823. }
  824. };
  825. class TStructTypeNode : public TAstListNode {
  826. public:
  827. TStructTypeNode(TPosition pos, const TVector<TNodePtr>& exprs)
  828. : TAstListNode(pos)
  829. , Exprs(exprs)
  830. {}
  831. bool DoInit(TContext& ctx, ISource* src) override {
  832. Nodes.push_back(BuildAtom(Pos, "StructType", TNodeFlags::Default));
  833. for (const auto& expr : Exprs) {
  834. const auto& label = expr->GetLabel();
  835. if (!label) {
  836. ctx.Error(expr->GetPos()) << "Structure does not allow anonymous members";
  837. return false;
  838. }
  839. Nodes.push_back(Q(Y(Q(label), expr)));
  840. }
  841. return TAstListNode::DoInit(ctx, src);
  842. }
  843. TNodePtr DoClone() const final {
  844. return new TStructTypeNode(Pos, CloneContainer(Exprs));
  845. }
  846. private:
  847. const TVector<TNodePtr> Exprs;
  848. };
  849. template <bool IsStrict>
  850. class TYqlIf final: public TCallNode {
  851. public:
  852. TYqlIf(TPosition pos, const TVector<TNodePtr>& args)
  853. : TCallNode(pos, IsStrict ? "IfStrict" : "If", 2, 3, args)
  854. {}
  855. private:
  856. TCallNode::TPtr DoClone() const override {
  857. return new TYqlIf(GetPos(), CloneContainer(Args));
  858. }
  859. bool DoInit(TContext& ctx, ISource* src) override {
  860. if (!ValidateArguments(ctx)) {
  861. return false;
  862. }
  863. Args[0] = Y("Coalesce", Args[0], Y("Bool", Q("false")));
  864. if (Args.size() == 2) {
  865. Args.push_back(Y("Null"));
  866. }
  867. return TCallNode::DoInit(ctx, src);
  868. }
  869. };
  870. class TYqlSubstring final: public TCallNode {
  871. public:
  872. TYqlSubstring(TPosition pos, const TVector<TNodePtr>& args)
  873. : TCallNode(pos, "Substring", 2, 3, args)
  874. {}
  875. private:
  876. TCallNode::TPtr DoClone() const override {
  877. return new TYqlSubstring(GetPos(), CloneContainer(Args));
  878. }
  879. bool DoInit(TContext& ctx, ISource* src) override {
  880. if (Args.size() == 2) {
  881. Args.push_back(Y("Uint32", Q(ToString(Max<ui32>()))));
  882. }
  883. return TCallNode::DoInit(ctx, src);
  884. }
  885. };
  886. class TYqlIn final: public TCallNode {
  887. public:
  888. TYqlIn(TPosition pos, const TVector<TNodePtr>& args)
  889. : TCallNode(pos, "IN", 3, 3, args)
  890. {}
  891. private:
  892. TNodePtr DoClone() const final {
  893. return new TYqlIn(Pos, CloneContainer(Args));
  894. }
  895. bool DoInit(TContext& ctx, ISource* src) override {
  896. if (!ValidateArguments(ctx)) {
  897. return false;
  898. }
  899. auto key = Args[0];
  900. auto inNode = Args[1];
  901. auto hints = Args[2];
  902. if (!key->Init(ctx, src)) {
  903. return false;
  904. }
  905. if (!inNode->Init(ctx, inNode->GetSource() ? nullptr : src)) {
  906. return false;
  907. }
  908. if (inNode->GetLiteral("String")) {
  909. ctx.Error(inNode->GetPos()) << "Unable to use IN predicate with string argument, it won't search substring - "
  910. "expecting tuple, list, dict or single column table source";
  911. return false;
  912. }
  913. if (inNode->GetTupleSize() == 1) {
  914. auto singleElement = inNode->GetTupleElement(0);
  915. // TODO: 'IN ((select ...))' is parsed exactly like 'IN (select ...)' instead of a single element tuple
  916. if (singleElement->GetSource() || singleElement->IsSelect()) {
  917. TStringBuf parenKind = singleElement->GetSource() ? "" : "external ";
  918. ctx.Warning(inNode->GetPos(),
  919. TIssuesIds::YQL_CONST_SUBREQUEST_IN_LIST) << "Using subrequest in scalar context after IN, "
  920. << "perhaps you should remove "
  921. << parenKind << "parenthesis here";
  922. }
  923. }
  924. if (inNode->GetSource() || inNode->IsSelect()) {
  925. TVector<TNodePtr> hintElements;
  926. for (size_t i = 0; i < hints->GetTupleSize(); ++i) {
  927. hintElements.push_back(hints->GetTupleElement(i));
  928. }
  929. auto pos = inNode->GetPos();
  930. auto tableSourceHint = BuildTuple(pos, { BuildQuotedAtom(pos, "tableSource", NYql::TNodeFlags::Default) });
  931. hintElements.push_back(tableSourceHint);
  932. hints = BuildTuple(pos, hintElements);
  933. }
  934. OpName = "SqlIn";
  935. MinArgs = MaxArgs = 3;
  936. Args = {
  937. inNode->GetSource() ? inNode->GetSource() : inNode,
  938. key,
  939. hints
  940. };
  941. return TCallNode::DoInit(ctx, src);
  942. }
  943. TString GetOpName() const override {
  944. return "IN predicate";
  945. }
  946. };
  947. class TYqlUdfBase : public TCallNode {
  948. public:
  949. TYqlUdfBase(TPosition pos, const TString& name)
  950. : TCallNode(pos, "Udf", 1, 1, UdfArgs(pos, name))
  951. {}
  952. TYqlUdfBase(TPosition pos, const TString& name, const TVector<TNodePtr>& args, ui32 argsCount = 2)
  953. : TCallNode(pos, "Udf", argsCount, argsCount, UdfArgs(pos, name, &args))
  954. {}
  955. protected:
  956. TYqlUdfBase(TPosition pos, const TString& opName, ui32 minArgs, ui32 maxArgs, const TVector<TNodePtr>& args)
  957. : TCallNode(pos, opName, minArgs, maxArgs, args)
  958. {}
  959. private:
  960. static TVector<TNodePtr> UdfArgs(TPosition pos, const TString& name, const TVector<TNodePtr>* args = nullptr) {
  961. TVector<TNodePtr> res = { BuildQuotedAtom(pos, name) };
  962. if (args) {
  963. res.insert(res.end(), args->begin(), args->end());
  964. }
  965. return res;
  966. }
  967. void DoUpdateState() const override {
  968. TCallNode::DoUpdateState();
  969. State.Set(ENodeState::Aggregated, false/*!RunConfig || RunConfig->IsAggregated()*/);
  970. State.Set(ENodeState::Const, true /* FIXME: To avoid CheckAggregationLevel issue for non-const TypeOf. */);
  971. }
  972. private:
  973. TNodePtr RunConfig;
  974. };
  975. class TYqlUdf final : public TYqlUdfBase {
  976. public:
  977. TYqlUdf(TPosition pos, const TString& name)
  978. : TYqlUdfBase(pos, name)
  979. {}
  980. TYqlUdf(TPosition pos, const TString& name, const TVector<TNodePtr>& args, ui32 argsCount = 2)
  981. : TYqlUdfBase(pos, name, args, argsCount)
  982. {}
  983. private:
  984. TYqlUdf(const TYqlUdf& other)
  985. : TYqlUdfBase(other.GetPos(), "Udf", other.MinArgs, other.MaxArgs, CloneContainer(other.Args))
  986. {}
  987. TNodePtr DoClone() const final {
  988. return new TYqlUdf(*this);
  989. }
  990. };
  991. class TYqlTypeConfigUdf final : public TYqlUdfBase {
  992. public:
  993. TYqlTypeConfigUdf(TPosition pos, const TString& name)
  994. : TYqlUdfBase(pos, name)
  995. {}
  996. TYqlTypeConfigUdf(TPosition pos, const TString& name, const TVector<TNodePtr>& args, ui32 argsCount = 2)
  997. : TYqlUdfBase(pos, name, args, argsCount)
  998. {}
  999. private:
  1000. TYqlTypeConfigUdf(const TYqlTypeConfigUdf& other)
  1001. : TYqlUdfBase(other.GetPos(), "Udf", other.MinArgs, other.MaxArgs, CloneContainer(other.Args))
  1002. {}
  1003. bool DoInit(TContext& ctx, ISource* src) override {
  1004. if (!ValidateArguments(ctx)) {
  1005. return false;
  1006. }
  1007. if (!Args[3]->Init(ctx, src)) {
  1008. return false;
  1009. }
  1010. Args[3] = MakeAtomFromExpression(ctx, Args[3]).Build();
  1011. return TYqlUdfBase::DoInit(ctx, src);
  1012. }
  1013. TNodePtr DoClone() const final {
  1014. return new TYqlTypeConfigUdf(*this);
  1015. }
  1016. };
  1017. class TWeakFieldOp final: public TCallNode {
  1018. public:
  1019. TWeakFieldOp(TPosition pos, const TVector<TNodePtr>& args)
  1020. : TCallNode(pos, "WeakField", 2, 3, args)
  1021. {}
  1022. bool DoInit(TContext& ctx, ISource* src) override {
  1023. if (!src) {
  1024. ctx.Error(Pos) << GetCallExplain() << " unable use without source";
  1025. return false;
  1026. }
  1027. src->AllColumns();
  1028. if (!ValidateArguments(ctx)) {
  1029. return false;
  1030. }
  1031. PrecacheState();
  1032. const auto memberPos = Args[0]->GetPos();
  1033. TVector<TNodePtr> repackArgs = {BuildAtom(memberPos, "row", NYql::TNodeFlags::Default)};
  1034. if (auto literal = Args[1]->GetLiteral("String")) {
  1035. TString targetType;
  1036. if (!GetDataTypeStringNode(ctx, *this, 1, &targetType)) {
  1037. return false;
  1038. }
  1039. repackArgs.push_back(Args[1]->Q(targetType));
  1040. } else {
  1041. repackArgs.push_back(Args[1]);
  1042. }
  1043. TVector<TNodePtr> column;
  1044. auto namePtr = Args[0]->GetColumnName();
  1045. if (!namePtr || !*namePtr) {
  1046. ctx.Error(Pos) << GetCallExplain() << " expect as first argument column name";
  1047. return false;
  1048. }
  1049. auto memberName = *namePtr;
  1050. column.push_back(Args[0]->Q(*namePtr));
  1051. if (src->GetJoin() && !src->IsJoinKeysInitializing()) {
  1052. const auto sourcePtr = Args[0]->GetSourceName();
  1053. if (!sourcePtr || !*sourcePtr) {
  1054. ctx.Error(Pos) << GetOpName() << " required to have correlation name in case of JOIN for column at first parameter";
  1055. return false;
  1056. }
  1057. column.push_back(Args[0]->Q(*sourcePtr));
  1058. memberName = DotJoin(*sourcePtr, memberName);
  1059. }
  1060. if (!GetLabel()) {
  1061. SetLabel(memberName);
  1062. }
  1063. repackArgs.push_back(BuildTuple(memberPos, column));
  1064. if (Args.size() == 3) {
  1065. repackArgs.push_back(Args[2]);
  1066. }
  1067. ++MinArgs;
  1068. ++MaxArgs;
  1069. Args.swap(repackArgs);
  1070. return TCallNode::DoInit(ctx, src);
  1071. }
  1072. TNodePtr DoClone() const final {
  1073. return new TWeakFieldOp(Pos, Args);
  1074. }
  1075. };
  1076. class TTableRow final : public TAstAtomNode {
  1077. public:
  1078. TTableRow(TPosition pos, const TVector<TNodePtr>& args)
  1079. : TTableRow(pos, args.size())
  1080. {}
  1081. TTableRow(TPosition pos, ui32 argsCount)
  1082. : TAstAtomNode(pos, "row", 0)
  1083. , ArgsCount(argsCount)
  1084. {}
  1085. bool DoInit(TContext& ctx, ISource* src) override {
  1086. if (!src || src->IsFake()) {
  1087. ctx.Error(Pos) << "TableRow requires FROM section";
  1088. return false;
  1089. }
  1090. if (ArgsCount > 0) {
  1091. ctx.Error(Pos) << "TableRow requires exactly 0 arguments";
  1092. return false;
  1093. }
  1094. src->AllColumns();
  1095. return true;
  1096. }
  1097. void DoUpdateState() const final {
  1098. State.Set(ENodeState::Const, false);
  1099. }
  1100. TNodePtr DoClone() const final {
  1101. return MakeIntrusive<TTableRow>(Pos, ArgsCount);
  1102. }
  1103. private:
  1104. ui32 ArgsCount;
  1105. };
  1106. TNodePtr BuildUdfUserTypeArg(TPosition pos, const TVector<TNodePtr>& args, TNodePtr customUserType) {
  1107. TVector<TNodePtr> argsTypeItems;
  1108. for (auto& arg : args) {
  1109. argsTypeItems.push_back(new TCallNodeImpl(pos, "TypeOf", TVector<TNodePtr>(1, arg)));
  1110. }
  1111. TVector<TNodePtr> userTypeItems;
  1112. userTypeItems.push_back(new TCallNodeImpl(pos, "TupleType", argsTypeItems));
  1113. userTypeItems.push_back(new TCallNodeImpl(pos, "StructType", {}));
  1114. if (customUserType) {
  1115. userTypeItems.push_back(customUserType);
  1116. } else {
  1117. userTypeItems.push_back(new TCallNodeImpl(pos, "TupleType", {}));
  1118. }
  1119. return new TCallNodeImpl(pos, "TupleType", userTypeItems);
  1120. }
  1121. TNodePtr BuildUdfUserTypeArg(TPosition pos, TNodePtr positionalArgs, TNodePtr namedArgs, TNodePtr customUserType) {
  1122. TVector<TNodePtr> userTypeItems;
  1123. userTypeItems.reserve(3);
  1124. userTypeItems.push_back(positionalArgs->Y("TypeOf", positionalArgs));
  1125. userTypeItems.push_back(positionalArgs->Y("TypeOf", namedArgs));
  1126. if (customUserType) {
  1127. userTypeItems.push_back(customUserType);
  1128. } else {
  1129. userTypeItems.push_back(new TCallNodeImpl(pos, "TupleType", {}));
  1130. }
  1131. return new TCallNodeImpl(pos, "TupleType", userTypeItems);
  1132. }
  1133. TVector<TNodePtr> BuildUdfArgs(const TContext& ctx, TPosition pos, const TVector<TNodePtr>& args,
  1134. TNodePtr positionalArgs, TNodePtr namedArgs, TNodePtr customUserType) {
  1135. if (!ctx.Settings.EnableGenericUdfs) {
  1136. return {};
  1137. }
  1138. TVector<TNodePtr> udfArgs;
  1139. udfArgs.push_back(new TAstListNodeImpl(pos));
  1140. udfArgs[0]->Add(new TAstAtomNodeImpl(pos, "Void", 0));
  1141. if (namedArgs) {
  1142. udfArgs.push_back(BuildUdfUserTypeArg(pos, positionalArgs, namedArgs, customUserType));
  1143. } else {
  1144. udfArgs.push_back(BuildUdfUserTypeArg(pos, args, customUserType));
  1145. }
  1146. return udfArgs;
  1147. }
  1148. class TCallableNode final: public INode {
  1149. public:
  1150. TCallableNode(TPosition pos, const TString& module, const TString& name, const TVector<TNodePtr>& args)
  1151. : INode(pos)
  1152. , Module(module)
  1153. , Name(name)
  1154. , Args(args)
  1155. {}
  1156. bool DoInit(TContext& ctx, ISource* src) override {
  1157. if (Module == "yql") {
  1158. ui32 flags;
  1159. TString nameParseError;
  1160. TPosition pos = Pos;
  1161. TString parsedName;
  1162. if (!TryStringContent(Name, parsedName, flags, nameParseError, pos)) {
  1163. ctx.Error(pos) << "Failed to parse YQL: " << nameParseError;
  1164. return false;
  1165. }
  1166. const TString yql("(" + parsedName + ")");
  1167. TAstParseResult ast = ParseAst(yql, ctx.Pool.get());
  1168. /// TODO: do not drop warnings
  1169. if (ast.IsOk()) {
  1170. Node = AstNode(ast.Root->GetChild(0));
  1171. } else {
  1172. ctx.Error(Pos) << "Failed to parse YQL: " << ast.Issues.ToString();
  1173. return false;
  1174. }
  1175. if (src) {
  1176. src->AllColumns();
  1177. }
  1178. } else if (ctx.Settings.ModuleMapping.contains(Module)) {
  1179. Node = Y("bind", Module + "_module", Q(Name));
  1180. if (src) {
  1181. src->AllColumns();
  1182. }
  1183. } else {
  1184. TNodePtr customUserType = nullptr;
  1185. if (Module == "Tensorflow" && Name == "RunBatch") {
  1186. if (Args.size() > 2) {
  1187. auto passThroughAtom = Q("PassThrough");
  1188. auto passThroughType = Y("StructMemberType", Y("ListItemType", Y("TypeOf", Args[1])), passThroughAtom);
  1189. customUserType = Y("AddMemberType", Args[2], passThroughAtom, passThroughType);
  1190. Args.erase(Args.begin() + 2);
  1191. }
  1192. }
  1193. auto udfArgs = BuildUdfArgs(ctx, Pos, Args, nullptr, nullptr, customUserType);
  1194. Node = BuildUdf(ctx, Pos, Module, Name, udfArgs);
  1195. }
  1196. return Node->Init(ctx, src);
  1197. }
  1198. TAstNode* Translate(TContext& ctx) const override {
  1199. Y_DEBUG_ABORT_UNLESS(Node);
  1200. return Node->Translate(ctx);
  1201. }
  1202. void DoUpdateState() const override {
  1203. YQL_ENSURE(Node);
  1204. State.Set(ENodeState::Const, Node->IsConstant());
  1205. State.Set(ENodeState::Aggregated, Node->IsAggregated());
  1206. }
  1207. TNodePtr DoClone() const override {
  1208. return new TCallableNode(Pos, Module, Name, Args);
  1209. }
  1210. private:
  1211. TCiString Module;
  1212. TString Name;
  1213. TVector<TNodePtr> Args;
  1214. TNodePtr Node;
  1215. };
  1216. TNodePtr BuildCallable(TPosition pos, const TString& module, const TString& name, const TVector<TNodePtr>& args) {
  1217. return new TCallableNode(pos, module, name, args);
  1218. }
  1219. TNodePtr BuildUdf(TContext& ctx, TPosition pos, const TString& module, const TString& name, const TVector<TNodePtr>& args) {
  1220. auto fullName = module + "." + name;
  1221. if (!args.empty()) {
  1222. return new TYqlUdf(pos, fullName, args, args.size() + 1);
  1223. } else {
  1224. auto varName = ctx.AddSimpleUdf(fullName);
  1225. return new TAstAtomNodeImpl(pos, varName, TNodeFlags::ArbitraryContent);
  1226. }
  1227. }
  1228. class TScriptUdf final: public INode {
  1229. public:
  1230. TScriptUdf(TPosition pos, const TString& moduleName, const TString& funcName, const TVector<TNodePtr>& args)
  1231. : INode(pos)
  1232. , ModuleName(moduleName)
  1233. , FuncName(funcName)
  1234. , Args(args)
  1235. {}
  1236. bool DoInit(TContext& ctx, ISource* src) override {
  1237. const bool isPython = ModuleName.find(TStringBuf("Python")) != TString::npos;
  1238. if (!isPython) {
  1239. if (Args.size() != 2) {
  1240. ctx.Error(Pos) << ModuleName << " script declaration requires exactly two parameters";
  1241. return false;
  1242. }
  1243. } else {
  1244. if (Args.size() < 1 || Args.size() > 2) {
  1245. ctx.Error(Pos) << ModuleName << " script declaration requires one or two parameters";
  1246. return false;
  1247. }
  1248. }
  1249. auto nameAtom = BuildQuotedAtom(Pos, FuncName);
  1250. auto scriptNode = Args.back();
  1251. if (!scriptNode->Init(ctx, src)) {
  1252. return false;
  1253. }
  1254. auto scriptStrPtr = Args.back()->GetLiteral("String");
  1255. if (scriptStrPtr && scriptStrPtr->size() > SQL_MAX_INLINE_SCRIPT_LEN) {
  1256. scriptNode = ctx.UniversalAlias("scriptudf", std::move(scriptNode));
  1257. }
  1258. INode::TPtr type;
  1259. if (Args.size() == 2) {
  1260. auto literal = Args[0]->GetLiteral("String");
  1261. if (literal) {
  1262. auto parsed = ParseType(*literal, *ctx.Pool, ctx.Issues, Args[0]->GetPos());
  1263. if (!parsed) {
  1264. ctx.Error(Args[0]->GetPos()) << "Failed to parse script signature";
  1265. return false;
  1266. }
  1267. type = AstNode(parsed);
  1268. } else {
  1269. type = Args[0];
  1270. }
  1271. } else {
  1272. // Python supports getting functions signatures right from docstrings
  1273. type = Y("EvaluateType", Y("ParseTypeHandle", Y("Apply",
  1274. Y("bind", "core_module", Q("PythonFuncSignature")),
  1275. Q(ModuleName),
  1276. scriptNode,
  1277. Y("String", nameAtom)
  1278. )));
  1279. }
  1280. if (!type->Init(ctx, src)) {
  1281. return false;
  1282. }
  1283. Node = Y("ScriptUdf", Q(ModuleName), nameAtom, type, scriptNode);
  1284. return true;
  1285. }
  1286. TAstNode* Translate(TContext& ctx) const override {
  1287. Y_UNUSED(ctx);
  1288. Y_DEBUG_ABORT_UNLESS(Node);
  1289. return Node->Translate(ctx);
  1290. }
  1291. void DoUpdateState() const override {
  1292. State.Set(ENodeState::Const, true);
  1293. }
  1294. TNodePtr DoClone() const final {
  1295. return new TScriptUdf(GetPos(), ModuleName, FuncName, CloneContainer(Args));
  1296. }
  1297. private:
  1298. TString ModuleName;
  1299. TString FuncName;
  1300. TVector<TNodePtr> Args;
  1301. TNodePtr Node;
  1302. };
  1303. template <bool Sorted>
  1304. class TYqlToDict final: public TCallNode {
  1305. public:
  1306. TYqlToDict(TPosition pos, const TString& mode, const TVector<TNodePtr>& args)
  1307. : TCallNode(pos, "ToDict", 4, 4, args)
  1308. , Mode(mode)
  1309. {}
  1310. private:
  1311. TCallNode::TPtr DoClone() const override {
  1312. return new TYqlToDict<Sorted>(GetPos(), Mode, CloneContainer(Args));
  1313. }
  1314. bool DoInit(TContext& ctx, ISource* src) override {
  1315. if (Args.size() != 1) {
  1316. ctx.Error(Pos) << "ToDict required exactly one argument";
  1317. return false;
  1318. }
  1319. Args.push_back(BuildLambda(Pos, Y("val"), Y("Nth", "val", Q("0"))));
  1320. Args.push_back(BuildLambda(Pos, Y("val"), Y("Nth", "val", Q("1"))));
  1321. Args.push_back(Q(Y(Q(Sorted ? "Sorted" : "Hashed"), Q(Mode))));
  1322. return TCallNode::DoInit(ctx, src);
  1323. }
  1324. private:
  1325. TString Mode;
  1326. };
  1327. template <bool IsStart>
  1328. class THoppingTime final: public TAstListNode {
  1329. public:
  1330. THoppingTime(TPosition pos, const TVector<TNodePtr>& args = {})
  1331. : TAstListNode(pos)
  1332. {
  1333. Y_UNUSED(args);
  1334. }
  1335. private:
  1336. TNodePtr DoClone() const override {
  1337. return new THoppingTime(GetPos());
  1338. }
  1339. bool DoInit(TContext& ctx, ISource* src) override {
  1340. Y_UNUSED(ctx);
  1341. auto window = src->GetHoppingWindowSpec();
  1342. if (!window) {
  1343. ctx.Error(Pos) << "No hopping window parameters in aggregation";
  1344. return false;
  1345. }
  1346. Nodes.clear();
  1347. if (!IsStart) {
  1348. Add("Member", "row", Q("_yql_time"));
  1349. return true;
  1350. }
  1351. Add("Sub",
  1352. Y("Member", "row", Q("_yql_time")),
  1353. window->Interval);
  1354. return true;
  1355. }
  1356. void DoUpdateState() const override {
  1357. State.Set(ENodeState::Aggregated, true);
  1358. }
  1359. };
  1360. class TInvalidBuiltin final: public INode {
  1361. public:
  1362. TInvalidBuiltin(TPosition pos, const TString& info)
  1363. : INode(pos)
  1364. , Info(info)
  1365. {
  1366. }
  1367. bool DoInit(TContext& ctx, ISource*) override {
  1368. ctx.Error(Pos) << Info;
  1369. return false;
  1370. }
  1371. TAstNode* Translate(TContext&) const override {
  1372. return nullptr;
  1373. }
  1374. TPtr DoClone() const override {
  1375. return {};
  1376. }
  1377. private:
  1378. TString Info;
  1379. };
// Selects which Build*Aggregation factory function BuildAggrFuncFactoryCallback
// uses to create the aggregation for a given SQL aggregate function.
enum EAggrFuncTypeCallback {
    NORMAL,                   // BuildFactoryAggregation
    WINDOW_AUTOARGS,          // BuildFactoryAggregationWinAutoarg
    KEY_PAYLOAD,              // BuildKeyPayloadFactoryAggregation
    PAYLOAD_PREDICATE,        // BuildPayloadPredicateFactoryAggregation
    TWO_ARGS,                 // BuildTwoArgsFactoryAggregation
    COUNT,                    // BuildCountAggregation
    HISTOGRAM,                // BuildHistogramFactoryAggregation
    LINEAR_HISTOGRAM,         // BuildLinearHistogramFactoryAggregation
    PERCENTILE,               // BuildPercentileFactoryAggregation
    TOPFREQ,                  // BuildTopFreqFactoryAggregation
    TOP,                      // BuildTopFactoryAggregation<false>
    TOP_BY,                   // BuildTopFactoryAggregation<true>
    COUNT_DISTINCT_ESTIMATE,  // BuildCountDistinctEstimateFactoryAggregation
    LIST,                     // BuildListFactoryAggregation
    UDAF                      // BuildUserDefinedFactoryAggregation
};
// Entry of TCoreFuncMap: a name together with the accepted argument count range.
// NOTE(review): Name is presumably the core callable the SQL function maps to - confirm at the use site.
struct TCoreFuncInfo {
    TString Name;
    ui32 MinArgs;
    ui32 MaxArgs;
};
// Callback creating an aggregate function node (or an aggregation factory node when isFactory is set).
using TAggrFuncFactoryCallback = std::function<INode::TPtr(TPosition pos, const TVector<TNodePtr>& args, EAggregateMode aggMode, bool isFactory)>;
// Aggregate function callbacks keyed by name.
using TAggrFuncFactoryCallbackMap = std::unordered_map<TString, TAggrFuncFactoryCallback, THash<TString>>;
// Callback creating a builtin function node from its position and arguments.
using TBuiltinFactoryCallback = std::function<TNodePtr(TPosition pos, const TVector<TNodePtr>& args)>;
// Builtin function callbacks keyed by name.
using TBuiltinFactoryCallbackMap = std::unordered_map<TString, TBuiltinFactoryCallback, THash<TString>>;
// Core function descriptors keyed by name.
using TCoreFuncMap = std::unordered_map<TString, TCoreFuncInfo, THash<TString>>;
  1407. TAggrFuncFactoryCallback BuildAggrFuncFactoryCallback(
  1408. const TString& functionName,
  1409. const TString& factoryName,
  1410. EAggrFuncTypeCallback type = NORMAL,
  1411. const TString& functionNameOverride = TString(),
  1412. const TVector<EAggregateMode>& validModes = {}) {
  1413. const TString realFunctionName = functionNameOverride.empty() ? functionName : functionNameOverride;
  1414. return [functionName, realFunctionName, factoryName, type, validModes] (TPosition pos, const TVector<TNodePtr>& args, EAggregateMode aggMode, bool isFactory) -> INode::TPtr {
  1415. if (!validModes.empty()) {
  1416. if (!IsIn(validModes, aggMode)) {
  1417. const TString errorText = TStringBuilder()
  1418. << "Can't use " << functionName << " in " << ToString(aggMode) << " aggregation mode";
  1419. return INode::TPtr(new TInvalidBuiltin(pos, errorText));
  1420. }
  1421. }
  1422. TAggregationPtr factory = nullptr;
  1423. switch (type) {
  1424. case NORMAL:
  1425. factory = BuildFactoryAggregation(pos, realFunctionName, factoryName, aggMode);
  1426. break;
  1427. case WINDOW_AUTOARGS:
  1428. factory = BuildFactoryAggregationWinAutoarg(pos, realFunctionName, factoryName, aggMode);
  1429. break;
  1430. case KEY_PAYLOAD:
  1431. factory = BuildKeyPayloadFactoryAggregation(pos, realFunctionName, factoryName, aggMode);
  1432. break;
  1433. case PAYLOAD_PREDICATE:
  1434. factory = BuildPayloadPredicateFactoryAggregation(pos, realFunctionName, factoryName, aggMode);
  1435. break;
  1436. case TWO_ARGS:
  1437. factory = BuildTwoArgsFactoryAggregation(pos, realFunctionName, factoryName, aggMode);
  1438. break;
  1439. case COUNT:
  1440. factory = BuildCountAggregation(pos, realFunctionName, factoryName, aggMode);
  1441. break;
  1442. case HISTOGRAM:
  1443. factory = BuildHistogramFactoryAggregation(pos, realFunctionName, factoryName, aggMode);
  1444. break;
  1445. case LINEAR_HISTOGRAM:
  1446. factory = BuildLinearHistogramFactoryAggregation(pos, realFunctionName, factoryName, aggMode);
  1447. break;
  1448. case PERCENTILE:
  1449. factory = BuildPercentileFactoryAggregation(pos, realFunctionName, factoryName, aggMode);
  1450. break;
  1451. case TOPFREQ:
  1452. factory = BuildTopFreqFactoryAggregation(pos, realFunctionName, factoryName, aggMode);
  1453. break;
  1454. case TOP:
  1455. factory = BuildTopFactoryAggregation<false>(pos, realFunctionName, factoryName, aggMode);
  1456. break;
  1457. case TOP_BY:
  1458. factory = BuildTopFactoryAggregation<true>(pos, realFunctionName, factoryName, aggMode);
  1459. break;
  1460. case COUNT_DISTINCT_ESTIMATE:
  1461. factory = BuildCountDistinctEstimateFactoryAggregation(pos, realFunctionName, factoryName, aggMode);
  1462. break;
  1463. case LIST:
  1464. factory = BuildListFactoryAggregation(pos, realFunctionName, factoryName, aggMode);
  1465. break;
  1466. case UDAF:
  1467. factory = BuildUserDefinedFactoryAggregation(pos, realFunctionName, factoryName, aggMode);
  1468. break;
  1469. }
  1470. if (isFactory) {
  1471. auto realArgs = args;
  1472. realArgs.erase(realArgs.begin()); // skip function name
  1473. return new TBasicAggrFactory(pos, functionName, factory, realArgs);
  1474. } else {
  1475. return new TBasicAggrFunc(pos, functionName, factory, args);
  1476. }
  1477. };
  1478. }
  1479. TAggrFuncFactoryCallback BuildAggrFuncFactoryCallback(
  1480. const TString& functionName,
  1481. const TString& factoryName,
  1482. const TVector<EAggregateMode>& validModes,
  1483. EAggrFuncTypeCallback type = NORMAL,
  1484. const TString& functionNameOverride = TString()) {
  1485. return BuildAggrFuncFactoryCallback(functionName, factoryName, type, functionNameOverride, validModes);
  1486. }
  1487. template<typename TType>
  1488. TBuiltinFactoryCallback BuildSimpleBuiltinFactoryCallback() {
  1489. return [] (TPosition pos, const TVector<TNodePtr>& args) -> TNodePtr {
  1490. return new TType(pos, args);
  1491. };
  1492. }
  1493. template<typename TType>
  1494. TBuiltinFactoryCallback BuildNamedBuiltinFactoryCallback(const TString& name) {
  1495. return [name] (TPosition pos, const TVector<TNodePtr>& args) -> TNodePtr {
  1496. return new TType(pos, name, args);
  1497. };
  1498. }
  1499. template<typename TType>
  1500. TBuiltinFactoryCallback BuildArgcBuiltinFactoryCallback(i32 minArgs, i32 maxArgs) {
  1501. return [minArgs, maxArgs] (TPosition pos, const TVector<TNodePtr>& args) -> TNodePtr {
  1502. return new TType(pos, minArgs, maxArgs, args);
  1503. };
  1504. }
  1505. template<typename TType>
  1506. TBuiltinFactoryCallback BuildNamedArgcBuiltinFactoryCallback(const TString& name, i32 minArgs, i32 maxArgs) {
  1507. return [name, minArgs, maxArgs] (TPosition pos, const TVector<TNodePtr>& args) -> TNodePtr {
  1508. return new TType(pos, name, minArgs, maxArgs, args);
  1509. };
  1510. }
  1511. template<typename TType>
  1512. TBuiltinFactoryCallback BuildNamedDepsArgcBuiltinFactoryCallback(ui32 reqArgsCount, const TString& name, i32 minArgs, i32 maxArgs) {
  1513. return [reqArgsCount, name, minArgs, maxArgs](TPosition pos, const TVector<TNodePtr>& args) -> TNodePtr {
  1514. return new TType(reqArgsCount, pos, name, minArgs, maxArgs, args);
  1515. };
  1516. }
  1517. template<typename TType>
  1518. TBuiltinFactoryCallback BuildBoolBuiltinFactoryCallback(bool arg) {
  1519. return [arg] (TPosition pos, const TVector<TNodePtr>& args) -> TNodePtr {
  1520. return new TType(pos, args, arg);
  1521. };
  1522. }
  1523. template<typename TType>
  1524. TBuiltinFactoryCallback BuildFoldBuiltinFactoryCallback(const TString& name, const TString& defaultValue) {
  1525. return [name, defaultValue] (TPosition pos, const TVector<TNodePtr>& args) -> TNodePtr {
  1526. return new TType(pos, name, "Bool", defaultValue, 1, args);
  1527. };
  1528. }
  1529. TNodePtr MakePair(TPosition pos, const TVector<TNodePtr>& args) {
  1530. TNodePtr list = new TAstListNodeImpl(pos, {
  1531. args[0],
  1532. args.size() > 1 ? args[1] : new TAstListNodeImpl(pos,{ new TAstAtomNodeImpl(pos, "Null", TNodeFlags::Default) })
  1533. });
  1534. return new TAstListNodeImpl(pos, {
  1535. new TAstAtomNodeImpl(pos, "quote", TNodeFlags::Default),
  1536. list
  1537. });
  1538. }
  1539. struct TBuiltinFuncData {
  1540. const TBuiltinFactoryCallbackMap BuiltinFuncs;
  1541. const TAggrFuncFactoryCallbackMap AggrFuncs;
  1542. const TCoreFuncMap CoreFuncs;
  1543. TBuiltinFuncData():
  1544. BuiltinFuncs(MakeBuiltinFuncs()),
  1545. AggrFuncs(MakeAggrFuncs()),
  1546. CoreFuncs(MakeCoreFuncs())
  1547. {
  1548. }
  1549. TBuiltinFactoryCallbackMap MakeBuiltinFuncs() {
  1550. TBuiltinFactoryCallbackMap builtinFuncs = {
  1551. // Branching
  1552. {"if", BuildSimpleBuiltinFactoryCallback<TYqlIf<false>>()},
  1553. {"ifstrict", BuildSimpleBuiltinFactoryCallback<TYqlIf<true>>() },
  1554. // String builtins
  1555. {"len", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("Size", 1, 1)},
  1556. {"length", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("Size", 1, 1)},
  1557. {"charlength", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("Size", 1, 1)},
  1558. {"characterlength", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("Size", 1, 1)},
  1559. {"substring", BuildSimpleBuiltinFactoryCallback<TYqlSubstring>()},
  1560. {"byteat", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("ByteAt", 2, 2) },
  1561. // Numeric builtins
  1562. {"abs", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("Abs", 1, 1) },
  1563. {"tobytes", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("ToBytes", 1, 1) },
  1564. {"frombytes", BuildSimpleBuiltinFactoryCallback<TFromBytes>() },
  1565. // Compare builtins
  1566. {"minof", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("Min", 1, -1)},
  1567. {"maxof", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("Max", 1, -1)},
  1568. {"greatest", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("Max", 1, -1)},
  1569. {"least", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("Min", 1, -1)},
  1570. {"in", BuildSimpleBuiltinFactoryCallback<TYqlIn>()},
  1571. // List builtins
  1572. {"aslist", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("AsList", 1, -1)},
  1573. {"asliststrict", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("AsListStrict", 1, -1) },
  1574. {"listlength", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("Length", 1, 1)},
  1575. {"listhasitems", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("HasItems", 1, 1)},
  1576. {"listcount", BuildSimpleBuiltinFactoryCallback<TListCountBuiltin>()},
  1577. {"listextend", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("Extend", 1, -1)},
  1578. {"listunionall", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("UnionAll", 1, -1) },
  1579. {"listzip", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("Zip", -1, -1)},
  1580. {"listzipall", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("ZipAll", -1, -1)},
  1581. {"listenumerate", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("Enumerate", 1, 3)},
  1582. {"listreverse", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("Reverse", 1, 1)},
  1583. {"listskip", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("Skip", 2, 2)},
  1584. {"listtake", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("Take", 2, 2)},
  1585. {"listsort", BuildBoolBuiltinFactoryCallback<TListSortBuiltin>(true)},
  1586. {"listsortasc", BuildBoolBuiltinFactoryCallback<TListSortBuiltin>(true)},
  1587. {"listsortdesc", BuildBoolBuiltinFactoryCallback<TListSortBuiltin>(false)},
  1588. {"listmap", BuildBoolBuiltinFactoryCallback<TListMapBuiltin>(false)},
  1589. {"listflatmap", BuildBoolBuiltinFactoryCallback<TListMapBuiltin>(true)},
  1590. {"listfilter", BuildSimpleBuiltinFactoryCallback<TListFilterBuiltin>()},
  1591. {"listany", BuildFoldBuiltinFactoryCallback<TListFoldBuiltinImpl>("Or", "false")},
  1592. {"listall", BuildFoldBuiltinFactoryCallback<TListFoldBuiltinImpl>("And", "true")},
  1593. {"listhas", BuildSimpleBuiltinFactoryCallback<TListHasBuiltin>()},
  1594. {"listmax", BuildNamedBuiltinFactoryCallback<TListFold1Builtin>("AggrMax")},
  1595. {"listmin", BuildNamedBuiltinFactoryCallback<TListFold1Builtin>("AggrMin")},
  1596. {"listsum", BuildNamedBuiltinFactoryCallback<TListFold1Builtin>("AggrAdd")},
  1597. {"listavg", BuildSimpleBuiltinFactoryCallback<TListAvgBuiltin>()},
  1598. {"listconcat", BuildNamedBuiltinFactoryCallback<TListFold1Builtin>("Concat")},
  1599. {"listextract", BuildSimpleBuiltinFactoryCallback<TListExtractBuiltin>()},
  1600. {"listuniq", BuildSimpleBuiltinFactoryCallback<TListUniqBuiltin>()},
  1601. {"listcreate", BuildSimpleBuiltinFactoryCallback<TListCreateBuiltin>()},
  1602. {"listfromrange", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("ListFromRange", 2, 3) },
  1603. {"listreplicate", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("Replicate", 2, 2) },
  1604. {"listtakewhile", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("TakeWhile", 2, 2) },
  1605. {"listskipwhile", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("SkipWhile", 2, 2) },
  1606. {"listcollect", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("Collect", 1, 1) },
  1607. // Dict builtins
  1608. {"dictcreate", BuildSimpleBuiltinFactoryCallback<TDictCreateBuiltin>()},
  1609. {"asdict", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("AsDict", 1, -1)},
  1610. {"asdictstrict", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("AsDictStrict", 1, -1)},
  1611. {"todict", BuildNamedBuiltinFactoryCallback<TYqlToDict<false>>("One")},
  1612. {"tomultidict", BuildNamedBuiltinFactoryCallback<TYqlToDict<false>>("Many")},
  1613. {"tosorteddict", BuildNamedBuiltinFactoryCallback<TYqlToDict<true>>("One")},
  1614. {"tosortedmultidict", BuildNamedBuiltinFactoryCallback<TYqlToDict<true>>("Many")},
  1615. {"dictkeys", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("DictKeys", 1, 1) },
  1616. {"dictpayloads", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("DictPayloads", 1, 1) },
  1617. {"dictitems", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("DictItems", 1, 1) },
  1618. {"dictlookup", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("Lookup", 2, 2) },
  1619. {"dictcontains", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("Contains", 2, 2) },
  1620. // Atom builtins
  1621. {"asatom", BuildSimpleBuiltinFactoryCallback<TYqlAsAtom>()},
  1622. {"secureparam", BuildNamedBuiltinFactoryCallback<TYqlAtom>("SecureParam")},
  1623. {"void", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("Void", 0, 0)},
  1624. {"callable", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("Callable", 2, 2)},
  1625. {"way", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("Way", 1, 1) },
  1626. {"variant", BuildSimpleBuiltinFactoryCallback<TYqlVariant>() },
  1627. {"astagged", BuildSimpleBuiltinFactoryCallback<TYqlAsTagged>() },
  1628. {"untag", BuildSimpleBuiltinFactoryCallback<TYqlUntag>() },
  1629. {"parsetype", BuildSimpleBuiltinFactoryCallback<TYqlParseType>() },
  1630. {"ensuretype", BuildSimpleBuiltinFactoryCallback<TYqlTypeAssert<true>>() },
  1631. {"ensureconvertibleto", BuildSimpleBuiltinFactoryCallback<TYqlTypeAssert<false>>() },
  1632. {"ensure", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("Ensure", 2, 3) },
  1633. {"evaluateexpr", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("EvaluateExpr", 1, 1) },
  1634. {"evaluateatom", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("EvaluateAtom", 1, 1) },
  1635. {"evaluatetype", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("EvaluateType", 1, 1) },
  1636. {"unwrap", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("Unwrap", 1, 2) },
  1637. {"just", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("Just", 1, 1) },
  1638. {"nothing", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("Nothing", 1, 1) },
  1639. {"formattype", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("FormatType", 1, 1) },
  1640. {"typeof", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("TypeOf", 1, 1) },
  1641. {"instanceof", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("InstanceOf", 1, 1) },
  1642. {"datatype", BuildSimpleBuiltinFactoryCallback<TYqlDataType>() },
  1643. {"optionaltype", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("OptionalType", 1, 1) },
  1644. {"listtype", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("ListType", 1, 1) },
  1645. {"streamtype", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("StreamType", 1, 1) },
  1646. {"dicttype", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("DictType", 2, 2) },
  1647. {"tupletype", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("TupleType", 0, -1) },
  1648. {"generictype", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("GenericType", 0, 0) },
  1649. {"unittype", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("UnitType", 0, 0) },
  1650. {"voidtype", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("VoidType", 0, 0) },
  1651. {"resourcetype", BuildSimpleBuiltinFactoryCallback<TYqlResourceType>() },
  1652. {"taggedtype", BuildSimpleBuiltinFactoryCallback<TYqlTaggedType>() },
  1653. {"varianttype", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("VariantType", 1, 1) },
  1654. {"callabletype", BuildSimpleBuiltinFactoryCallback<TYqlCallableType>() },
  1655. {"optionalitemtype", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("OptionalItemType", 1, 1) },
  1656. {"listitemtype", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("ListItemType", 1, 1) },
  1657. {"streamitemtype", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("StreamItemType", 1, 1) },
  1658. {"dictkeytype", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("DictKeyType", 1, 1) },
  1659. {"dictpayloadtype", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("DictPayloadType", 1, 1) },
  1660. {"tupleelementtype", BuildSimpleBuiltinFactoryCallback<TYqlTupleElementType>() },
  1661. {"structmembertype", BuildSimpleBuiltinFactoryCallback<TYqlStructMemberType>() },
  1662. {"callableresulttype", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("CallableResultType", 1, 1) },
  1663. {"callableargumenttype", BuildSimpleBuiltinFactoryCallback<TYqlCallableArgumentType>() },
  1664. {"variantunderlyingtype", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("VariantUnderlyingType", 1, 1) },
  1665. {"fromysonsimpletype", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("FromYsonSimpleType", 2, 2) },
  1666. {"currentutcdate", BuildNamedDepsArgcBuiltinFactoryCallback<TCallNodeDepArgs>(0, "CurrentUtcDate", 0, -1) },
  1667. {"currentutcdatetime", BuildNamedDepsArgcBuiltinFactoryCallback<TCallNodeDepArgs>(0, "CurrentUtcDatetime", 0, -1) },
  1668. {"currentutctimestamp", BuildNamedDepsArgcBuiltinFactoryCallback<TCallNodeDepArgs>(0, "CurrentUtcTimestamp", 0, -1) },
  1669. {"currentoperationid", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("CurrentOperationId", 0, 0) },
  1670. {"currentoperationsharedid", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("CurrentOperationSharedId", 0, 0) },
  1671. {"currentauthenticateduser", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("CurrentAuthenticatedUser", 0, 0) },
  1672. {"addtimezone", BuildSimpleBuiltinFactoryCallback<TYqlAddTimezone>() },
  1673. {"removetimezone", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("RemoveTimezone", 1, 1) },
  1674. {"typehandle", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("TypeHandle", 1, 1) },
  1675. {"parsetypehandle", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("ParseTypeHandle", 1, 1) },
  1676. {"typekind", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("TypeKind", 1, 1) },
  1677. {"datatypecomponents", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("DataTypeComponents", 1, 1) },
  1678. {"datatypehandle", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("DataTypeHandle", 1, 1) },
  1679. {"optionaltypehandle", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("OptionalTypeHandle", 1, 1) },
  1680. {"listtypehandle", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("ListTypeHandle", 1, 1) },
  1681. {"streamtypehandle", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("StreamTypeHandle", 1, 1) },
  1682. {"tupletypecomponents", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("TupleTypeComponents", 1, 1) },
  1683. {"tupletypehandle", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("TupleTypeHandle", 1, 1) },
  1684. {"structtypecomponents", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("StructTypeComponents", 1, 1) },
  1685. {"structtypehandle", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("StructTypeHandle", 1, 1) },
  1686. {"dicttypecomponents", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("DictTypeComponents", 1, 1) },
  1687. {"dicttypehandle", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("DictTypeHandle", 2, 2) },
  1688. {"resourcetypetag", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("ResourceTypeTag", 1, 1) },
  1689. {"resourcetypehandle", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("ResourceTypeHandle", 1, 1) },
  1690. {"taggedtypecomponents", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("TaggedTypeComponents", 1, 1) },
  1691. {"taggedtypehandle", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("TaggedTypeHandle", 2, 2) },
  1692. {"varianttypehandle", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("VariantTypeHandle", 1, 1) },
  1693. {"voidtypehandle", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("VoidTypeHandle", 0, 0) },
  1694. {"nulltypehandle", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("NullTypeHandle", 0, 0) },
  1695. {"callabletypecomponents", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("CallableTypeComponents", 1, 1) },
  1696. {"callableargument", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("CallableArgument", 1, 3) },
  1697. {"callabletypehandle", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("CallableTypeHandle", 2, 4) },
  1698. {"formatcode", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("FormatCode", 1, 1) },
  1699. {"worldcode", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("WorldCode", 0, 0) },
  1700. {"atomcode", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("AtomCode", 1, 1) },
  1701. {"listcode", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("ListCode", 0, -1) },
  1702. {"funccode", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("FuncCode", 1, -1) },
  1703. {"lambdacode", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("LambdaCode", 1, 2) },
  1704. {"evaluatecode", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("EvaluateCode", 1, 1) },
  1705. {"reprcode", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("ReprCode", 1, 1) },
  1706. {"quotecode", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("QuoteCode", 1, 1) },
  1707. // Tuple builtins
  1708. {"astuple", BuildSimpleBuiltinFactoryCallback<TTupleNode>()},
  1709. // Struct builtins
  1710. {"addmember", BuildNamedBuiltinFactoryCallback<TAddMember>("AddMember")},
  1711. {"removemember", BuildNamedBuiltinFactoryCallback<TRemoveMember>("RemoveMember")},
  1712. {"forceremovemember", BuildNamedBuiltinFactoryCallback<TRemoveMember>("ForceRemoveMember")},
  1713. {"combinemembers", BuildNamedBuiltinFactoryCallback<TCombineMembers>("FlattenMembers")},
  1714. {"flattenmembers", BuildNamedBuiltinFactoryCallback<TFlattenMembers>("FlattenMembers")},
  1715. // File builtins
  1716. {"filepath", BuildNamedBuiltinFactoryCallback<TYqlAtom>("FilePath")},
  1717. {"filecontent", BuildNamedBuiltinFactoryCallback<TYqlAtom>("FileContent")},
  1718. {"folderpath", BuildNamedBuiltinFactoryCallback<TYqlAtom>("FolderPath") },
  1719. {"files", BuildNamedBuiltinFactoryCallback<TYqlAtom>("Files")},
  1720. {"parsefile", BuildSimpleBuiltinFactoryCallback<TYqlParseFileOp>()},
  1721. // Misc builtins
  1722. {"coalesce", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("Coalesce", 1, -1)},
  1723. {"nvl", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("Coalesce", 1, -1) },
  1724. {"nanvl", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("Nanvl", 2, 2) },
  1725. {"likely", BuildNamedArgcBuiltinFactoryCallback<TCallNodeImpl>("Likely", 1, -1)},
  1726. {"random", BuildNamedDepsArgcBuiltinFactoryCallback<TCallNodeDepArgs>(0, "Random", 1, -1)},
  1727. {"randomnumber", BuildNamedDepsArgcBuiltinFactoryCallback<TCallNodeDepArgs>(0, "RandomNumber", 1, -1)},
  1728. {"randomuuid", BuildNamedDepsArgcBuiltinFactoryCallback<TCallNodeDepArgs>(0, "RandomUuid", 1, -1) },
  1729. {"tablepath", BuildNamedBuiltinFactoryCallback<TCallDirectRow>("TablePath") },
  1730. {"tablerecord", BuildNamedBuiltinFactoryCallback<TCallDirectRow>("TableRecord") },
  1731. {"tablerecordindex", BuildNamedBuiltinFactoryCallback<TCallDirectRow>("TableRecord") },
  1732. {"weakfield", BuildSimpleBuiltinFactoryCallback<TWeakFieldOp>()},
  1733. {"tablerow", BuildSimpleBuiltinFactoryCallback<TTableRow>() },
  1734. // Hint builtins
  1735. {"grouping", BuildSimpleBuiltinFactoryCallback<TGroupingNode>()},
  1736. // Window functions
  1737. {"rownumber", BuildNamedArgcBuiltinFactoryCallback<TWinRowNumber>("RowNumber", 0, 0)},
  1738. /// by SQL2011 should be with sort
  1739. {"lead", BuildNamedArgcBuiltinFactoryCallback<TWinLeadLag>("Lead", 1, 2)},
  1740. {"lag", BuildNamedArgcBuiltinFactoryCallback<TWinLeadLag>("Lag", 1, 2)},
  1741. // Hopping intervals time functions
  1742. {"hopstart", BuildSimpleBuiltinFactoryCallback<THoppingTime<true>>()},
  1743. {"hopend", BuildSimpleBuiltinFactoryCallback<THoppingTime<false>>()},
  1744. };
  1745. return builtinFuncs;
  1746. }
  1747. TAggrFuncFactoryCallbackMap MakeAggrFuncs() {
  1748. constexpr auto OverWindow = EAggregateMode::OverWindow;
  1749. TAggrFuncFactoryCallbackMap aggrFuncs = {
  1750. {"min", BuildAggrFuncFactoryCallback("Min", "min_traits_factory")},
  1751. {"max", BuildAggrFuncFactoryCallback("Max", "max_traits_factory")},
  1752. {"minby", BuildAggrFuncFactoryCallback("MinBy", "min_by_traits_factory", KEY_PAYLOAD)},
  1753. {"maxby", BuildAggrFuncFactoryCallback("MaxBy", "max_by_traits_factory", KEY_PAYLOAD)},
  1754. {"sum", BuildAggrFuncFactoryCallback("Sum", "sum_traits_factory")},
  1755. {"sumif", BuildAggrFuncFactoryCallback("SumIf", "sum_if_traits_factory", PAYLOAD_PREDICATE) },
  1756. {"some", BuildAggrFuncFactoryCallback("Some", "some_traits_factory")},
  1757. {"somevalue", BuildAggrFuncFactoryCallback("SomeValue", "some_traits_factory")},
  1758. {"count", BuildAggrFuncFactoryCallback("Count", "count_traits_factory", COUNT)},
  1759. {"countif", BuildAggrFuncFactoryCallback("CountIf", "count_if_traits_factory")},
  1760. {"every", BuildAggrFuncFactoryCallback("Every", "and_traits_factory")},
  1761. {"booland", BuildAggrFuncFactoryCallback("BoolAnd", "and_traits_factory")},
  1762. {"boolor", BuildAggrFuncFactoryCallback("BoolOr", "or_traits_factory")},
  1763. {"bitand", BuildAggrFuncFactoryCallback("BitAnd", "bit_and_traits_factory")},
  1764. {"bitor", BuildAggrFuncFactoryCallback("BitOr", "bit_or_traits_factory")},
  1765. {"bitxor", BuildAggrFuncFactoryCallback("BitXor", "bit_xor_traits_factory")},
  1766. {"avg", BuildAggrFuncFactoryCallback("Avg", "avg_traits_factory")},
  1767. {"avgif", BuildAggrFuncFactoryCallback("AvgIf", "avg_if_traits_factory", PAYLOAD_PREDICATE) },
  1768. {"list", BuildAggrFuncFactoryCallback("List", "list_traits_factory", LIST)},
  1769. {"agglist", BuildAggrFuncFactoryCallback("AggregateList", "list2_traits_factory", LIST)},
  1770. {"aggrlist", BuildAggrFuncFactoryCallback("AggregateList", "list2_traits_factory", LIST)},
  1771. {"aggregatelist", BuildAggrFuncFactoryCallback("AggregateList", "list2_traits_factory", LIST)},
  1772. {"listdistinct", BuildAggrFuncFactoryCallback("ListDistinct", "set_traits_factory", LIST)},
  1773. {"agglistdistinct", BuildAggrFuncFactoryCallback("AggregateListDistinct", "set_traits_factory", LIST)},
  1774. {"aggrlistdistinct", BuildAggrFuncFactoryCallback("AggregateListDistinct", "set_traits_factory", LIST)},
  1775. {"aggregatelistdistinct", BuildAggrFuncFactoryCallback("AggregateListDistinct", "set_traits_factory", LIST)},
  1776. {"median", BuildAggrFuncFactoryCallback("Median", "percentile_traits_factory", PERCENTILE)},
  1777. {"percentile", BuildAggrFuncFactoryCallback("Percentile", "percentile_traits_factory", PERCENTILE)},
  1778. {"mode", BuildAggrFuncFactoryCallback("Mode", "topfreq_traits_factory", TOPFREQ) },
  1779. {"topfreq", BuildAggrFuncFactoryCallback("TopFreq", "topfreq_traits_factory", TOPFREQ) },
  1780. {"top", BuildAggrFuncFactoryCallback("Top", "top_traits_factory", TOP)},
  1781. {"bottom", BuildAggrFuncFactoryCallback("Bottom", "bottom_traits_factory", TOP)},
  1782. {"topby", BuildAggrFuncFactoryCallback("TopBy", "top_by_traits_factory", TOP_BY)},
  1783. {"bottomby", BuildAggrFuncFactoryCallback("BottomBy", "bottom_by_traits_factory", TOP_BY)},
  1784. {"histogram", BuildAggrFuncFactoryCallback("AdaptiveWardHistogram", "histogram_adaptive_ward_traits_factory", HISTOGRAM, "Histogram")},
  1785. {"adaptivewardhistogram", BuildAggrFuncFactoryCallback("AdaptiveWardHistogram", "histogram_adaptive_ward_traits_factory", HISTOGRAM)},
  1786. {"adaptiveweighthistogram", BuildAggrFuncFactoryCallback("AdaptiveWeightHistogram", "histogram_adaptive_weight_traits_factory", HISTOGRAM)},
  1787. {"adaptivedistancehistogram", BuildAggrFuncFactoryCallback("AdaptiveDistanceHistogram", "histogram_adaptive_distance_traits_factory", HISTOGRAM)},
  1788. {"blockwardhistogram", BuildAggrFuncFactoryCallback("BlockWardHistogram", "histogram_block_ward_traits_factory", HISTOGRAM)},
  1789. {"blockweighthistogram", BuildAggrFuncFactoryCallback("BlockWeightHistogram", "histogram_block_weight_traits_factory", HISTOGRAM)},
  1790. {"linearhistogram", BuildAggrFuncFactoryCallback("LinearHistogram", "histogram_linear_traits_factory", LINEAR_HISTOGRAM)},
  1791. {"logarithmichistogram", BuildAggrFuncFactoryCallback("LogarithmicHistogram", "histogram_logarithmic_traits_factory", LINEAR_HISTOGRAM)},
  1792. {"loghistogram", BuildAggrFuncFactoryCallback("LogarithmicHistogram", "histogram_logarithmic_traits_factory", LINEAR_HISTOGRAM, "LogHistogram")},
  1793. {"hyperloglog", BuildAggrFuncFactoryCallback("HyperLogLog", "hyperloglog_traits_factory", COUNT_DISTINCT_ESTIMATE)},
  1794. {"hll", BuildAggrFuncFactoryCallback("HyperLogLog", "hyperloglog_traits_factory", COUNT_DISTINCT_ESTIMATE, "HLL")},
  1795. {"countdistinctestimate", BuildAggrFuncFactoryCallback("HyperLogLog", "hyperloglog_traits_factory", COUNT_DISTINCT_ESTIMATE, "CountDistinctEstimate")},
  1796. {"variance", BuildAggrFuncFactoryCallback("Variance", "variance_0_1_traits_factory")},
  1797. {"stddev", BuildAggrFuncFactoryCallback("StdDev", "variance_1_1_traits_factory")},
  1798. {"populationvariance", BuildAggrFuncFactoryCallback("VariancePopulation", "variance_0_0_traits_factory")},
  1799. {"variancepopulation", BuildAggrFuncFactoryCallback("VariancePopulation", "variance_0_0_traits_factory")},
  1800. {"populationstddev", BuildAggrFuncFactoryCallback("StdDevPopulation", "variance_1_0_traits_factory")},
  1801. {"stddevpopulation", BuildAggrFuncFactoryCallback("StdDevPopulation", "variance_1_0_traits_factory")},
  1802. {"varpop", BuildAggrFuncFactoryCallback("VariancePopulation", "variance_0_0_traits_factory")},
  1803. {"stddevpop", BuildAggrFuncFactoryCallback("StdDevPopulation", "variance_1_0_traits_factory")},
  1804. {"varp", BuildAggrFuncFactoryCallback("VariancePopulation", "variance_0_0_traits_factory")},
  1805. {"stddevp", BuildAggrFuncFactoryCallback("StdDevPopulation", "variance_1_0_traits_factory")},
  1806. {"variancesample", BuildAggrFuncFactoryCallback("VarianceSample", "variance_0_1_traits_factory")},
  1807. {"stddevsample", BuildAggrFuncFactoryCallback("StdDevSample", "variance_1_1_traits_factory")},
  1808. {"varsamp", BuildAggrFuncFactoryCallback("VarianceSample", "variance_0_1_traits_factory")},
  1809. {"stddevsamp", BuildAggrFuncFactoryCallback("StdDevSample", "variance_1_1_traits_factory")},
  1810. {"vars", BuildAggrFuncFactoryCallback("VarianceSample", "variance_0_1_traits_factory")},
  1811. {"stddevs", BuildAggrFuncFactoryCallback("StdDevSample", "variance_1_1_traits_factory")},
  1812. {"correlation", BuildAggrFuncFactoryCallback("Correlation", "correlation_traits_factory", TWO_ARGS)},
  1813. {"corr", BuildAggrFuncFactoryCallback("Correlation", "correlation_traits_factory", TWO_ARGS, "Corr")},
  1814. {"covariance", BuildAggrFuncFactoryCallback("CovarianceSample", "covariance_sample_traits_factory", TWO_ARGS, "Covariance")},
  1815. {"covariancesample", BuildAggrFuncFactoryCallback("CovarianceSample", "covariance_sample_traits_factory", TWO_ARGS)},
  1816. {"covarsamp", BuildAggrFuncFactoryCallback("CovarianceSample", "covariance_sample_traits_factory", TWO_ARGS, "CovarSamp")},
  1817. {"covar", BuildAggrFuncFactoryCallback("CovarianceSample", "covariance_sample_traits_factory", TWO_ARGS, "Covar")},
  1818. {"covars", BuildAggrFuncFactoryCallback("CovarianceSample", "covariance_sample_traits_factory", TWO_ARGS, "CovarS")},
  1819. {"covariancepopulation", BuildAggrFuncFactoryCallback("CovariancePopulation", "covariance_population_traits_factory", TWO_ARGS)},
  1820. {"covarpop", BuildAggrFuncFactoryCallback("CovariancePopulation", "covariance_population_traits_factory", TWO_ARGS, "CovarPop")},
  1821. {"covarp", BuildAggrFuncFactoryCallback("CovariancePopulation", "covariance_population_traits_factory", TWO_ARGS, "CovarP")},
  1822. {"udaf", BuildAggrFuncFactoryCallback("UDAF", "udaf_traits_factory", UDAF)},
  1823. // Window functions
  1824. /// by SQL2011 should be with sort
  1825. {"rank", BuildAggrFuncFactoryCallback("Rank", "rank_traits_factory", WINDOW_AUTOARGS)},
  1826. {"denserank", BuildAggrFuncFactoryCallback("DenseRank", "dense_rank_traits_factory", WINDOW_AUTOARGS)},
  1827. // \todo unsupported now, required count element in window
  1828. //{"ntile", BuildAggrFuncFactoryCallback("Ntile", "ntile_traits_factory")},
  1829. //{"percentrank", BuildAggrFuncFactoryCallback("PercentRank", "percent_rank_traits_factory")},
  1830. //{"cumedist", BuildAggrFuncFactoryCallback("CumeDist", "cume_dist_traits_factory")},
  1831. {"firstvalue", BuildAggrFuncFactoryCallback("FirstValue", "first_value_traits_factory", {OverWindow})},
  1832. {"lastvalue", BuildAggrFuncFactoryCallback("LastValue", "last_value_traits_factory", {OverWindow})},
  1833. {"firstvalueignorenulls", BuildAggrFuncFactoryCallback("FirstValueIgnoreNulls", "first_value_ignore_nulls_traits_factory", {OverWindow})},
  1834. {"lastvalueignorenulls", BuildAggrFuncFactoryCallback("LastValueIgnoreNulls", "last_value_ignore_nulls_traits_factory", {OverWindow})},
  1835. };
  1836. return aggrFuncs;
  1837. }
  1838. TCoreFuncMap MakeCoreFuncs() {
  1839. TCoreFuncMap coreFuncs = {
  1840. {"listindexof", { "IndexOf", 2, 2}},
  1841. {"testbit", { "TestBit", 2, 2}},
  1842. {"setbit", { "SetBit", 2, 2}},
  1843. {"clearbit", { "ClearBit", 2, 2}},
  1844. {"flipbit", { "FlipBit", 2, 2 }},
  1845. {"toset", { "ToSet", 1, 1 }},
  1846. {"setisdisjoint", { "SetIsDisjoint", 2, 2}},
  1847. {"setintersection", { "SetIntersection", 2, 3}},
  1848. {"setincludes", { "SetIncludes", 2, 2}},
  1849. {"setunion", { "SetUnion", 2, 3}},
  1850. {"setdifference", { "SetDifference", 2, 2}},
  1851. {"setsymmetricdifference", { "SetSymmetricDifference", 2, 3}},
  1852. {"listaggregate", { "ListAggregate", 2, 2}},
  1853. {"dictaggregate", { "DictAggregate", 2, 2}},
  1854. };
  1855. return coreFuncs;
  1856. }
  1857. };
  1858. TNodePtr BuildBuiltinFunc(TContext& ctx, TPosition pos, TString name, const TVector<TNodePtr>& args,
  1859. const TString& nameSpace, EAggregateMode aggMode, bool* mustUseNamed, TFuncPrepareNameNode funcPrepareNameNode) {
  1860. const TBuiltinFuncData* funcData = Singleton<TBuiltinFuncData>();
  1861. const TBuiltinFactoryCallbackMap& builtinFuncs = funcData->BuiltinFuncs;
  1862. const TAggrFuncFactoryCallbackMap& aggrFuncs = funcData->AggrFuncs;
  1863. const TCoreFuncMap& coreFuncs = funcData->CoreFuncs;
  1864. for (auto& arg: args) {
  1865. if (!arg) {
  1866. return nullptr;
  1867. }
  1868. }
  1869. TString normalizedName(name);
  1870. TString ns = to_lower(nameSpace);
  1871. if (ns.empty()) {
  1872. TMaybe<TIssue> error = NormalizeName(pos, normalizedName);
  1873. if (!error.Empty()) {
  1874. return new TInvalidBuiltin(pos, error->GetMessage());
  1875. }
  1876. auto coreFunc = coreFuncs.find(normalizedName);
  1877. if (coreFunc != coreFuncs.end()) {
  1878. ns = "core";
  1879. name = coreFunc->second.Name;
  1880. if (args.size() < coreFunc->second.MinArgs || args.size() > coreFunc->second.MaxArgs) {
  1881. return new TInvalidBuiltin(pos, TStringBuilder() << name << " expected from "
  1882. << coreFunc->second.MinArgs << " to " << coreFunc->second.MaxArgs << " arguments, but got: " << args.size());
  1883. }
  1884. if (coreFunc->second.MinArgs != coreFunc->second.MaxArgs) {
  1885. name += ToString(args.size());
  1886. }
  1887. }
  1888. }
  1889. TString moduleResource;
  1890. if (ctx.Settings.ModuleMapping.contains(ns)) {
  1891. moduleResource = ctx.Settings.ModuleMapping.at(ns);
  1892. }
  1893. if (ns == "js") {
  1894. ns = "javascript";
  1895. }
  1896. auto scriptType = NKikimr::NMiniKQL::ScriptTypeFromStr(ns);
  1897. if (scriptType == NKikimr::NMiniKQL::EScriptType::SystemPython2) {
  1898. scriptType = NKikimr::NMiniKQL::EScriptType::Python2;
  1899. }
  1900. if (ns == "yql") {
  1901. return new TCallNodeImpl(pos, name, -1, -1, args);
  1902. } else if (ns == "string" && name == "SplitToList") {
  1903. TNodePtr positionalArgs;
  1904. TNodePtr namedArgs;
  1905. if (mustUseNamed && *mustUseNamed) {
  1906. YQL_ENSURE(args.size() == 2);
  1907. positionalArgs = args[0];
  1908. namedArgs = args[1];
  1909. *mustUseNamed = false;
  1910. }
  1911. TVector<TNodePtr> reuseArgs;
  1912. if (!namedArgs && args && funcPrepareNameNode) {
  1913. TString reusedBaseName = TStringBuilder() << "Arg" << to_title(nameSpace) << to_title(name);
  1914. reuseArgs.reserve(args.size());
  1915. for (const auto& arg: args) {
  1916. reuseArgs.push_back(funcPrepareNameNode(reusedBaseName, arg));
  1917. }
  1918. }
  1919. auto usedArgs = reuseArgs ? reuseArgs : args;
  1920. TVector<TNodePtr> positionalArgsElements;
  1921. if (namedArgs) {
  1922. auto positionalArgsTuple = dynamic_cast<TTupleNode*>(positionalArgs.Get());
  1923. Y_DEBUG_ABORT_UNLESS(positionalArgsTuple, "unexpected value at String::SplitToList positional args");
  1924. positionalArgsElements = positionalArgsTuple->Elements();
  1925. } else {
  1926. positionalArgsElements = usedArgs;
  1927. }
  1928. auto positionalArgsTupleSize = positionalArgsElements.size();
  1929. auto argsSize = positionalArgsTupleSize;
  1930. TNodePtr trueLiteral = BuildLiteralBool(pos, "true");
  1931. TNodePtr falseLiteral = BuildLiteralBool(pos, "false");
  1932. TNodePtr namedDelimeterStringArg;
  1933. TNodePtr namedSkipEmptyArg;
  1934. bool hasDelimeterString = false;
  1935. if (auto namedArgsStruct = dynamic_cast<TStructNode*>(namedArgs.Get())) {
  1936. auto exprs = namedArgsStruct->GetExprs();
  1937. for (auto& expr : exprs) {
  1938. if (expr->GetLabel() == "DelimeterString") {
  1939. hasDelimeterString = true;
  1940. break;
  1941. }
  1942. }
  1943. argsSize += namedArgsStruct->GetExprs().size();
  1944. }
  1945. if (argsSize < 3) {
  1946. positionalArgsElements.push_back(falseLiteral);
  1947. }
  1948. if (argsSize < 4 && !hasDelimeterString) {
  1949. positionalArgsElements.push_back(trueLiteral);
  1950. }
  1951. if (namedArgs) {
  1952. positionalArgs = BuildTuple(pos, positionalArgsElements);
  1953. } else {
  1954. usedArgs = positionalArgsElements;
  1955. }
  1956. TNodePtr customUserType = nullptr;
  1957. const auto& udfArgs = BuildUdfArgs(ctx, pos, usedArgs, positionalArgs, namedArgs, customUserType);
  1958. TNodePtr udfNode = BuildUdf(ctx, pos, nameSpace, name, udfArgs);
  1959. TVector<TNodePtr> applyArgs = { udfNode };
  1960. applyArgs.insert(applyArgs.end(), usedArgs.begin(), usedArgs.end());
  1961. return new TCallNodeImpl(pos, namedArgs ? "NamedApply" : "Apply", applyArgs);
  1962. } else if (moduleResource) {
  1963. auto exportName = ns == "core" ? name : "$" + name;
  1964. TVector<TNodePtr> applyArgs = {
  1965. new TCallNodeImpl(pos, "bind", {
  1966. BuildAtom(pos, ns + "_module", 0), BuildQuotedAtom(pos, exportName)
  1967. })
  1968. };
  1969. applyArgs.insert(applyArgs.end(), args.begin(), args.end());
  1970. return new TCallNodeImpl(pos, "Apply", applyArgs);
  1971. } else if (ns == "hyperscan" || ns == "pcre" || ns == "pire" || ns.StartsWith("re2")) {
  1972. TString moduleName(nameSpace);
  1973. moduleName.to_title();
  1974. if ((args.size() == 1 || args.size() == 2) && (name.StartsWith("Multi") || (ns.StartsWith("re2") && name == "Capture"))) {
  1975. TVector<TNodePtr> multiArgs{
  1976. ns.StartsWith("re2") && name == "Capture" ? MakePair(pos, args) : args[0],
  1977. new TCallNodeImpl(pos, "Void", 0, 0, {}),
  1978. args[0]
  1979. };
  1980. auto fullName = moduleName + "." + name;
  1981. return new TYqlTypeConfigUdf(pos, fullName, multiArgs, multiArgs.size() + 1);
  1982. } else if (!(ns.StartsWith("re2") && name == "Options")) {
  1983. auto newArgs = args;
  1984. if (ns.StartsWith("re2")) {
  1985. // convert run config is tuple of string and optional options
  1986. if (args.size() == 1 || args.size() == 2) {
  1987. newArgs[0] = MakePair(pos, args);
  1988. if (args.size() == 2) {
  1989. newArgs.pop_back();
  1990. }
  1991. } else {
  1992. return new TInvalidBuiltin(pos, TStringBuilder() << ns << "." << name << " expected one or two arguments.");
  1993. }
  1994. }
  1995. return BuildUdf(ctx, pos, moduleName, name, newArgs);
  1996. }
  1997. } else if (ns == "datetime2" && (name == "Format" || name == "Parse")) {
  1998. return BuildUdf(ctx, pos, nameSpace, name, args);
  1999. } else if (scriptType != NKikimr::NMiniKQL::EScriptType::Unknown) {
  2000. auto scriptName = NKikimr::NMiniKQL::ScriptTypeAsStr(scriptType);
  2001. return new TScriptUdf(pos, TString(scriptName), name, args);
  2002. } else if (ns.empty()) {
  2003. auto type = NormalizeTypeString(normalizedName);
  2004. if (AvailableDataTypes.contains(type)) {
  2005. return new TYqlData(pos, type, args);
  2006. }
  2007. if (normalizedName == "tablename") {
  2008. return new TTableName(pos, args, ctx.CurrCluster);
  2009. }
  2010. if (normalizedName == "aggregationfactory") {
  2011. if (args.size() < 1 || !args[0]->GetLiteral("String")) {
  2012. return new TInvalidBuiltin(pos, "AGGREGATION_FACTORY requries a function name");
  2013. }
  2014. auto aggNormalizedName = *args[0]->GetLiteral("String");
  2015. auto error = NormalizeName(pos, aggNormalizedName);
  2016. if (!error.Empty()) {
  2017. return new TInvalidBuiltin(pos, error->GetMessage());
  2018. }
  2019. if (aggNormalizedName == "aggregateby") {
  2020. return new TInvalidBuiltin(pos, "AGGREGATE_BY is not allowed to use with AGGREGATION_FACTORY");
  2021. }
  2022. if (aggNormalizedName == "multiaggregateby") {
  2023. return new TInvalidBuiltin(pos, "MULTI_AGGREGATE_BY is not allowed to use with AGGREGATION_FACTORY");
  2024. }
  2025. auto aggrCallback = aggrFuncs.find(aggNormalizedName);
  2026. if (aggrCallback == aggrFuncs.end()) {
  2027. return new TInvalidBuiltin(pos, TStringBuilder() << "Unknown aggregation function: " << *args[0]->GetLiteral("String"));
  2028. }
  2029. if (aggMode == EAggregateMode::Distinct) {
  2030. return new TInvalidBuiltin(pos, "Only aggregation functions allow DISTINCT set specification");
  2031. }
  2032. return (*aggrCallback).second(pos, args, aggMode, true).Release();
  2033. }
  2034. if (normalizedName == "aggregateby" || normalizedName == "multiaggregateby") {
  2035. const bool multi = (normalizedName == "multiaggregateby");
  2036. if (args.size() != 2) {
  2037. return new TInvalidBuiltin(pos, TStringBuilder() << (multi ? "MULTI_AGGREGATE_BY" : "AGGREGATE_BY") << " requries two arguments");
  2038. }
  2039. auto name = multi ? "MultiAggregateBy" : "AggregateBy";
  2040. auto aggr = BuildFactoryAggregation(pos, name, "", aggMode, multi);
  2041. return new TBasicAggrFunc(pos, name, aggr, args);
  2042. }
  2043. auto aggrCallback = aggrFuncs.find(normalizedName);
  2044. if (aggrCallback != aggrFuncs.end()) {
  2045. return (*aggrCallback).second(pos, args, aggMode, false).Release();
  2046. }
  2047. if (aggMode == EAggregateMode::Distinct) {
  2048. return new TInvalidBuiltin(pos, "Only aggregation functions allow DISTINCT set specification");
  2049. }
  2050. auto builtinCallback = builtinFuncs.find(normalizedName);
  2051. if (builtinCallback != builtinFuncs.end()) {
  2052. return (*builtinCallback).second(pos, args);
  2053. } else if (normalizedName == "asstruct" || normalizedName == "structtype") {
  2054. if (args.empty()) {
  2055. return new TCallNodeImpl(pos, normalizedName == "asstruct" ? "AsStruct" : "StructType", 0, 0, args);
  2056. }
  2057. if (mustUseNamed && *mustUseNamed) {
  2058. *mustUseNamed = false;
  2059. YQL_ENSURE(args.size() == 2);
  2060. Y_DEBUG_ABORT_UNLESS(dynamic_cast<TTupleNode*>(args[0].Get()));
  2061. auto posArgs = static_cast<TTupleNode*>(args[0].Get());
  2062. if (posArgs->IsEmpty()) {
  2063. if (normalizedName == "asstruct") {
  2064. return args[1];
  2065. } else {
  2066. Y_DEBUG_ABORT_UNLESS(dynamic_cast<TStructNode*>(args[1].Get()));
  2067. auto namedArgs = static_cast<TStructNode*>(args[1].Get());
  2068. return new TStructTypeNode(pos, namedArgs->GetExprs());
  2069. }
  2070. }
  2071. }
  2072. return new TInvalidBuiltin(pos, TStringBuilder() <<
  2073. (normalizedName == "asstruct" ? "AsStruct" : "StructType") <<
  2074. " requires all argument to be named");
  2075. } else if (normalizedName == "expandstruct") {
  2076. if (mustUseNamed) {
  2077. if (!*mustUseNamed) {
  2078. return new TInvalidBuiltin(pos, TStringBuilder() << "ExpandStruct requires at least one named argument");
  2079. }
  2080. *mustUseNamed = false;
  2081. }
  2082. YQL_ENSURE(args.size() == 2);
  2083. auto posArgs = static_cast<TTupleNode*>(args[0].Get());
  2084. Y_DEBUG_ABORT_UNLESS(dynamic_cast<TTupleNode*>(args[0].Get()));
  2085. Y_DEBUG_ABORT_UNLESS(dynamic_cast<TStructNode*>(args[1].Get()));
  2086. if (posArgs->GetTupleSize() != 1) {
  2087. return new TInvalidBuiltin(pos, TStringBuilder() << "ExpandStruct requires all arguments except first to be named");
  2088. }
  2089. TVector<TNodePtr> flattenMembersArgs = {
  2090. BuildTuple(pos, {BuildQuotedAtom(pos, ""), posArgs->GetTupleElement(0)}),
  2091. BuildTuple(pos, {BuildQuotedAtom(pos, ""), args[1]}),
  2092. };
  2093. return new TCallNodeImpl(pos, "FlattenMembers", 2, 2, flattenMembersArgs);
  2094. } else {
  2095. return new TInvalidBuiltin(pos, TStringBuilder() << "Unknown builtin: " << name << ", to use YQL functions, try YQL::" << name);
  2096. }
  2097. }
  2098. TNodePtr positionalArgs;
  2099. TNodePtr namedArgs;
  2100. if (mustUseNamed && *mustUseNamed) {
  2101. YQL_ENSURE(args.size() == 2);
  2102. positionalArgs = args[0];
  2103. namedArgs = args[1];
  2104. *mustUseNamed = false;
  2105. }
  2106. TVector<TNodePtr> reuseArgs;
  2107. if (!namedArgs && args && funcPrepareNameNode) {
  2108. TString reusedBaseName = TStringBuilder() << "Arg" << to_title(nameSpace) << to_title(name);
  2109. reuseArgs.reserve(args.size());
  2110. for (const auto& arg: args) {
  2111. reuseArgs.push_back(funcPrepareNameNode(reusedBaseName, arg));
  2112. }
  2113. }
  2114. auto usedArgs = reuseArgs ? reuseArgs : args;
  2115. TNodePtr customUserType = nullptr;
  2116. if (ns == "yson") {
  2117. if (name == "ConvertTo" && usedArgs.size() > 1) {
  2118. customUserType = usedArgs[1];
  2119. usedArgs.erase(usedArgs.begin() + 1);
  2120. }
  2121. ui32 optionsIndex = name.Contains("Lookup") ? 2 : 1;
  2122. if (usedArgs.size() <= optionsIndex && (ctx.PragmaYsonAutoConvert || ctx.PragmaYsonStrict)) {
  2123. usedArgs.push_back(BuildYsonOptionsNode(pos, ctx.PragmaYsonAutoConvert, ctx.PragmaYsonStrict));
  2124. }
  2125. } else if (ns == "json") {
  2126. ctx.Warning(pos, TIssuesIds::YQL_DEPRECATED_JSON_UDF)
  2127. << "Json UDF is deprecated and is going to be removed, please switch to Yson UDF that also supports Json input: https://yql.yandex-team.ru/docs/yt/udf/list/yson/";
  2128. }
  2129. const auto& udfArgs = BuildUdfArgs(ctx, pos, usedArgs, positionalArgs, namedArgs, customUserType);
  2130. TNodePtr udfNode = BuildUdf(ctx, pos, nameSpace, name, udfArgs);
  2131. TVector<TNodePtr> applyArgs = { udfNode };
  2132. applyArgs.insert(applyArgs.end(), usedArgs.begin(), usedArgs.end());
  2133. return new TCallNodeImpl(pos, namedArgs ? "NamedApply" : "Apply", applyArgs);
  2134. }
  2135. } // namespace NSQLTranslationV0