sql_call_expr.cpp 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457
  1. #include "sql_call_expr.h"
  2. #include "sql_expression.h"
  3. #include <yql/essentials/parser/proto_ast/gen/v1/SQLv1Lexer.h>
  4. #include <yql/essentials/minikql/mkql_program_builder.h>
  5. namespace NSQLTranslationV1 {
  6. using namespace NSQLv1Generated;
  7. static bool ValidateForCounters(const TString& input) {
  8. for (auto c : input) {
  9. if (!(IsAlnum(c) || c == '_')) {
  10. return false;
  11. }
  12. }
  13. return true;
  14. }
  15. TNodePtr TSqlCallExpr::BuildUdf(bool forReduce) {
  16. auto result = Node ? Node : BuildCallable(Pos, Module, Func, Args, forReduce);
  17. if (to_lower(Module) == "tensorflow" && Func == "RunBatch") {
  18. if (Args.size() > 2) {
  19. Args.erase(Args.begin() + 2);
  20. } else {
  21. Ctx.Error(Pos) << "Excepted >= 3 arguments, but got: " << Args.size();
  22. return nullptr;
  23. }
  24. }
  25. return result;
  26. }
  27. TNodePtr TSqlCallExpr::BuildCall() {
  28. TVector<TNodePtr> args;
  29. bool warnOnYqlNameSpace = true;
  30. TUdfNode* udf_node = Node ? Node->GetUdfNode() : nullptr;
  31. if (udf_node) {
  32. if (!udf_node->DoInit(Ctx, nullptr)) {
  33. return nullptr;
  34. }
  35. TNodePtr positional_args = BuildTuple(Pos, PositionalArgs);
  36. TNodePtr positional = positional_args->Y("TypeOf", positional_args);
  37. TNodePtr named_args = BuildStructure(Pos, NamedArgs);
  38. TNodePtr named = named_args->Y("TypeOf", named_args);
  39. TNodePtr custom_user_type = new TCallNodeImpl(Pos, "TupleType", {positional, named, udf_node->GetExternalTypes()});
  40. TNodePtr options = udf_node->BuildOptions();
  41. if (udf_node->IsScript()) {
  42. auto udf = BuildScriptUdf(Pos, udf_node->GetModule(), udf_node->GetFunction(), udf_node->GetScriptArgs(), options);
  43. TVector<TNodePtr> applyArgs;
  44. applyArgs.push_back(new TAstAtomNodeImpl(Pos, !NamedArgs.empty() ? "NamedApply" : "Apply", TNodeFlags::Default));
  45. applyArgs.push_back(udf);
  46. if (!NamedArgs.empty()) {
  47. applyArgs.push_back(BuildTuple(Pos, PositionalArgs));
  48. applyArgs.push_back(BuildStructure(Pos, NamedArgs));
  49. } else {
  50. applyArgs.insert(applyArgs.end(), PositionalArgs.begin(), PositionalArgs.end());
  51. }
  52. return new TAstListNodeImpl(Pos, applyArgs);
  53. }
  54. return BuildSqlCall(Ctx, Pos, udf_node->GetModule(), udf_node->GetFunction(),
  55. args, positional_args, named_args, custom_user_type,
  56. udf_node->GetTypeConfig(), udf_node->GetRunConfig(), options);
  57. }
  58. if (Node && (!Node->FuncName() || Node->IsScript())) {
  59. Module = "YQL";
  60. Func = NamedArgs.empty() ? "Apply" : "NamedApply";
  61. warnOnYqlNameSpace = false;
  62. args.push_back(Node);
  63. }
  64. if (Node && Node->FuncName() && !Node->IsScript()) {
  65. Module = Node->ModuleName() ? *Node->ModuleName() : "YQL";
  66. Func = *Node->FuncName();
  67. }
  68. bool mustUseNamed = !NamedArgs.empty();
  69. if (mustUseNamed) {
  70. if (Node && (!Node->FuncName() || Node->IsScript())) {
  71. mustUseNamed = false;
  72. }
  73. args.emplace_back(BuildTuple(Pos, PositionalArgs));
  74. args.emplace_back(BuildStructure(Pos, NamedArgs));
  75. } else if (IsExternalCall) {
  76. Func = "SqlExternalFunction";
  77. if (Args.size() < 2 || Args.size() > 3) {
  78. Ctx.Error(Pos) << "EXTERNAL FUNCTION requires from 2 to 3 arguments, but got: " << Args.size();
  79. return nullptr;
  80. }
  81. if (Args.size() == 3) {
  82. args.insert(args.end(), Args.begin(), Args.end() - 1);
  83. Args.erase(Args.begin(), Args.end() - 1);
  84. } else {
  85. args.insert(args.end(), Args.begin(), Args.end());
  86. Args.erase(Args.begin(), Args.end());
  87. }
  88. auto configNode = new TExternalFunctionConfig(Pos, CallConfig);
  89. auto configList = new TAstListNodeImpl(Pos, { new TAstAtomNodeImpl(Pos, "quote", 0), configNode });
  90. args.push_back(configList);
  91. } else {
  92. args.insert(args.end(), Args.begin(), Args.end());
  93. }
  94. auto result = BuildBuiltinFunc(Ctx, Pos, Func, args, Module, AggMode, &mustUseNamed, warnOnYqlNameSpace);
  95. if (mustUseNamed) {
  96. Error() << "Named args are used for call, but unsupported by function: " << Func;
  97. return nullptr;
  98. }
  99. if (WindowName) {
  100. result = BuildCalcOverWindow(Pos, WindowName, result);
  101. }
  102. return result;
  103. }
  104. bool TSqlCallExpr::Init(const TRule_value_constructor& node) {
  105. switch (node.Alt_case()) {
  106. case TRule_value_constructor::kAltValueConstructor1: {
  107. auto& ctor = node.GetAlt_value_constructor1();
  108. Func = "Variant";
  109. TSqlExpression expr(Ctx, Mode);
  110. if (!Expr(expr, Args, ctor.GetRule_expr3())) {
  111. return false;
  112. }
  113. if (!Expr(expr, Args, ctor.GetRule_expr5())) {
  114. return false;
  115. }
  116. if (!Expr(expr, Args, ctor.GetRule_expr7())) {
  117. return false;
  118. }
  119. break;
  120. }
  121. case TRule_value_constructor::kAltValueConstructor2: {
  122. auto& ctor = node.GetAlt_value_constructor2();
  123. Func = "Enum";
  124. TSqlExpression expr(Ctx, Mode);
  125. if (!Expr(expr, Args, ctor.GetRule_expr3())) {
  126. return false;
  127. }
  128. if (!Expr(expr, Args, ctor.GetRule_expr5())) {
  129. return false;
  130. }
  131. break;
  132. }
  133. case TRule_value_constructor::kAltValueConstructor3: {
  134. auto& ctor = node.GetAlt_value_constructor3();
  135. Func = "Callable";
  136. TSqlExpression expr(Ctx, Mode);
  137. if (!Expr(expr, Args, ctor.GetRule_expr3())) {
  138. return false;
  139. }
  140. if (!Expr(expr, Args, ctor.GetRule_expr5())) {
  141. return false;
  142. }
  143. break;
  144. }
  145. case TRule_value_constructor::ALT_NOT_SET:
  146. Y_ABORT("You should change implementation according to grammar changes");
  147. }
  148. PositionalArgs = Args;
  149. return true;
  150. }
  151. bool TSqlCallExpr::ExtractCallParam(const TRule_external_call_param& node) {
  152. TString paramName = Id(node.GetRule_an_id1(), *this);
  153. paramName = to_lower(paramName);
  154. if (CallConfig.contains(paramName)) {
  155. Ctx.Error() << "WITH " << to_upper(paramName).Quote()
  156. << " clause should be specified only once";
  157. return false;
  158. }
  159. const bool optimizeForParam = paramName == "optimize_for";
  160. const auto columnRefState = optimizeForParam ? EColumnRefState::AsStringLiteral : EColumnRefState::Deny;
  161. TColumnRefScope scope(Ctx, columnRefState);
  162. if (optimizeForParam) {
  163. scope.SetNoColumnErrContext("in external call params");
  164. }
  165. TSqlExpression expression(Ctx, Mode);
  166. auto value = expression.Build(node.GetRule_expr3());
  167. if (value && optimizeForParam) {
  168. TDeferredAtom atom;
  169. MakeTableFromExpression(Ctx.Pos(), Ctx, value, atom);
  170. value = new TCallNodeImpl(Ctx.Pos(), "String", { atom.Build() });
  171. }
  172. if (!value) {
  173. return false;
  174. }
  175. CallConfig[paramName] = value;
  176. return true;
  177. }
  178. bool TSqlCallExpr::ConfigureExternalCall(const TRule_external_call_settings& node) {
  179. bool success = ExtractCallParam(node.GetRule_external_call_param1());
  180. for (auto& block: node.GetBlock2()) {
  181. success = ExtractCallParam(block.GetRule_external_call_param2()) && success;
  182. }
  183. return success;
  184. }
  185. bool TSqlCallExpr::Init(const TRule_using_call_expr& node) {
  186. // using_call_expr: ((an_id_or_type NAMESPACE an_id_or_type) | an_id_expr | bind_parameter | (EXTERNAL FUNCTION)) invoke_expr;
  187. const auto& block = node.GetBlock1();
  188. switch (block.Alt_case()) {
  189. case TRule_using_call_expr::TBlock1::kAlt1: {
  190. auto& subblock = block.GetAlt1();
  191. Module = Id(subblock.GetRule_an_id_or_type1(), *this);
  192. Func = Id(subblock.GetRule_an_id_or_type3(), *this);
  193. break;
  194. }
  195. case TRule_using_call_expr::TBlock1::kAlt2: {
  196. Func = Id(block.GetAlt2().GetRule_an_id_expr1(), *this);
  197. break;
  198. }
  199. case TRule_using_call_expr::TBlock1::kAlt3: {
  200. TString bindName;
  201. if (!NamedNodeImpl(block.GetAlt3().GetRule_bind_parameter1(), bindName, *this)) {
  202. return false;
  203. }
  204. Node = GetNamedNode(bindName);
  205. if (!Node) {
  206. return false;
  207. }
  208. break;
  209. }
  210. case TRule_using_call_expr::TBlock1::kAlt4: {
  211. IsExternalCall = true;
  212. break;
  213. }
  214. case TRule_using_call_expr::TBlock1::ALT_NOT_SET:
  215. Y_ABORT("You should change implementation according to grammar changes");
  216. }
  217. YQL_ENSURE(!DistinctAllowed);
  218. UsingCallExpr = true;
  219. TColumnRefScope scope(Ctx, EColumnRefState::Allow);
  220. return Init(node.GetRule_invoke_expr2());
  221. }
  222. void TSqlCallExpr::InitName(const TString& name) {
  223. Module = "";
  224. Func = name;
  225. }
  226. void TSqlCallExpr::InitExpr(const TNodePtr& expr) {
  227. Node = expr;
  228. }
  229. bool TSqlCallExpr::FillArg(const TString& module, const TString& func, size_t& idx, const TRule_named_expr& node) {
  230. const bool isNamed = node.HasBlock2();
  231. TMaybe<EColumnRefState> status;
  232. // TODO: support named args
  233. if (!isNamed) {
  234. status = GetFunctionArgColumnStatus(Ctx, module, func, idx);
  235. }
  236. TNodePtr expr;
  237. if (status) {
  238. TColumnRefScope scope(Ctx, *status, /* isTopLevel = */ false);
  239. expr = NamedExpr(node);
  240. } else {
  241. expr = NamedExpr(node);
  242. }
  243. if (!expr) {
  244. return false;
  245. }
  246. Args.emplace_back(std::move(expr));
  247. if (!isNamed) {
  248. ++idx;
  249. }
  250. return true;
  251. }
  252. bool TSqlCallExpr::FillArgs(const TRule_named_expr_list& node) {
  253. TString module = Module;
  254. TString func = Func;
  255. if (Node && Node->FuncName() && !Node->IsScript()) {
  256. module = Node->ModuleName() ? *Node->ModuleName() : "YQL";
  257. func = *Node->FuncName();
  258. }
  259. size_t idx = 0;
  260. if (!FillArg(module, func, idx, node.GetRule_named_expr1())) {
  261. return false;
  262. }
  263. for (auto& b: node.GetBlock2()) {
  264. if (!FillArg(module, func, idx, b.GetRule_named_expr2())) {
  265. return false;
  266. }
  267. }
  268. return true;
  269. }
  270. bool TSqlCallExpr::Init(const TRule_invoke_expr& node) {
  271. // invoke_expr: LPAREN (opt_set_quantifier named_expr_list COMMA? | ASTERISK)? RPAREN invoke_expr_tail;
  272. // invoke_expr_tail:
  273. // (null_treatment | filter_clause)? (OVER window_name_or_specification)?
  274. // ;
  275. Pos = Ctx.Pos();
  276. if (node.HasBlock2()) {
  277. switch (node.GetBlock2().Alt_case()) {
  278. case TRule_invoke_expr::TBlock2::kAlt1: {
  279. const auto& alt = node.GetBlock2().GetAlt1();
  280. TPosition distinctPos;
  281. if (IsDistinctOptSet(alt.GetRule_opt_set_quantifier1(), distinctPos)) {
  282. if (!DistinctAllowed) {
  283. if (UsingCallExpr) {
  284. Ctx.Error(distinctPos) << "DISTINCT can not be used in PROCESS/REDUCE";
  285. } else {
  286. Ctx.Error(distinctPos) << "DISTINCT can only be used in aggregation functions";
  287. }
  288. return false;
  289. }
  290. YQL_ENSURE(AggMode == EAggregateMode::Normal);
  291. AggMode = EAggregateMode::Distinct;
  292. Ctx.IncrementMonCounter("sql_features", "DistinctInCallExpr");
  293. }
  294. if (!FillArgs(alt.GetRule_named_expr_list2())) {
  295. return false;
  296. }
  297. for (const auto& arg : Args) {
  298. if (arg->GetLabel()) {
  299. NamedArgs.push_back(arg);
  300. }
  301. else {
  302. PositionalArgs.push_back(arg);
  303. if (!NamedArgs.empty()) {
  304. Ctx.Error(arg->GetPos()) << "Unnamed arguments can not follow after named one";
  305. return false;
  306. }
  307. }
  308. }
  309. break;
  310. }
  311. case TRule_invoke_expr::TBlock2::kAlt2:
  312. if (IsExternalCall) {
  313. Ctx.Error() << "You should set EXTERNAL FUNCTION type. Example: EXTERNAL FUNCTION('YANDEX-CLOUD', ...)";
  314. } else {
  315. Args.push_back(new TAsteriskNode(Pos));
  316. }
  317. break;
  318. case TRule_invoke_expr::TBlock2::ALT_NOT_SET:
  319. Y_ABORT("You should change implementation according to grammar changes");
  320. }
  321. }
  322. const auto& tail = node.GetRule_invoke_expr_tail4();
  323. if (tail.HasBlock1()) {
  324. if (IsExternalCall) {
  325. Ctx.Error() << "Additional clause after EXTERNAL FUNCTION(...) is not supported";
  326. return false;
  327. }
  328. switch (tail.GetBlock1().Alt_case()) {
  329. case TRule_invoke_expr_tail::TBlock1::kAlt1: {
  330. if (!tail.HasBlock2()) {
  331. Ctx.Error() << "RESPECT/IGNORE NULLS can only be used with window functions";
  332. return false;
  333. }
  334. const auto& alt = tail.GetBlock1().GetAlt1();
  335. if (alt.GetRule_null_treatment1().Alt_case() == TRule_null_treatment::kAltNullTreatment2) {
  336. SetIgnoreNulls();
  337. }
  338. break;
  339. }
  340. case TRule_invoke_expr_tail::TBlock1::kAlt2: {
  341. Ctx.Error() << "FILTER clause is not supported yet";
  342. return false;
  343. }
  344. case TRule_invoke_expr_tail::TBlock1::ALT_NOT_SET:
  345. Y_ABORT("You should change implementation according to grammar changes");
  346. }
  347. }
  348. if (tail.HasBlock2()) {
  349. if (Ctx.DistinctOverWindow) {
  350. AggMode == EAggregateMode::Distinct ? SetOverWindowDistinct() : SetOverWindow();
  351. } else {
  352. if (AggMode == EAggregateMode::Distinct) {
  353. Ctx.Error() << "DISTINCT is not yet supported in window functions";
  354. return false;
  355. }
  356. SetOverWindow();
  357. }
  358. auto winRule = tail.GetBlock2().GetRule_window_name_or_specification2();
  359. switch (winRule.Alt_case()) {
  360. case TRule_window_name_or_specification::kAltWindowNameOrSpecification1: {
  361. WindowName = Id(winRule.GetAlt_window_name_or_specification1().GetRule_window_name1().GetRule_an_id_window1(), *this);
  362. break;
  363. }
  364. case TRule_window_name_or_specification::kAltWindowNameOrSpecification2: {
  365. if (!Ctx.WinSpecsScopes) {
  366. auto pos = Ctx.TokenPosition(tail.GetBlock2().GetToken1());
  367. Ctx.Error(pos) << "Window and aggregation functions are not allowed in this context";
  368. return false;
  369. }
  370. TWindowSpecificationPtr spec = WindowSpecification(
  371. winRule.GetAlt_window_name_or_specification2().GetRule_window_specification1().GetRule_window_specification_details2());
  372. if (!spec) {
  373. return false;
  374. }
  375. WindowName = Ctx.MakeName("_yql_anonymous_window");
  376. TWinSpecs& specs = Ctx.WinSpecsScopes.back();
  377. YQL_ENSURE(!specs.contains(WindowName));
  378. specs[WindowName] = spec;
  379. break;
  380. }
  381. case TRule_window_name_or_specification::ALT_NOT_SET:
  382. Y_ABORT("You should change implementation according to grammar changes");
  383. }
  384. Ctx.IncrementMonCounter("sql_features", "WindowFunctionOver");
  385. }
  386. return true;
  387. }
  388. void TSqlCallExpr::IncCounters() {
  389. if (Node) {
  390. Ctx.IncrementMonCounter("sql_features", "NamedNodeUseApply");
  391. } else if (!Module.empty()) {
  392. if (ValidateForCounters(Module)) {
  393. Ctx.IncrementMonCounter("udf_modules", Module);
  394. Ctx.IncrementMonCounter("sql_features", "CallUdf");
  395. if (ValidateForCounters(Func)) {
  396. auto scriptType = NKikimr::NMiniKQL::ScriptTypeFromStr(Module);
  397. if (scriptType == NKikimr::NMiniKQL::EScriptType::Unknown) {
  398. Ctx.IncrementMonCounter("udf_functions", Module + "." + Func);
  399. }
  400. }
  401. }
  402. } else if (ValidateForCounters(Func)) {
  403. Ctx.IncrementMonCounter("sql_builtins", Func);
  404. Ctx.IncrementMonCounter("sql_features", "CallBuiltin");
  405. }
  406. }
  407. } // namespace NSQLTranslationV1