sql_call_expr.cpp 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444
  1. #include "sql_call_expr.h"
  2. #include "sql_expression.h"
  3. #include <yql/essentials/parser/proto_ast/gen/v1/SQLv1Lexer.h>
  4. #include <yql/essentials/minikql/mkql_program_builder.h>
  5. namespace NSQLTranslationV1 {
  6. TNodePtr BuildSqlCall(TContext& ctx, TPosition pos, const TString& module, const TString& name, const TVector<TNodePtr>& args,
  7. TNodePtr positionalArgs, TNodePtr namedArgs, TNodePtr customUserType, const TDeferredAtom& typeConfig, TNodePtr runConfig);
  8. using namespace NSQLv1Generated;
  9. static bool ValidateForCounters(const TString& input) {
  10. for (auto c : input) {
  11. if (!(IsAlnum(c) || c == '_')) {
  12. return false;
  13. }
  14. }
  15. return true;
  16. }
  17. TNodePtr TSqlCallExpr::BuildUdf(bool forReduce) {
  18. auto result = Node ? Node : BuildCallable(Pos, Module, Func, Args, forReduce);
  19. if (to_lower(Module) == "tensorflow" && Func == "RunBatch") {
  20. if (Args.size() > 2) {
  21. Args.erase(Args.begin() + 2);
  22. } else {
  23. Ctx.Error(Pos) << "Excepted >= 3 arguments, but got: " << Args.size();
  24. return nullptr;
  25. }
  26. }
  27. return result;
  28. }
  29. TNodePtr TSqlCallExpr::BuildCall() {
  30. TVector<TNodePtr> args;
  31. bool warnOnYqlNameSpace = true;
  32. TUdfNode* udf_node = Node ? Node->GetUdfNode() : nullptr;
  33. if (udf_node) {
  34. if (!udf_node->DoInit(Ctx, nullptr)) {
  35. return nullptr;
  36. }
  37. TNodePtr positional_args = BuildTuple(Pos, PositionalArgs);
  38. TNodePtr positional = positional_args->Y("TypeOf", positional_args);
  39. TNodePtr named_args = BuildStructure(Pos, NamedArgs);
  40. TNodePtr named = named_args->Y("TypeOf", named_args);
  41. TNodePtr custom_user_type = new TCallNodeImpl(Pos, "TupleType", {positional, named, udf_node->GetExternalTypes()});
  42. return BuildSqlCall(Ctx, Pos, udf_node->GetModule(), udf_node->GetFunction(),
  43. args, positional_args, named_args, custom_user_type,
  44. udf_node->GetTypeConfig(), udf_node->GetRunConfig());
  45. }
  46. if (Node && !Node->FuncName()) {
  47. Module = "YQL";
  48. Func = NamedArgs.empty() ? "Apply" : "NamedApply";
  49. warnOnYqlNameSpace = false;
  50. args.push_back(Node);
  51. }
  52. if (Node && Node->FuncName()) {
  53. Module = Node->ModuleName() ? *Node->ModuleName() : "YQL";
  54. Func = *Node->FuncName();
  55. }
  56. bool mustUseNamed = !NamedArgs.empty();
  57. if (mustUseNamed) {
  58. if (Node && !Node->FuncName()) {
  59. mustUseNamed = false;
  60. }
  61. args.emplace_back(BuildTuple(Pos, PositionalArgs));
  62. args.emplace_back(BuildStructure(Pos, NamedArgs));
  63. } else if (IsExternalCall) {
  64. Func = "SqlExternalFunction";
  65. if (Args.size() < 2 || Args.size() > 3) {
  66. Ctx.Error(Pos) << "EXTERNAL FUNCTION requires from 2 to 3 arguments, but got: " << Args.size();
  67. return nullptr;
  68. }
  69. if (Args.size() == 3) {
  70. args.insert(args.end(), Args.begin(), Args.end() - 1);
  71. Args.erase(Args.begin(), Args.end() - 1);
  72. } else {
  73. args.insert(args.end(), Args.begin(), Args.end());
  74. Args.erase(Args.begin(), Args.end());
  75. }
  76. auto configNode = new TExternalFunctionConfig(Pos, CallConfig);
  77. auto configList = new TAstListNodeImpl(Pos, { new TAstAtomNodeImpl(Pos, "quote", 0), configNode });
  78. args.push_back(configList);
  79. } else {
  80. args.insert(args.end(), Args.begin(), Args.end());
  81. }
  82. auto result = BuildBuiltinFunc(Ctx, Pos, Func, args, Module, AggMode, &mustUseNamed, warnOnYqlNameSpace);
  83. if (mustUseNamed) {
  84. Error() << "Named args are used for call, but unsupported by function: " << Func;
  85. return nullptr;
  86. }
  87. if (WindowName) {
  88. result = BuildCalcOverWindow(Pos, WindowName, result);
  89. }
  90. return result;
  91. }
  92. bool TSqlCallExpr::Init(const TRule_value_constructor& node) {
  93. switch (node.Alt_case()) {
  94. case TRule_value_constructor::kAltValueConstructor1: {
  95. auto& ctor = node.GetAlt_value_constructor1();
  96. Func = "Variant";
  97. TSqlExpression expr(Ctx, Mode);
  98. if (!Expr(expr, Args, ctor.GetRule_expr3())) {
  99. return false;
  100. }
  101. if (!Expr(expr, Args, ctor.GetRule_expr5())) {
  102. return false;
  103. }
  104. if (!Expr(expr, Args, ctor.GetRule_expr7())) {
  105. return false;
  106. }
  107. break;
  108. }
  109. case TRule_value_constructor::kAltValueConstructor2: {
  110. auto& ctor = node.GetAlt_value_constructor2();
  111. Func = "Enum";
  112. TSqlExpression expr(Ctx, Mode);
  113. if (!Expr(expr, Args, ctor.GetRule_expr3())) {
  114. return false;
  115. }
  116. if (!Expr(expr, Args, ctor.GetRule_expr5())) {
  117. return false;
  118. }
  119. break;
  120. }
  121. case TRule_value_constructor::kAltValueConstructor3: {
  122. auto& ctor = node.GetAlt_value_constructor3();
  123. Func = "Callable";
  124. TSqlExpression expr(Ctx, Mode);
  125. if (!Expr(expr, Args, ctor.GetRule_expr3())) {
  126. return false;
  127. }
  128. if (!Expr(expr, Args, ctor.GetRule_expr5())) {
  129. return false;
  130. }
  131. break;
  132. }
  133. case TRule_value_constructor::ALT_NOT_SET:
  134. Y_ABORT("You should change implementation according to grammar changes");
  135. }
  136. PositionalArgs = Args;
  137. return true;
  138. }
  139. bool TSqlCallExpr::ExtractCallParam(const TRule_external_call_param& node) {
  140. TString paramName = Id(node.GetRule_an_id1(), *this);
  141. paramName = to_lower(paramName);
  142. if (CallConfig.contains(paramName)) {
  143. Ctx.Error() << "WITH " << to_upper(paramName).Quote()
  144. << " clause should be specified only once";
  145. return false;
  146. }
  147. const bool optimizeForParam = paramName == "optimize_for";
  148. const auto columnRefState = optimizeForParam ? EColumnRefState::AsStringLiteral : EColumnRefState::Deny;
  149. TColumnRefScope scope(Ctx, columnRefState);
  150. if (optimizeForParam) {
  151. scope.SetNoColumnErrContext("in external call params");
  152. }
  153. TSqlExpression expression(Ctx, Mode);
  154. auto value = expression.Build(node.GetRule_expr3());
  155. if (value && optimizeForParam) {
  156. TDeferredAtom atom;
  157. MakeTableFromExpression(Ctx.Pos(), Ctx, value, atom);
  158. value = new TCallNodeImpl(Ctx.Pos(), "String", { atom.Build() });
  159. }
  160. if (!value) {
  161. return false;
  162. }
  163. CallConfig[paramName] = value;
  164. return true;
  165. }
  166. bool TSqlCallExpr::ConfigureExternalCall(const TRule_external_call_settings& node) {
  167. bool success = ExtractCallParam(node.GetRule_external_call_param1());
  168. for (auto& block: node.GetBlock2()) {
  169. success = ExtractCallParam(block.GetRule_external_call_param2()) && success;
  170. }
  171. return success;
  172. }
  173. bool TSqlCallExpr::Init(const TRule_using_call_expr& node) {
  174. // using_call_expr: ((an_id_or_type NAMESPACE an_id_or_type) | an_id_expr | bind_parameter | (EXTERNAL FUNCTION)) invoke_expr;
  175. const auto& block = node.GetBlock1();
  176. switch (block.Alt_case()) {
  177. case TRule_using_call_expr::TBlock1::kAlt1: {
  178. auto& subblock = block.GetAlt1();
  179. Module = Id(subblock.GetRule_an_id_or_type1(), *this);
  180. Func = Id(subblock.GetRule_an_id_or_type3(), *this);
  181. break;
  182. }
  183. case TRule_using_call_expr::TBlock1::kAlt2: {
  184. Func = Id(block.GetAlt2().GetRule_an_id_expr1(), *this);
  185. break;
  186. }
  187. case TRule_using_call_expr::TBlock1::kAlt3: {
  188. TString bindName;
  189. if (!NamedNodeImpl(block.GetAlt3().GetRule_bind_parameter1(), bindName, *this)) {
  190. return false;
  191. }
  192. Node = GetNamedNode(bindName);
  193. if (!Node) {
  194. return false;
  195. }
  196. break;
  197. }
  198. case TRule_using_call_expr::TBlock1::kAlt4: {
  199. IsExternalCall = true;
  200. break;
  201. }
  202. case TRule_using_call_expr::TBlock1::ALT_NOT_SET:
  203. Y_ABORT("You should change implementation according to grammar changes");
  204. }
  205. YQL_ENSURE(!DistinctAllowed);
  206. UsingCallExpr = true;
  207. TColumnRefScope scope(Ctx, EColumnRefState::Allow);
  208. return Init(node.GetRule_invoke_expr2());
  209. }
  210. void TSqlCallExpr::InitName(const TString& name) {
  211. Module = "";
  212. Func = name;
  213. }
  214. void TSqlCallExpr::InitExpr(const TNodePtr& expr) {
  215. Node = expr;
  216. }
  217. bool TSqlCallExpr::FillArg(const TString& module, const TString& func, size_t& idx, const TRule_named_expr& node) {
  218. const bool isNamed = node.HasBlock2();
  219. TMaybe<EColumnRefState> status;
  220. // TODO: support named args
  221. if (!isNamed) {
  222. status = GetFunctionArgColumnStatus(Ctx, module, func, idx);
  223. }
  224. TNodePtr expr;
  225. if (status) {
  226. TColumnRefScope scope(Ctx, *status, /* isTopLevel = */ false);
  227. expr = NamedExpr(node);
  228. } else {
  229. expr = NamedExpr(node);
  230. }
  231. if (!expr) {
  232. return false;
  233. }
  234. Args.emplace_back(std::move(expr));
  235. if (!isNamed) {
  236. ++idx;
  237. }
  238. return true;
  239. }
  240. bool TSqlCallExpr::FillArgs(const TRule_named_expr_list& node) {
  241. TString module = Module;
  242. TString func = Func;
  243. if (Node && Node->FuncName()) {
  244. module = Node->ModuleName() ? *Node->ModuleName() : "YQL";
  245. func = *Node->FuncName();
  246. }
  247. size_t idx = 0;
  248. if (!FillArg(module, func, idx, node.GetRule_named_expr1())) {
  249. return false;
  250. }
  251. for (auto& b: node.GetBlock2()) {
  252. if (!FillArg(module, func, idx, b.GetRule_named_expr2())) {
  253. return false;
  254. }
  255. }
  256. return true;
  257. }
  258. bool TSqlCallExpr::Init(const TRule_invoke_expr& node) {
  259. // invoke_expr: LPAREN (opt_set_quantifier named_expr_list COMMA? | ASTERISK)? RPAREN invoke_expr_tail;
  260. // invoke_expr_tail:
  261. // (null_treatment | filter_clause)? (OVER window_name_or_specification)?
  262. // ;
  263. Pos = Ctx.Pos();
  264. if (node.HasBlock2()) {
  265. switch (node.GetBlock2().Alt_case()) {
  266. case TRule_invoke_expr::TBlock2::kAlt1: {
  267. const auto& alt = node.GetBlock2().GetAlt1();
  268. TPosition distinctPos;
  269. if (IsDistinctOptSet(alt.GetRule_opt_set_quantifier1(), distinctPos)) {
  270. if (!DistinctAllowed) {
  271. if (UsingCallExpr) {
  272. Ctx.Error(distinctPos) << "DISTINCT can not be used in PROCESS/REDUCE";
  273. } else {
  274. Ctx.Error(distinctPos) << "DISTINCT can only be used in aggregation functions";
  275. }
  276. return false;
  277. }
  278. YQL_ENSURE(AggMode == EAggregateMode::Normal);
  279. AggMode = EAggregateMode::Distinct;
  280. Ctx.IncrementMonCounter("sql_features", "DistinctInCallExpr");
  281. }
  282. if (!FillArgs(alt.GetRule_named_expr_list2())) {
  283. return false;
  284. }
  285. for (const auto& arg : Args) {
  286. if (arg->GetLabel()) {
  287. NamedArgs.push_back(arg);
  288. }
  289. else {
  290. PositionalArgs.push_back(arg);
  291. if (!NamedArgs.empty()) {
  292. Ctx.Error(arg->GetPos()) << "Unnamed arguments can not follow after named one";
  293. return false;
  294. }
  295. }
  296. }
  297. break;
  298. }
  299. case TRule_invoke_expr::TBlock2::kAlt2:
  300. if (IsExternalCall) {
  301. Ctx.Error() << "You should set EXTERNAL FUNCTION type. Example: EXTERNAL FUNCTION('YANDEX-CLOUD', ...)";
  302. } else {
  303. Args.push_back(new TAsteriskNode(Pos));
  304. }
  305. break;
  306. case TRule_invoke_expr::TBlock2::ALT_NOT_SET:
  307. Y_ABORT("You should change implementation according to grammar changes");
  308. }
  309. }
  310. const auto& tail = node.GetRule_invoke_expr_tail4();
  311. if (tail.HasBlock1()) {
  312. if (IsExternalCall) {
  313. Ctx.Error() << "Additional clause after EXTERNAL FUNCTION(...) is not supported";
  314. return false;
  315. }
  316. switch (tail.GetBlock1().Alt_case()) {
  317. case TRule_invoke_expr_tail::TBlock1::kAlt1: {
  318. if (!tail.HasBlock2()) {
  319. Ctx.Error() << "RESPECT/IGNORE NULLS can only be used with window functions";
  320. return false;
  321. }
  322. const auto& alt = tail.GetBlock1().GetAlt1();
  323. if (alt.GetRule_null_treatment1().Alt_case() == TRule_null_treatment::kAltNullTreatment2) {
  324. SetIgnoreNulls();
  325. }
  326. break;
  327. }
  328. case TRule_invoke_expr_tail::TBlock1::kAlt2: {
  329. Ctx.Error() << "FILTER clause is not supported yet";
  330. return false;
  331. }
  332. case TRule_invoke_expr_tail::TBlock1::ALT_NOT_SET:
  333. Y_ABORT("You should change implementation according to grammar changes");
  334. }
  335. }
  336. if (tail.HasBlock2()) {
  337. if (Ctx.DistinctOverWindow) {
  338. AggMode == EAggregateMode::Distinct ? SetOverWindowDistinct() : SetOverWindow();
  339. } else {
  340. if (AggMode == EAggregateMode::Distinct) {
  341. Ctx.Error() << "DISTINCT is not yet supported in window functions";
  342. return false;
  343. }
  344. SetOverWindow();
  345. }
  346. auto winRule = tail.GetBlock2().GetRule_window_name_or_specification2();
  347. switch (winRule.Alt_case()) {
  348. case TRule_window_name_or_specification::kAltWindowNameOrSpecification1: {
  349. WindowName = Id(winRule.GetAlt_window_name_or_specification1().GetRule_window_name1().GetRule_an_id_window1(), *this);
  350. break;
  351. }
  352. case TRule_window_name_or_specification::kAltWindowNameOrSpecification2: {
  353. if (!Ctx.WinSpecsScopes) {
  354. auto pos = Ctx.TokenPosition(tail.GetBlock2().GetToken1());
  355. Ctx.Error(pos) << "Window and aggregation functions are not allowed in this context";
  356. return false;
  357. }
  358. TWindowSpecificationPtr spec = WindowSpecification(
  359. winRule.GetAlt_window_name_or_specification2().GetRule_window_specification1().GetRule_window_specification_details2());
  360. if (!spec) {
  361. return false;
  362. }
  363. WindowName = Ctx.MakeName("_yql_anonymous_window");
  364. TWinSpecs& specs = Ctx.WinSpecsScopes.back();
  365. YQL_ENSURE(!specs.contains(WindowName));
  366. specs[WindowName] = spec;
  367. break;
  368. }
  369. case TRule_window_name_or_specification::ALT_NOT_SET:
  370. Y_ABORT("You should change implementation according to grammar changes");
  371. }
  372. Ctx.IncrementMonCounter("sql_features", "WindowFunctionOver");
  373. }
  374. return true;
  375. }
  376. void TSqlCallExpr::IncCounters() {
  377. if (Node) {
  378. Ctx.IncrementMonCounter("sql_features", "NamedNodeUseApply");
  379. } else if (!Module.empty()) {
  380. if (ValidateForCounters(Module)) {
  381. Ctx.IncrementMonCounter("udf_modules", Module);
  382. Ctx.IncrementMonCounter("sql_features", "CallUdf");
  383. if (ValidateForCounters(Func)) {
  384. auto scriptType = NKikimr::NMiniKQL::ScriptTypeFromStr(Module);
  385. if (scriptType == NKikimr::NMiniKQL::EScriptType::Unknown) {
  386. Ctx.IncrementMonCounter("udf_functions", Module + "." + Func);
  387. }
  388. }
  389. }
  390. } else if (ValidateForCounters(Func)) {
  391. Ctx.IncrementMonCounter("sql_builtins", Func);
  392. Ctx.IncrementMonCounter("sql_features", "CallBuiltin");
  393. }
  394. }
  395. } // namespace NSQLTranslationV1