sql_expression.cpp 99 KB


  1. #include "sql_expression.h"
  2. #include "sql_call_expr.h"
  3. #include "sql_select.h"
  4. #include "sql_values.h"
  5. #include <yql/essentials/parser/proto_ast/gen/v1/SQLv1Lexer.h>
  6. #include <yql/essentials/parser/proto_ast/gen/v1_antlr4/SQLv1Antlr4Lexer.h>
  7. #include <yql/essentials/utils/utf8.h>
  8. #include <util/charset/wide.h>
  9. #include <util/string/ascii.h>
  10. #include <util/string/hex.h>
  11. namespace NSQLTranslationV1 {
  12. using NALPDefault::SQLv1LexerTokens;
  13. using NALPDefaultAntlr4::SQLv1Antlr4Lexer;
  14. using namespace NSQLv1Generated;
  15. TNodePtr TSqlExpression::Build(const TRule_expr& node) {
  16. // expr:
  17. // or_subexpr (OR or_subexpr)*
  18. // | type_name_composite
  19. switch (node.Alt_case()) {
  20. case TRule_expr::kAltExpr1: {
  21. auto getNode = [](const TRule_expr_TAlt1_TBlock2& b) -> const TRule_or_subexpr& { return b.GetRule_or_subexpr2(); };
  22. return BinOper("Or", node.GetAlt_expr1().GetRule_or_subexpr1(), getNode,
  23. node.GetAlt_expr1().GetBlock2().begin(), node.GetAlt_expr1().GetBlock2().end(), {});
  24. }
  25. case TRule_expr::kAltExpr2: {
  26. return TypeNode(node.GetAlt_expr2().GetRule_type_name_composite1());
  27. }
  28. case TRule_expr::ALT_NOT_SET:
  29. Y_ABORT("You should change implementation according to grammar changes");
  30. }
  31. }
  32. TNodePtr TSqlExpression::Build(const TRule_lambda_or_parameter& node) {
  33. // lambda_or_parameter:
  34. // lambda
  35. // | bind_parameter
  36. switch (node.Alt_case()) {
  37. case TRule_lambda_or_parameter::kAltLambdaOrParameter1: {
  38. return LambdaRule(node.alt_lambda_or_parameter1().GetRule_lambda1());
  39. }
  40. case TRule_lambda_or_parameter::kAltLambdaOrParameter2: {
  41. TString named;
  42. if (!NamedNodeImpl(node.GetAlt_lambda_or_parameter2().GetRule_bind_parameter1(), named, *this)) {
  43. return nullptr;
  44. }
  45. auto namedNode = GetNamedNode(named);
  46. if (!namedNode) {
  47. return nullptr;
  48. }
  49. return namedNode;
  50. }
  51. case TRule_lambda_or_parameter::ALT_NOT_SET:
  52. Y_ABORT("You should change implementation according to grammar changes");
  53. }
  54. }
  55. TNodePtr TSqlExpression::SubExpr(const TRule_mul_subexpr& node, const TTrailingQuestions& tail) {
  56. // mul_subexpr: con_subexpr (DOUBLE_PIPE con_subexpr)*;
  57. auto getNode = [](const TRule_mul_subexpr::TBlock2& b) -> const TRule_con_subexpr& { return b.GetRule_con_subexpr2(); };
  58. return BinOper("Concat", node.GetRule_con_subexpr1(), getNode, node.GetBlock2().begin(), node.GetBlock2().end(), tail);
  59. }
  60. TNodePtr TSqlExpression::SubExpr(const TRule_add_subexpr& node, const TTrailingQuestions& tail) {
  61. // add_subexpr: mul_subexpr ((ASTERISK | SLASH | PERCENT) mul_subexpr)*;
  62. auto getNode = [](const TRule_add_subexpr::TBlock2& b) -> const TRule_mul_subexpr& { return b.GetRule_mul_subexpr2(); };
  63. return BinOpList(node.GetRule_mul_subexpr1(), getNode, node.GetBlock2().begin(), node.GetBlock2().end(), tail);
  64. }
  65. TNodePtr TSqlExpression::SubExpr(const TRule_bit_subexpr& node, const TTrailingQuestions& tail) {
  66. // bit_subexpr: add_subexpr ((PLUS | MINUS) add_subexpr)*;
  67. auto getNode = [](const TRule_bit_subexpr::TBlock2& b) -> const TRule_add_subexpr& { return b.GetRule_add_subexpr2(); };
  68. return BinOpList(node.GetRule_add_subexpr1(), getNode, node.GetBlock2().begin(), node.GetBlock2().end(), tail);
  69. }
  70. TNodePtr TSqlExpression::SubExpr(const TRule_neq_subexpr& node, const TTrailingQuestions& tailExternal) {
  71. //neq_subexpr: bit_subexpr ((SHIFT_LEFT | shift_right | ROT_LEFT | rot_right | AMPERSAND | PIPE | CARET) bit_subexpr)*
  72. // // trailing QUESTIONS are used in optional simple types (String?) and optional lambda args: ($x, $y?) -> ($x)
  73. // ((double_question neq_subexpr) => double_question neq_subexpr | QUESTION+)?;
  74. YQL_ENSURE(tailExternal.Count == 0);
  75. MaybeUnnamedSmartParenOnTop = MaybeUnnamedSmartParenOnTop && !node.HasBlock3();
  76. TTrailingQuestions tail;
  77. if (node.HasBlock3() && node.GetBlock3().Alt_case() == TRule_neq_subexpr::TBlock3::kAlt2) {
  78. auto& questions = node.GetBlock3().GetAlt2();
  79. tail.Count = questions.GetBlock1().size();
  80. tail.Pos = Ctx.TokenPosition(questions.GetBlock1().begin()->GetToken1());
  81. YQL_ENSURE(tail.Count > 0);
  82. }
  83. auto getNode = [](const TRule_neq_subexpr::TBlock2& b) -> const TRule_bit_subexpr& { return b.GetRule_bit_subexpr2(); };
  84. auto result = BinOpList(node.GetRule_bit_subexpr1(), getNode, node.GetBlock2().begin(), node.GetBlock2().end(), tail);
  85. if (!result) {
  86. return {};
  87. }
  88. if (node.HasBlock3()) {
  89. auto& block = node.GetBlock3();
  90. if (block.Alt_case() == TRule_neq_subexpr::TBlock3::kAlt1) {
  91. TSqlExpression altExpr(Ctx, Mode);
  92. auto altResult = SubExpr(block.GetAlt1().GetRule_neq_subexpr2(), {});
  93. if (!altResult) {
  94. return {};
  95. }
  96. const TVector<TNodePtr> args({result, altResult});
  97. Token(block.GetAlt1().GetRule_double_question1().GetToken1());
  98. result = BuildBuiltinFunc(Ctx, Ctx.Pos(), "Coalesce", args);
  99. }
  100. }
  101. return result;
  102. }
  103. TNodePtr TSqlExpression::SubExpr(const TRule_eq_subexpr& node, const TTrailingQuestions& tail) {
  104. // eq_subexpr: neq_subexpr ((LESS | LESS_OR_EQ | GREATER | GREATER_OR_EQ) neq_subexpr)*;
  105. auto getNode = [](const TRule_eq_subexpr::TBlock2& b) -> const TRule_neq_subexpr& { return b.GetRule_neq_subexpr2(); };
  106. return BinOpList(node.GetRule_neq_subexpr1(), getNode, node.GetBlock2().begin(), node.GetBlock2().end(), tail);
  107. }
  108. TNodePtr TSqlExpression::SubExpr(const TRule_or_subexpr& node, const TTrailingQuestions& tail) {
  109. // or_subexpr: and_subexpr (AND and_subexpr)*;
  110. auto getNode = [](const TRule_or_subexpr::TBlock2& b) -> const TRule_and_subexpr& { return b.GetRule_and_subexpr2(); };
  111. return BinOper("And", node.GetRule_and_subexpr1(), getNode, node.GetBlock2().begin(), node.GetBlock2().end(), tail);
  112. }
  113. TNodePtr TSqlExpression::SubExpr(const TRule_and_subexpr& node, const TTrailingQuestions& tail) {
  114. // and_subexpr: xor_subexpr (XOR xor_subexpr)*;
  115. auto getNode = [](const TRule_and_subexpr::TBlock2& b) -> const TRule_xor_subexpr& { return b.GetRule_xor_subexpr2(); };
  116. return BinOper("Xor", node.GetRule_xor_subexpr1(), getNode, node.GetBlock2().begin(), node.GetBlock2().end(), tail);
  117. }
  118. bool ChangefeedSettingsEntry(const TRule_changefeed_settings_entry& node, TSqlExpression& ctx, TChangefeedSettings& settings, bool alter) {
  119. const auto id = IdEx(node.GetRule_an_id1(), ctx);
  120. if (alter) {
  121. // currently we don't support alter settings
  122. ctx.Error() << to_upper(id.Name) << " alter is not supported";
  123. return false;
  124. }
  125. const auto& setting = node.GetRule_changefeed_setting_value3();
  126. auto exprNode = ctx.Build(setting.GetRule_expr1());
  127. if (!exprNode) {
  128. ctx.Context().Error(id.Pos) << "Invalid changefeed setting: " << id.Name;
  129. return false;
  130. }
  131. if (to_lower(id.Name) == "sink_type") {
  132. if (!exprNode->IsLiteral() || exprNode->GetLiteralType() != "String") {
  133. ctx.Context().Error() << "Literal of String type is expected for " << id.Name;
  134. return false;
  135. }
  136. const auto value = exprNode->GetLiteralValue();
  137. if (to_lower(value) == "local") {
  138. settings.SinkSettings = TChangefeedSettings::TLocalSinkSettings();
  139. } else {
  140. ctx.Context().Error() << "Unknown changefeed sink type: " << value;
  141. return false;
  142. }
  143. } else if (to_lower(id.Name) == "mode") {
  144. if (!exprNode->IsLiteral() || exprNode->GetLiteralType() != "String") {
  145. ctx.Context().Error() << "Literal of String type is expected for " << id.Name;
  146. return false;
  147. }
  148. settings.Mode = exprNode;
  149. } else if (to_lower(id.Name) == "format") {
  150. if (!exprNode->IsLiteral() || exprNode->GetLiteralType() != "String") {
  151. ctx.Context().Error() << "Literal of String type is expected for " << id.Name;
  152. return false;
  153. }
  154. settings.Format = exprNode;
  155. } else if (to_lower(id.Name) == "initial_scan") {
  156. if (!exprNode->IsLiteral() || exprNode->GetLiteralType() != "Bool") {
  157. ctx.Context().Error() << "Literal of Bool type is expected for " << id.Name;
  158. return false;
  159. }
  160. settings.InitialScan = exprNode;
  161. } else if (to_lower(id.Name) == "virtual_timestamps") {
  162. if (!exprNode->IsLiteral() || exprNode->GetLiteralType() != "Bool") {
  163. ctx.Context().Error() << "Literal of Bool type is expected for " << id.Name;
  164. return false;
  165. }
  166. settings.VirtualTimestamps = exprNode;
  167. } else if (to_lower(id.Name) == "barriers_interval" || to_lower(id.Name) == "resolved_timestamps") {
  168. if (exprNode->GetOpName() != "Interval") {
  169. ctx.Context().Error() << "Literal of Interval type is expected for " << id.Name;
  170. return false;
  171. }
  172. settings.BarriersInterval = exprNode;
  173. } else if (to_lower(id.Name) == "retention_period") {
  174. if (exprNode->GetOpName() != "Interval") {
  175. ctx.Context().Error() << "Literal of Interval type is expected for " << id.Name;
  176. return false;
  177. }
  178. settings.RetentionPeriod = exprNode;
  179. } else if (to_lower(id.Name) == "topic_auto_partitioning") {
  180. auto v = to_lower(exprNode->GetLiteralValue());
  181. if (v != "enabled" && v != "disabled") {
  182. ctx.Context().Error() << "Literal of Interval type is expected for " << id.Name;
  183. }
  184. settings.TopicAutoPartitioning = exprNode;
  185. } else if (to_lower(id.Name) == "topic_max_active_partitions") {
  186. if (!exprNode->IsIntegerLiteral()) {
  187. ctx.Context().Error() << "Literal of integer type is expected for " << id.Name;
  188. return false;
  189. }
  190. settings.TopicMaxActivePartitions = exprNode;
  191. } else if (to_lower(id.Name) == "topic_min_active_partitions") {
  192. if (!exprNode->IsIntegerLiteral()) {
  193. ctx.Context().Error() << "Literal of integer type is expected for " << id.Name;
  194. return false;
  195. }
  196. settings.TopicPartitions = exprNode;
  197. } else if (to_lower(id.Name) == "aws_region") {
  198. if (!exprNode->IsLiteral() || exprNode->GetLiteralType() != "String") {
  199. ctx.Context().Error() << "Literal of String type is expected for " << id.Name;
  200. return false;
  201. }
  202. settings.AwsRegion = exprNode;
  203. } else {
  204. ctx.Context().Error(id.Pos) << "Unknown changefeed setting: " << id.Name;
  205. return false;
  206. }
  207. return true;
  208. }
  209. bool ChangefeedSettings(const TRule_changefeed_settings& node, TSqlExpression& ctx, TChangefeedSettings& settings, bool alter) {
  210. if (!ChangefeedSettingsEntry(node.GetRule_changefeed_settings_entry1(), ctx, settings, alter)) {
  211. return false;
  212. }
  213. for (auto& block : node.GetBlock2()) {
  214. if (!ChangefeedSettingsEntry(block.GetRule_changefeed_settings_entry2(), ctx, settings, alter)) {
  215. return false;
  216. }
  217. }
  218. return true;
  219. }
  220. bool CreateChangefeed(const TRule_changefeed& node, TSqlExpression& ctx, TVector<TChangefeedDescription>& changefeeds) {
  221. changefeeds.emplace_back(IdEx(node.GetRule_an_id2(), ctx));
  222. if (!ChangefeedSettings(node.GetRule_changefeed_settings5(), ctx, changefeeds.back().Settings, false)) {
  223. return false;
  224. }
  225. return true;
  226. }
  namespace {
  // Returns true when `literal` contains neither a character classified as alphabetic by
  // std::isalpha nor any byte with the high bit set (>= 0x80). An empty literal yields true.
  //
  // The byte is converted to unsigned char before classification: calling std::isalpha with
  // a negative char value is undefined behaviour, and the high-bit test is done first so such
  // bytes never reach std::isalpha at all.
  bool WithoutAlpha(const std::string_view &literal) {
      return std::none_of(literal.cbegin(), literal.cend(), [](char c) {
          const auto byte = static_cast<unsigned char>(c);
          return byte >= 0x80 || std::isalpha(byte) != 0;
      });
  }
  }
  232. bool Expr(TSqlExpression& sqlExpr, TVector<TNodePtr>& exprNodes, const TRule_expr& node) {
  233. TNodePtr exprNode = sqlExpr.Build(node);
  234. if (!exprNode) {
  235. return false;
  236. }
  237. exprNodes.push_back(exprNode);
  238. return true;
  239. }
  240. bool ExprList(TSqlExpression& sqlExpr, TVector<TNodePtr>& exprNodes, const TRule_expr_list& node) {
  241. if (!Expr(sqlExpr, exprNodes, node.GetRule_expr1())) {
  242. return false;
  243. }
  244. for (auto b: node.GetBlock2()) {
  245. sqlExpr.Token(b.GetToken1());
  246. if (!Expr(sqlExpr, exprNodes, b.GetRule_expr2())) {
  247. return false;
  248. }
  249. }
  250. return true;
  251. }
  252. bool ParseNumbers(TContext& ctx, const TString& strOrig, ui64& value, TString& suffix) {
  253. const auto str = to_lower(strOrig);
  254. const auto strLen = str.size();
  255. ui64 base = 10;
  256. if (strLen > 2 && str[0] == '0') {
  257. const auto formatChar = str[1];
  258. if (formatChar == 'x') {
  259. base = 16;
  260. } else if (formatChar == 'o') {
  261. base = 8;
  262. } else if (formatChar == 'b') {
  263. base = 2;
  264. }
  265. }
  266. if (strLen > 1) {
  267. auto iter = str.cend() - 1;
  268. if (*iter == 'l' || *iter == 's' || *iter == 't' || *iter == 's' || *iter == 'i' || *iter == 'b' || *iter == 'n') {
  269. --iter;
  270. }
  271. if (*iter == 'u' || *iter == 'p') {
  272. --iter;
  273. }
  274. suffix = TString(++iter, str.cend());
  275. }
  276. value = 0;
  277. const TString digString(str.begin() + (base == 10 ? 0 : 2), str.end() - suffix.size());
  278. for (const char& cur: digString) {
  279. const ui64 curDigit = Char2DigitTable[static_cast<int>(cur)];
  280. if (curDigit >= base) {
  281. ctx.Error(ctx.Pos()) << "Failed to parse number from string: " << strOrig << ", char: '" << cur <<
  282. "' is out of base: " << base;
  283. return false;
  284. }
  285. ui64 curValue = value;
  286. value *= base;
  287. bool overflow = ((value / base) != curValue);
  288. if (!overflow) {
  289. curValue = value;
  290. value += curDigit;
  291. overflow = value < curValue;
  292. }
  293. if (overflow) {
  294. ctx.Error(ctx.Pos()) << "Failed to parse number from string: " << strOrig << ", number limit overflow";
  295. return false;
  296. }
  297. }
  298. return true;
  299. }
  300. TNodePtr LiteralNumber(TContext& ctx, const TRule_integer& node) {
  301. const TString intergerString = ctx.Token(node.GetToken1());
  302. if (to_lower(intergerString).EndsWith("pn")) {
  303. // TODO: add validation
  304. return new TLiteralNode(ctx.Pos(), "PgNumeric", intergerString.substr(0, intergerString.size() - 2));
  305. }
  306. ui64 value;
  307. TString suffix;
  308. if (!ParseNumbers(ctx, intergerString, value, suffix)) {
  309. return {};
  310. }
  311. const bool noSpaceForInt32 = value >> 31;
  312. const bool noSpaceForInt64 = value >> 63;
  313. if (suffix == "") {
  314. bool implicitType = true;
  315. if (noSpaceForInt64) {
  316. return new TLiteralNumberNode<ui64>(ctx.Pos(), "Uint64", ToString(value), implicitType);
  317. } else if (noSpaceForInt32) {
  318. return new TLiteralNumberNode<i64>(ctx.Pos(), "Int64", ToString(value), implicitType);
  319. }
  320. return new TLiteralNumberNode<i32>(ctx.Pos(), "Int32", ToString(value), implicitType);
  321. } else if (suffix == "p") {
  322. bool implicitType = true;
  323. if (noSpaceForInt64) {
  324. ctx.Error(ctx.Pos()) << "Failed to parse number from string: " << intergerString << ", 64 bit signed integer overflow";
  325. return {};
  326. } else if (noSpaceForInt32) {
  327. return new TLiteralNumberNode<i64>(ctx.Pos(), "PgInt8", ToString(value), implicitType);
  328. }
  329. return new TLiteralNumberNode<i32>(ctx.Pos(), "PgInt4", ToString(value), implicitType);
  330. } else if (suffix == "u") {
  331. return new TLiteralNumberNode<ui32>(ctx.Pos(), "Uint32", ToString(value));
  332. } else if (suffix == "ul") {
  333. return new TLiteralNumberNode<ui64>(ctx.Pos(), "Uint64", ToString(value));
  334. } else if (suffix == "ut") {
  335. return new TLiteralNumberNode<ui8>(ctx.Pos(), "Uint8", ToString(value));
  336. } else if (suffix == "t") {
  337. return new TLiteralNumberNode<i8>(ctx.Pos(), "Int8", ToString(value));
  338. } else if (suffix == "l") {
  339. return new TLiteralNumberNode<i64>(ctx.Pos(), "Int64", ToString(value));
  340. } else if (suffix == "us") {
  341. return new TLiteralNumberNode<ui16>(ctx.Pos(), "Uint16", ToString(value));
  342. } else if (suffix == "s") {
  343. return new TLiteralNumberNode<i16>(ctx.Pos(), "Int16", ToString(value));
  344. } else if (suffix == "ps") {
  345. return new TLiteralNumberNode<i16>(ctx.Pos(), "PgInt2", ToString(value));
  346. } else if (suffix == "pi") {
  347. return new TLiteralNumberNode<i32>(ctx.Pos(), "PgInt4", ToString(value));
  348. } else if (suffix == "pb") {
  349. return new TLiteralNumberNode<i64>(ctx.Pos(), "PgInt8", ToString(value));
  350. } else {
  351. ctx.Error(ctx.Pos()) << "Failed to parse number from string: " << intergerString << ", invalid suffix: " << suffix;
  352. return {};
  353. }
  354. }
  355. TNodePtr LiteralReal(TContext& ctx, const TRule_real& node) {
  356. const TString value(ctx.Token(node.GetToken1()));
  357. YQL_ENSURE(!value.empty());
  358. auto lower = to_lower(value);
  359. if (lower.EndsWith("f")) {
  360. return new TLiteralNumberNode<float>(ctx.Pos(), "Float", value.substr(0, value.size()-1));
  361. } else if (lower.EndsWith("p")) {
  362. return new TLiteralNumberNode<float>(ctx.Pos(), "PgFloat8", value.substr(0, value.size()-1));
  363. } else if (lower.EndsWith("pf4")) {
  364. return new TLiteralNumberNode<float>(ctx.Pos(), "PgFloat4", value.substr(0, value.size()-3));
  365. } else if (lower.EndsWith("pf8")) {
  366. return new TLiteralNumberNode<float>(ctx.Pos(), "PgFloat8", value.substr(0, value.size()-3));
  367. } else if (lower.EndsWith("pn")) {
  368. return new TLiteralNode(ctx.Pos(), "PgNumeric", value.substr(0, value.size()-2));
  369. } else {
  370. return new TLiteralNumberNode<double>(ctx.Pos(), "Double", value);
  371. }
  372. }
  373. TMaybe<TExprOrIdent> TSqlExpression::LiteralExpr(const TRule_literal_value& node) {
  374. TExprOrIdent result;
  375. switch (node.Alt_case()) {
  376. case TRule_literal_value::kAltLiteralValue1: {
  377. result.Expr = LiteralNumber(Ctx, node.GetAlt_literal_value1().GetRule_integer1());
  378. break;
  379. }
  380. case TRule_literal_value::kAltLiteralValue2: {
  381. result.Expr = LiteralReal(Ctx, node.GetAlt_literal_value2().GetRule_real1());
  382. break;
  383. }
  384. case TRule_literal_value::kAltLiteralValue3: {
  385. const TString value(Token(node.GetAlt_literal_value3().GetToken1()));
  386. return BuildLiteralTypedSmartStringOrId(Ctx, value);
  387. }
  388. case TRule_literal_value::kAltLiteralValue5: {
  389. Token(node.GetAlt_literal_value5().GetToken1());
  390. result.Expr = BuildLiteralNull(Ctx.Pos());
  391. break;
  392. }
  393. case TRule_literal_value::kAltLiteralValue9: {
  394. const TString value(to_lower(Token(node.GetAlt_literal_value9().GetRule_bool_value1().GetToken1())));
  395. result.Expr = BuildLiteralBool(Ctx.Pos(), FromString<bool>(value));
  396. break;
  397. }
  398. case TRule_literal_value::kAltLiteralValue10: {
  399. result.Expr = BuildEmptyAction(Ctx.Pos());
  400. break;
  401. }
  402. case TRule_literal_value::kAltLiteralValue4:
  403. case TRule_literal_value::kAltLiteralValue6:
  404. case TRule_literal_value::kAltLiteralValue7:
  405. case TRule_literal_value::kAltLiteralValue8:
  406. case TRule_literal_value::ALT_NOT_SET:
  407. AltNotImplemented("literal_value", node);
  408. }
  409. if (!result.Expr) {
  410. return {};
  411. }
  412. return result;
  413. }
  414. template<typename TUnarySubExprType>
  415. TNodePtr TSqlExpression::UnaryExpr(const TUnarySubExprType& node, const TTrailingQuestions& tail) {
  416. if constexpr (std::is_same_v<TUnarySubExprType, TRule_unary_subexpr>) {
  417. if (node.Alt_case() == TRule_unary_subexpr::kAltUnarySubexpr1) {
  418. return UnaryCasualExpr(node.GetAlt_unary_subexpr1().GetRule_unary_casual_subexpr1(), tail);
  419. } else if (tail.Count) {
  420. UnexpectedQuestionToken(tail);
  421. return {};
  422. } else {
  423. MaybeUnnamedSmartParenOnTop = false;
  424. return JsonApiExpr(node.GetAlt_unary_subexpr2().GetRule_json_api_expr1());
  425. }
  426. } else {
  427. MaybeUnnamedSmartParenOnTop = false;
  428. if (node.Alt_case() == TRule_in_unary_subexpr::kAltInUnarySubexpr1) {
  429. return UnaryCasualExpr(node.GetAlt_in_unary_subexpr1().GetRule_in_unary_casual_subexpr1(), tail);
  430. } else if (tail.Count) {
  431. UnexpectedQuestionToken(tail);
  432. return {};
  433. } else {
  434. return JsonApiExpr(node.GetAlt_in_unary_subexpr2().GetRule_json_api_expr1());
  435. }
  436. }
  437. }
  438. TNodePtr TSqlExpression::JsonPathSpecification(const TRule_jsonpath_spec& node) {
  439. /*
  440. jsonpath_spec: STRING_VALUE;
  441. */
  442. TString value = Token(node.GetToken1());
  443. TPosition pos = Ctx.Pos();
  444. auto parsed = StringContent(Ctx, pos, value);
  445. if (!parsed) {
  446. return nullptr;
  447. }
  448. return new TCallNodeImpl(pos, "Utf8", {BuildQuotedAtom(pos, parsed->Content, parsed->Flags)});
  449. }
  450. TNodePtr TSqlExpression::JsonReturningTypeRule(const TRule_type_name_simple& node) {
  451. /*
  452. (RETURNING type_name_simple)?
  453. */
  454. return TypeSimple(node, /* onlyDataAllowed */ true);
  455. }
  456. TNodePtr TSqlExpression::JsonInputArg(const TRule_json_common_args& node) {
  457. /*
  458. json_common_args: expr COMMA jsonpath_spec (PASSING json_variables)?;
  459. */
  460. TNodePtr jsonExpr = Build(node.GetRule_expr1());
  461. if (!jsonExpr || jsonExpr->IsNull()) {
  462. jsonExpr = new TCallNodeImpl(Ctx.Pos(), "Nothing", {
  463. new TCallNodeImpl(Ctx.Pos(), "OptionalType", {BuildDataType(Ctx.Pos(), "Json")})
  464. });
  465. }
  466. return jsonExpr;
  467. }
  468. void TSqlExpression::AddJsonVariable(const TRule_json_variable& node, TVector<TNodePtr>& children) {
  469. /*
  470. json_variable: expr AS json_variable_name;
  471. */
  472. TNodePtr expr;
  473. TString rawName;
  474. TPosition namePos = Ctx.Pos();
  475. ui32 nameFlags = 0;
  476. expr = Build(node.GetRule_expr1());
  477. const auto& nameRule = node.GetRule_json_variable_name3();
  478. switch (nameRule.GetAltCase()) {
  479. case TRule_json_variable_name::kAltJsonVariableName1:
  480. rawName = Id(nameRule.GetAlt_json_variable_name1().GetRule_id_expr1(), *this);
  481. nameFlags = TNodeFlags::ArbitraryContent;
  482. break;
  483. case TRule_json_variable_name::kAltJsonVariableName2: {
  484. const auto& token = nameRule.GetAlt_json_variable_name2().GetToken1();
  485. namePos = GetPos(token);
  486. auto parsed = StringContentOrIdContent(Ctx, namePos, token.GetValue());
  487. if (!parsed) {
  488. return;
  489. }
  490. rawName = parsed->Content;
  491. nameFlags = parsed->Flags;
  492. break;
  493. }
  494. case TRule_json_variable_name::ALT_NOT_SET:
  495. Y_ABORT("You should change implementation according to grammar changes");
  496. }
  497. TNodePtr nameExpr = BuildQuotedAtom(namePos, rawName, nameFlags);
  498. children.push_back(BuildTuple(namePos, {nameExpr, expr}));
  499. }
  500. void TSqlExpression::AddJsonVariables(const TRule_json_variables& node, TVector<TNodePtr>& children) {
  501. /*
  502. json_variables: json_variable (COMMA json_variable)*;
  503. */
  504. AddJsonVariable(node.GetRule_json_variable1(), children);
  505. for (size_t i = 0; i < node.Block2Size(); i++) {
  506. AddJsonVariable(node.GetBlock2(i).GetRule_json_variable2(), children);
  507. }
  508. }
  509. TNodePtr TSqlExpression::JsonVariables(const TRule_json_common_args& node) {
  510. /*
  511. json_common_args: expr COMMA jsonpath_spec (PASSING json_variables)?;
  512. */
  513. TVector<TNodePtr> variables;
  514. TPosition pos = Ctx.Pos();
  515. if (node.HasBlock4()) {
  516. const auto& block = node.GetBlock4();
  517. pos = GetPos(block.GetToken1());
  518. AddJsonVariables(block.GetRule_json_variables2(), variables);
  519. }
  520. return new TCallNodeImpl(pos, "JsonVariables", variables);
  521. }
  522. void TSqlExpression::AddJsonCommonArgs(const TRule_json_common_args& node, TVector<TNodePtr>& children) {
  523. /*
  524. json_common_args: expr COMMA jsonpath_spec (PASSING json_variables)?;
  525. */
  526. TNodePtr jsonExpr = JsonInputArg(node);
  527. TNodePtr jsonPath = JsonPathSpecification(node.GetRule_jsonpath_spec3());
  528. TNodePtr variables = JsonVariables(node);
  529. children.push_back(jsonExpr);
  530. children.push_back(jsonPath);
  531. children.push_back(variables);
  532. }
  533. TNodePtr TSqlExpression::JsonValueCaseHandler(const TRule_json_case_handler& node, EJsonValueHandlerMode& mode) {
  534. /*
  535. json_case_handler: ERROR | NULL | (DEFAULT expr);
  536. */
  537. switch (node.GetAltCase()) {
  538. case TRule_json_case_handler::kAltJsonCaseHandler1: {
  539. const auto pos = GetPos(node.GetAlt_json_case_handler1().GetToken1());
  540. mode = EJsonValueHandlerMode::Error;
  541. return new TCallNodeImpl(pos, "Null", {});
  542. }
  543. case TRule_json_case_handler::kAltJsonCaseHandler2: {
  544. const auto pos = GetPos(node.GetAlt_json_case_handler2().GetToken1());
  545. mode = EJsonValueHandlerMode::DefaultValue;
  546. return new TCallNodeImpl(pos, "Null", {});
  547. }
  548. case TRule_json_case_handler::kAltJsonCaseHandler3:
  549. mode = EJsonValueHandlerMode::DefaultValue;
  550. return Build(node.GetAlt_json_case_handler3().GetRule_expr2());
  551. case TRule_json_case_handler::ALT_NOT_SET:
  552. Y_ABORT("You should change implementation according to grammar changes");
  553. }
  554. }
  555. void TSqlExpression::AddJsonValueCaseHandlers(const TRule_json_value& node, TVector<TNodePtr>& children) {
  556. /*
  557. json_case_handler*
  558. */
  559. if (node.Block5Size() > 2) {
  560. Ctx.Error() << "Only 1 ON EMPTY and/or 1 ON ERROR clause is expected";
  561. Ctx.IncrementMonCounter("sql_errors", "JsonValueTooManyHandleClauses");
  562. return;
  563. }
  564. TNodePtr onEmpty;
  565. EJsonValueHandlerMode onEmptyMode = EJsonValueHandlerMode::DefaultValue;
  566. TNodePtr onError;
  567. EJsonValueHandlerMode onErrorMode = EJsonValueHandlerMode::DefaultValue;
  568. for (size_t i = 0; i < node.Block5Size(); i++) {
  569. const auto block = node.GetBlock5(i);
  570. const bool isEmptyClause = to_lower(block.GetToken3().GetValue()) == "empty";
  571. if (isEmptyClause && onEmpty != nullptr) {
  572. Ctx.Error() << "Only 1 ON EMPTY clause is expected";
  573. Ctx.IncrementMonCounter("sql_errors", "JsonValueMultipleOnEmptyClauses");
  574. return;
  575. }
  576. if (!isEmptyClause && onError != nullptr) {
  577. Ctx.Error() << "Only 1 ON ERROR clause is expected";
  578. Ctx.IncrementMonCounter("sql_errors", "JsonValueMultipleOnErrorClauses");
  579. return;
  580. }
  581. if (isEmptyClause && onError != nullptr) {
  582. Ctx.Error() << "ON EMPTY clause must be before ON ERROR clause";
  583. Ctx.IncrementMonCounter("sql_errors", "JsonValueOnEmptyAfterOnError");
  584. return;
  585. }
  586. EJsonValueHandlerMode currentMode;
  587. TNodePtr currentHandler = JsonValueCaseHandler(block.GetRule_json_case_handler1(), currentMode);
  588. if (isEmptyClause) {
  589. onEmpty = currentHandler;
  590. onEmptyMode = currentMode;
  591. } else {
  592. onError = currentHandler;
  593. onErrorMode = currentMode;
  594. }
  595. }
  596. if (onEmpty == nullptr) {
  597. onEmpty = new TCallNodeImpl(Ctx.Pos(), "Null", {});
  598. }
  599. if (onError == nullptr) {
  600. onError = new TCallNodeImpl(Ctx.Pos(), "Null", {});
  601. }
  602. children.push_back(BuildQuotedAtom(Ctx.Pos(), ToString(onEmptyMode), TNodeFlags::Default));
  603. children.push_back(onEmpty);
  604. children.push_back(BuildQuotedAtom(Ctx.Pos(), ToString(onErrorMode), TNodeFlags::Default));
  605. children.push_back(onError);
  606. }
  607. TNodePtr TSqlExpression::JsonValueExpr(const TRule_json_value& node) {
  608. /*
  609. json_value: JSON_VALUE LPAREN
  610. json_common_args
  611. (RETURNING type_name_simple)?
  612. (json_case_handler ON (EMPTY | ERROR))*
  613. RPAREN;
  614. */
  615. TVector<TNodePtr> children;
  616. AddJsonCommonArgs(node.GetRule_json_common_args3(), children);
  617. AddJsonValueCaseHandlers(node, children);
  618. if (node.HasBlock4()) {
  619. auto returningType = JsonReturningTypeRule(node.GetBlock4().GetRule_type_name_simple2());
  620. if (!returningType) {
  621. return {};
  622. }
  623. children.push_back(returningType);
  624. }
  625. return new TCallNodeImpl(GetPos(node.GetToken1()), "JsonValue", children);
  626. }
  627. void TSqlExpression::AddJsonExistsHandler(const TRule_json_exists& node, TVector<TNodePtr>& children) {
  628. /*
  629. json_exists: JSON_EXISTS LPAREN
  630. json_common_args
  631. json_exists_handler?
  632. RPAREN;
  633. */
  634. auto buildJustBool = [&](const TPosition& pos, bool value) {
  635. return new TCallNodeImpl(pos, "Just", {BuildLiteralBool(pos, value)});
  636. };
  637. if (!node.HasBlock4()) {
  638. children.push_back(buildJustBool(Ctx.Pos(), false));
  639. return;
  640. }
  641. const auto& handlerRule = node.GetBlock4().GetRule_json_exists_handler1();
  642. const auto& token = handlerRule.GetToken1();
  643. const auto pos = GetPos(token);
  644. const auto mode = to_lower(token.GetValue());
  645. if (mode == "unknown") {
  646. const auto nothingNode = new TCallNodeImpl(pos, "Nothing", {
  647. new TCallNodeImpl(pos, "OptionalType", {BuildDataType(pos, "Bool")})
  648. });
  649. children.push_back(nothingNode);
  650. } else if (mode != "error") {
  651. children.push_back(buildJustBool(pos, FromString<bool>(mode)));
  652. }
  653. }
  654. TNodePtr TSqlExpression::JsonExistsExpr(const TRule_json_exists& node) {
  655. /*
  656. json_exists: JSON_EXISTS LPAREN
  657. json_common_args
  658. json_exists_handler?
  659. RPAREN;
  660. */
  661. TVector<TNodePtr> children;
  662. AddJsonCommonArgs(node.GetRule_json_common_args3(), children);
  663. AddJsonExistsHandler(node, children);
  664. return new TCallNodeImpl(GetPos(node.GetToken1()), "JsonExists", children);
  665. }
  666. EJsonQueryWrap TSqlExpression::JsonQueryWrapper(const TRule_json_query& node) {
  667. /*
  668. json_query: JSON_QUERY LPAREN
  669. json_common_args
  670. (json_query_wrapper WRAPPER)?
  671. (json_query_handler ON EMPTY)?
  672. (json_query_handler ON ERROR)?
  673. RPAREN;
  674. */
  675. // default behaviour - no wrapping
  676. if (!node.HasBlock4()) {
  677. return EJsonQueryWrap::NoWrap;
  678. }
  679. // WITHOUT ARRAY? - no wrapping
  680. const auto& wrapperRule = node.GetBlock4().GetRule_json_query_wrapper1();
  681. if (wrapperRule.GetAltCase() == TRule_json_query_wrapper::kAltJsonQueryWrapper1) {
  682. return EJsonQueryWrap::NoWrap;
  683. }
  684. // WITH (CONDITIONAL | UNCONDITIONAL)? ARRAY? - wrapping depends on 2nd token. Default is UNCONDITIONAL
  685. const auto& withWrapperRule = wrapperRule.GetAlt_json_query_wrapper2();
  686. if (!withWrapperRule.HasBlock2()) {
  687. return EJsonQueryWrap::Wrap;
  688. }
  689. const auto& token = withWrapperRule.GetBlock2().GetToken1();
  690. if (to_lower(token.GetValue()) == "conditional") {
  691. return EJsonQueryWrap::ConditionalWrap;
  692. } else {
  693. return EJsonQueryWrap::Wrap;
  694. }
  695. }
  696. EJsonQueryHandler TSqlExpression::JsonQueryHandler(const TRule_json_query_handler& node) {
  697. /*
  698. json_query_handler: ERROR | NULL | (EMPTY ARRAY) | (EMPTY OBJECT);
  699. */
  700. switch (node.GetAltCase()) {
  701. case TRule_json_query_handler::kAltJsonQueryHandler1:
  702. return EJsonQueryHandler::Error;
  703. case TRule_json_query_handler::kAltJsonQueryHandler2:
  704. return EJsonQueryHandler::Null;
  705. case TRule_json_query_handler::kAltJsonQueryHandler3:
  706. return EJsonQueryHandler::EmptyArray;
  707. case TRule_json_query_handler::kAltJsonQueryHandler4:
  708. return EJsonQueryHandler::EmptyObject;
  709. case TRule_json_query_handler::ALT_NOT_SET:
  710. Y_ABORT("You should change implementation according to grammar changes");
  711. }
  712. }
  713. TNodePtr TSqlExpression::JsonQueryExpr(const TRule_json_query& node) {
  714. /*
  715. json_query: JSON_QUERY LPAREN
  716. json_common_args
  717. (json_query_wrapper WRAPPER)?
  718. (json_query_handler ON EMPTY)?
  719. (json_query_handler ON ERROR)?
  720. RPAREN;
  721. */
  722. TVector<TNodePtr> children;
  723. AddJsonCommonArgs(node.GetRule_json_common_args3(), children);
  724. auto addChild = [&](TPosition pos, const TString& content) {
  725. children.push_back(BuildQuotedAtom(pos, content, TNodeFlags::Default));
  726. };
  727. const auto wrapMode = JsonQueryWrapper(node);
  728. addChild(Ctx.Pos(), ToString(wrapMode));
  729. auto onEmpty = EJsonQueryHandler::Null;
  730. if (node.HasBlock5()) {
  731. if (wrapMode != EJsonQueryWrap::NoWrap) {
  732. Ctx.Error() << "ON EMPTY is prohibited because WRAPPER clause is specified";
  733. Ctx.IncrementMonCounter("sql_errors", "JsonQueryOnEmptyWithWrapper");
  734. return nullptr;
  735. }
  736. onEmpty = JsonQueryHandler(node.GetBlock5().GetRule_json_query_handler1());
  737. }
  738. addChild(Ctx.Pos(), ToString(onEmpty));
  739. auto onError = EJsonQueryHandler::Null;
  740. if (node.HasBlock6()) {
  741. onError = JsonQueryHandler(node.GetBlock6().GetRule_json_query_handler1());
  742. }
  743. addChild(Ctx.Pos(), ToString(onError));
  744. return new TCallNodeImpl(GetPos(node.GetToken1()), "JsonQuery", children);
  745. }
  746. TNodePtr TSqlExpression::JsonApiExpr(const TRule_json_api_expr& node) {
  747. /*
  748. json_api_expr: json_value | json_exists | json_query;
  749. */
  750. TPosition pos = Ctx.Pos();
  751. TNodePtr result = nullptr;
  752. switch (node.GetAltCase()) {
  753. case TRule_json_api_expr::kAltJsonApiExpr1: {
  754. const auto& jsonValue = node.GetAlt_json_api_expr1().GetRule_json_value1();
  755. pos = GetPos(jsonValue.GetToken1());
  756. result = JsonValueExpr(jsonValue);
  757. break;
  758. }
  759. case TRule_json_api_expr::kAltJsonApiExpr2: {
  760. const auto& jsonExists = node.GetAlt_json_api_expr2().GetRule_json_exists1();
  761. pos = GetPos(jsonExists.GetToken1());
  762. result = JsonExistsExpr(jsonExists);
  763. break;
  764. }
  765. case TRule_json_api_expr::kAltJsonApiExpr3: {
  766. const auto& jsonQuery = node.GetAlt_json_api_expr3().GetRule_json_query1();
  767. pos = GetPos(jsonQuery.GetToken1());
  768. result = JsonQueryExpr(jsonQuery);
  769. break;
  770. }
  771. case TRule_json_api_expr::ALT_NOT_SET:
  772. Y_ABORT("You should change implementation according to grammar changes");
  773. }
  774. return result;
  775. }
  776. TNodePtr TSqlExpression::RowPatternVarAccess(TString var, const TRule_unary_subexpr_suffix_TBlock1_TBlock1_TAlt3_TBlock2 block) {
  777. switch (block.GetAltCase()) {
  778. case TRule_unary_subexpr_suffix_TBlock1_TBlock1_TAlt3_TBlock2::kAlt1:
  779. break;
  780. case TRule_unary_subexpr_suffix_TBlock1_TBlock1_TAlt3_TBlock2::kAlt2:
  781. break;
  782. case TRule_unary_subexpr_suffix_TBlock1_TBlock1_TAlt3_TBlock2::kAlt3:
  783. switch (block.GetAlt3().GetRule_an_id_or_type1().GetAltCase()) {
  784. case TRule_an_id_or_type::kAltAnIdOrType1: {
  785. const auto &idOrType = block.GetAlt3().GetRule_an_id_or_type1().GetAlt_an_id_or_type1().GetRule_id_or_type1();
  786. switch(idOrType.GetAltCase()) {
  787. case TRule_id_or_type::kAltIdOrType1: {
  788. const auto column = Id(idOrType.GetAlt_id_or_type1().GetRule_id1(), *this);
  789. return BuildMatchRecognizeColumnAccess(Ctx.Pos(), std::move(var), std::move(column));
  790. }
  791. case TRule_id_or_type::kAltIdOrType2:
  792. break;
  793. case TRule_id_or_type::ALT_NOT_SET:
  794. break;
  795. }
  796. break;
  797. }
  798. case TRule_an_id_or_type::kAltAnIdOrType2:
  799. break;
  800. case TRule_an_id_or_type::ALT_NOT_SET:
  801. break;
  802. }
  803. break;
  804. case TRule_unary_subexpr_suffix_TBlock1_TBlock1_TAlt3_TBlock2::ALT_NOT_SET:
  805. Y_ABORT("You should change implementation according to grammar changes");
  806. }
  807. return {};
  808. }
  809. template<typename TUnaryCasualExprRule>
  810. TNodePtr TSqlExpression::UnaryCasualExpr(const TUnaryCasualExprRule& node, const TTrailingQuestions& tail) {
  811. // unary_casual_subexpr: (id_expr | atom_expr) unary_subexpr_suffix;
  812. // OR
  813. // in_unary_casual_subexpr: (id_expr_in | in_atom_expr) unary_subexpr_suffix;
  814. // where
  815. // unary_subexpr_suffix: (key_expr | invoke_expr |(DOT (bind_parameter | DIGITS | id)))* (COLLATE id)?;
  816. const auto& suffix = node.GetRule_unary_subexpr_suffix2();
  817. const bool suffixIsEmpty = suffix.GetBlock1().empty() && !suffix.HasBlock2();
  818. MaybeUnnamedSmartParenOnTop = MaybeUnnamedSmartParenOnTop && suffixIsEmpty;
  819. TString name;
  820. TNodePtr expr;
  821. bool typePossible = false;
  822. auto& block = node.GetBlock1();
  823. switch (block.Alt_case()) {
  824. case TUnaryCasualExprRule::TBlock1::kAlt1: {
  825. MaybeUnnamedSmartParenOnTop = false;
  826. auto& alt = block.GetAlt1();
  827. if constexpr (std::is_same_v<TUnaryCasualExprRule, TRule_unary_casual_subexpr>) {
  828. name = Id(alt.GetRule_id_expr1(), *this);
  829. typePossible = !IsQuotedId(alt.GetRule_id_expr1(), *this);
  830. } else {
  831. // type was never possible here
  832. name = Id(alt.GetRule_id_expr_in1(), *this);
  833. }
  834. break;
  835. }
  836. case TUnaryCasualExprRule::TBlock1::kAlt2: {
  837. auto& alt = block.GetAlt2();
  838. TMaybe<TExprOrIdent> exprOrId;
  839. if constexpr (std::is_same_v<TUnaryCasualExprRule, TRule_unary_casual_subexpr>) {
  840. exprOrId = AtomExpr(alt.GetRule_atom_expr1(), suffixIsEmpty ? tail : TTrailingQuestions{});
  841. } else {
  842. MaybeUnnamedSmartParenOnTop = false;
  843. exprOrId = InAtomExpr(alt.GetRule_in_atom_expr1(), suffixIsEmpty ? tail : TTrailingQuestions{});
  844. }
  845. if (!exprOrId) {
  846. Ctx.IncrementMonCounter("sql_errors", "BadAtomExpr");
  847. return nullptr;
  848. }
  849. if (!exprOrId->Expr) {
  850. name = exprOrId->Ident;
  851. } else {
  852. expr = exprOrId->Expr;
  853. }
  854. break;
  855. }
  856. case TUnaryCasualExprRule::TBlock1::ALT_NOT_SET:
  857. Y_ABORT("You should change implementation according to grammar changes");
  858. }
  859. // bool onlyDots = true;
  860. bool isColumnRef = !expr;
  861. bool isFirstElem = true;
  862. for (auto& _b : suffix.GetBlock1()) {
  863. auto& b = _b.GetBlock1();
  864. switch (b.Alt_case()) {
  865. case TRule_unary_subexpr_suffix::TBlock1::TBlock1::kAlt1: {
  866. // key_expr
  867. // onlyDots = false;
  868. break;
  869. }
  870. case TRule_unary_subexpr_suffix::TBlock1::TBlock1::kAlt2: {
  871. // invoke_expr - cannot be a column, function name
  872. if (isFirstElem) {
  873. isColumnRef = false;
  874. }
  875. // onlyDots = false;
  876. break;
  877. }
  878. case TRule_unary_subexpr_suffix::TBlock1::TBlock1::kAlt3: {
  879. // In case of MATCH_RECOGNIZE lambdas
  880. // X.Y is treated as Var.Column access
  881. if (isColumnRef && (
  882. EColumnRefState::MatchRecognizeMeasures == Ctx.GetColumnReferenceState() ||
  883. EColumnRefState::MatchRecognizeDefine == Ctx.GetColumnReferenceState() ||
  884. EColumnRefState::MatchRecognizeDefineAggregate == Ctx.GetColumnReferenceState()
  885. )) {
  886. if (suffix.GetBlock1().size() != 1) {
  887. Ctx.Error() << "Expected Var.Column, but got chain of " << suffix.GetBlock1().size() << " column accesses";
  888. return nullptr;
  889. }
  890. return RowPatternVarAccess(std::move(name), b.GetAlt3().GetBlock2());
  891. }
  892. break;
  893. }
  894. case TRule_unary_subexpr_suffix::TBlock1::TBlock1::ALT_NOT_SET:
  895. AltNotImplemented("unary_subexpr_suffix", b);
  896. return nullptr;
  897. }
  898. isFirstElem = false;
  899. }
  900. isFirstElem = true;
  901. TVector<INode::TIdPart> ids;
  902. INode::TPtr lastExpr;
  903. if (!isColumnRef) {
  904. lastExpr = expr;
  905. } else {
  906. const bool flexibleTypes = Ctx.FlexibleTypes;
  907. bool columnOrType = false;
  908. auto columnRefsState = Ctx.GetColumnReferenceState();
  909. bool explicitPgType = columnRefsState == EColumnRefState::AsPgType;
  910. if (explicitPgType && typePossible && suffixIsEmpty) {
  911. auto pgType = BuildSimpleType(Ctx, Ctx.Pos(), name, false);
  912. if (pgType && tail.Count) {
  913. Ctx.Error() << "Optional types are not supported in this context";
  914. return {};
  915. }
  916. return pgType;
  917. }
  918. if (auto simpleType = LookupSimpleType(name, flexibleTypes, false); simpleType && typePossible && suffixIsEmpty) {
  919. if (tail.Count > 0 || columnRefsState == EColumnRefState::Deny || !flexibleTypes) {
  920. // a type
  921. return AddOptionals(BuildSimpleType(Ctx, Ctx.Pos(), name, false), tail.Count);
  922. }
  923. // type or column: ambiguity will be resolved on type annotation stage
  924. columnOrType = columnRefsState == EColumnRefState::Allow;
  925. }
  926. if (tail.Count) {
  927. UnexpectedQuestionToken(tail);
  928. return {};
  929. }
  930. if (!Ctx.CheckColumnReference(Ctx.Pos(), name)) {
  931. return nullptr;
  932. }
  933. ids.push_back(columnOrType ? BuildColumnOrType(Ctx.Pos()) : BuildColumn(Ctx.Pos()));
  934. ids.push_back(name);
  935. }
  936. TPosition pos(Ctx.Pos());
  937. for (auto& _b : suffix.GetBlock1()) {
  938. auto& b = _b.GetBlock1();
  939. switch (b.Alt_case()) {
  940. case TRule_unary_subexpr_suffix::TBlock1::TBlock1::kAlt1: {
  941. // key_expr
  942. auto keyExpr = KeyExpr(b.GetAlt1().GetRule_key_expr1());
  943. if (!keyExpr) {
  944. Ctx.IncrementMonCounter("sql_errors", "BadKeyExpr");
  945. return nullptr;
  946. }
  947. if (!lastExpr) {
  948. lastExpr = BuildAccess(pos, ids, false);
  949. ids.clear();
  950. }
  951. ids.push_back(lastExpr);
  952. ids.push_back(keyExpr);
  953. lastExpr = BuildAccess(pos, ids, true);
  954. ids.clear();
  955. break;
  956. }
  957. case TRule_unary_subexpr_suffix::TBlock1::TBlock1::kAlt2: {
  958. // invoke_expr - cannot be a column, function name
  959. TSqlCallExpr call(Ctx, Mode);
  960. if (isFirstElem && !name.empty()) {
  961. call.AllowDistinct();
  962. call.InitName(name);
  963. } else {
  964. call.InitExpr(lastExpr);
  965. }
  966. bool initRet = call.Init(b.GetAlt2().GetRule_invoke_expr1());
  967. if (initRet) {
  968. call.IncCounters();
  969. }
  970. if (!initRet) {
  971. return nullptr;
  972. }
  973. lastExpr = call.BuildCall();
  974. if (!lastExpr) {
  975. return nullptr;
  976. }
  977. break;
  978. }
  979. case TRule_unary_subexpr_suffix::TBlock1::TBlock1::kAlt3: {
  980. // dot
  981. if (lastExpr) {
  982. ids.push_back(lastExpr);
  983. }
  984. auto bb = b.GetAlt3().GetBlock2();
  985. switch (bb.Alt_case()) {
  986. case TRule_unary_subexpr_suffix_TBlock1_TBlock1_TAlt3_TBlock2::kAlt1: {
  987. TString named;
  988. if (!NamedNodeImpl(bb.GetAlt1().GetRule_bind_parameter1(), named, *this)) {
  989. return nullptr;
  990. }
  991. auto namedNode = GetNamedNode(named);
  992. if (!namedNode) {
  993. return nullptr;
  994. }
  995. ids.push_back(named);
  996. ids.back().Expr = namedNode;
  997. break;
  998. }
  999. case TRule_unary_subexpr_suffix_TBlock1_TBlock1_TAlt3_TBlock2::kAlt2: {
  1000. const TString str(Token(bb.GetAlt2().GetToken1()));
  1001. ids.push_back(str);
  1002. break;
  1003. }
  1004. case TRule_unary_subexpr_suffix_TBlock1_TBlock1_TAlt3_TBlock2::kAlt3: {
  1005. ids.push_back(Id(bb.GetAlt3().GetRule_an_id_or_type1(), *this));
  1006. break;
  1007. }
  1008. case TRule_unary_subexpr_suffix_TBlock1_TBlock1_TAlt3_TBlock2::ALT_NOT_SET:
  1009. Y_ABORT("You should change implementation according to grammar changes");
  1010. }
  1011. if (lastExpr) {
  1012. lastExpr = BuildAccess(pos, ids, false);
  1013. ids.clear();
  1014. }
  1015. break;
  1016. }
  1017. case TRule_unary_subexpr_suffix::TBlock1::TBlock1::ALT_NOT_SET:
  1018. AltNotImplemented("unary_subexpr_suffix", b);
  1019. return nullptr;
  1020. }
  1021. isFirstElem = false;
  1022. }
  1023. if (!lastExpr) {
  1024. lastExpr = BuildAccess(pos, ids, false);
  1025. ids.clear();
  1026. }
  1027. if (suffix.HasBlock2()) {
  1028. Ctx.IncrementMonCounter("sql_errors", "CollateUnarySubexpr");
  1029. Error() << "unary_subexpr: COLLATE is not implemented yet";
  1030. }
  1031. return lastExpr;
  1032. }
  1033. TNodePtr TSqlExpression::BindParameterRule(const TRule_bind_parameter& rule, const TTrailingQuestions& tail) {
  1034. TString namedArg;
  1035. if (!NamedNodeImpl(rule, namedArg, *this)) {
  1036. return {};
  1037. }
  1038. if (SmartParenthesisMode == ESmartParenthesis::SqlLambdaParams) {
  1039. Ctx.IncrementMonCounter("sql_features", "LambdaArgument");
  1040. if (tail.Count > 1) {
  1041. Ctx.Error(tail.Pos) << "Expecting at most one '?' token here (for optional lambda parameters), but got " << tail.Count;
  1042. return {};
  1043. }
  1044. return BuildAtom(Ctx.Pos(), namedArg, NYql::TNodeFlags::ArbitraryContent, tail.Count != 0);
  1045. }
  1046. if (tail.Count) {
  1047. UnexpectedQuestionToken(tail);
  1048. return {};
  1049. }
  1050. Ctx.IncrementMonCounter("sql_features", "NamedNodeUseAtom");
  1051. auto ret = GetNamedNode(namedArg);
  1052. if (ret) {
  1053. ret->SetRefPos(Ctx.Pos());
  1054. }
  1055. return ret;
  1056. }
  1057. TNodePtr TSqlExpression::LambdaRule(const TRule_lambda& rule) {
  1058. const auto& alt = rule;
  1059. const bool isSqlLambda = alt.HasBlock2();
  1060. if (!isSqlLambda) {
  1061. return SmartParenthesis(alt.GetRule_smart_parenthesis1());
  1062. }
  1063. MaybeUnnamedSmartParenOnTop = false;
  1064. TNodePtr parenthesis;
  1065. {
  1066. // we allow column reference here to postpone error and report it with better description in SqlLambdaParams
  1067. TColumnRefScope scope(Ctx, EColumnRefState::Allow);
  1068. TSqlExpression expr(Ctx, Mode);
  1069. expr.SetSmartParenthesisMode(ESmartParenthesis::SqlLambdaParams);
  1070. parenthesis = expr.SmartParenthesis(alt.GetRule_smart_parenthesis1());
  1071. }
  1072. if (!parenthesis) {
  1073. return {};
  1074. }
  1075. ui32 optionalArgumentsCount = 0;
  1076. TVector<TSymbolNameWithPos> args;
  1077. if (!SqlLambdaParams(parenthesis, args, optionalArgumentsCount)) {
  1078. return {};
  1079. }
  1080. auto bodyBlock = alt.GetBlock2();
  1081. Token(bodyBlock.GetToken1());
  1082. TPosition pos(Ctx.Pos());
  1083. TVector<TNodePtr> exprSeq;
  1084. for (auto& arg: args) {
  1085. arg.Name = PushNamedAtom(arg.Pos, arg.Name);
  1086. }
  1087. bool ret = false;
  1088. TColumnRefScope scope(Ctx, EColumnRefState::Deny);
  1089. scope.SetNoColumnErrContext("in lambda function");
  1090. if (bodyBlock.GetBlock2().HasAlt1()) {
  1091. ret = SqlLambdaExprBody(Ctx, bodyBlock.GetBlock2().GetAlt1().GetRule_expr2(), exprSeq);
  1092. } else {
  1093. ret = SqlLambdaExprBody(Ctx, bodyBlock.GetBlock2().GetAlt2().GetRule_lambda_body2(), exprSeq);
  1094. }
  1095. TVector<TString> argNames;
  1096. for (const auto& arg : args) {
  1097. argNames.push_back(arg.Name);
  1098. PopNamedNode(arg.Name);
  1099. }
  1100. if (!ret) {
  1101. return {};
  1102. }
  1103. auto lambdaNode = BuildSqlLambda(pos, std::move(argNames), std::move(exprSeq));
  1104. if (optionalArgumentsCount > 0) {
  1105. lambdaNode = new TCallNodeImpl(pos, "WithOptionalArgs", {
  1106. lambdaNode,
  1107. BuildQuotedAtom(pos, ToString(optionalArgumentsCount), TNodeFlags::Default)
  1108. });
  1109. }
  1110. return lambdaNode;
  1111. }
  1112. TNodePtr TSqlExpression::CastRule(const TRule_cast_expr& rule) {
  1113. Ctx.IncrementMonCounter("sql_features", "Cast");
  1114. const auto& alt = rule;
  1115. Token(alt.GetToken1());
  1116. TPosition pos(Ctx.Pos());
  1117. TSqlExpression expr(Ctx, Mode);
  1118. auto exprNode = expr.Build(rule.GetRule_expr3());
  1119. if (!exprNode) {
  1120. return {};
  1121. }
  1122. auto type = TypeNodeOrBind(rule.GetRule_type_name_or_bind5());
  1123. if (!type) {
  1124. return {};
  1125. }
  1126. return new TCallNodeImpl(pos, "SafeCast", {exprNode, type});
  1127. }
  1128. TNodePtr TSqlExpression::BitCastRule(const TRule_bitcast_expr& rule) {
  1129. Ctx.IncrementMonCounter("sql_features", "BitCast");
  1130. const auto& alt = rule;
  1131. Token(alt.GetToken1());
  1132. TPosition pos(Ctx.Pos());
  1133. TSqlExpression expr(Ctx, Mode);
  1134. auto exprNode = expr.Build(rule.GetRule_expr3());
  1135. if (!exprNode) {
  1136. return {};
  1137. }
  1138. auto type = TypeSimple(rule.GetRule_type_name_simple5(), true);
  1139. if (!type) {
  1140. return {};
  1141. }
  1142. return new TCallNodeImpl(pos, "BitCast", {exprNode, type});
  1143. }
  1144. TNodePtr TSqlExpression::ExistsRule(const TRule_exists_expr& rule) {
  1145. Ctx.IncrementMonCounter("sql_features", "Exists");
  1146. TPosition pos;
  1147. TSourcePtr source;
  1148. Token(rule.GetToken2());
  1149. switch (rule.GetBlock3().Alt_case()) {
  1150. case TRule_exists_expr::TBlock3::kAlt1: {
  1151. const auto& alt = rule.GetBlock3().GetAlt1().GetRule_select_stmt1();
  1152. TSqlSelect select(Ctx, Mode);
  1153. source = select.Build(alt, pos);
  1154. break;
  1155. }
  1156. case TRule_exists_expr::TBlock3::kAlt2: {
  1157. const auto& alt = rule.GetBlock3().GetAlt2().GetRule_values_stmt1();
  1158. TSqlValues values(Ctx, Mode);
  1159. source = values.Build(alt, pos);
  1160. break;
  1161. }
  1162. case TRule_exists_expr::TBlock3::ALT_NOT_SET:
  1163. AltNotImplemented("exists_expr", rule.GetBlock3());
  1164. }
  1165. if (!source) {
  1166. Ctx.IncrementMonCounter("sql_errors", "BadSource");
  1167. return nullptr;
  1168. }
  1169. const bool checkExist = true;
  1170. auto select = BuildSourceNode(Ctx.Pos(), source, checkExist, Ctx.Settings.EmitReadsForExists);
  1171. return BuildBuiltinFunc(Ctx, Ctx.Pos(), "ListHasItems", {select});
  1172. }
  1173. TNodePtr TSqlExpression::CaseRule(const TRule_case_expr& rule) {
  1174. // case_expr: CASE expr? when_expr+ (ELSE expr)? END;
  1175. // when_expr: WHEN expr THEN expr;
  1176. Ctx.IncrementMonCounter("sql_features", "Case");
  1177. const auto& alt = rule;
  1178. Token(alt.GetToken1());
  1179. TNodePtr elseExpr;
  1180. if (alt.HasBlock4()) {
  1181. Token(alt.GetBlock4().GetToken1());
  1182. TSqlExpression expr(Ctx, Mode);
  1183. elseExpr = expr.Build(alt.GetBlock4().GetRule_expr2());
  1184. } else {
  1185. Ctx.IncrementMonCounter("sql_errors", "ElseIsRequired");
  1186. Error() << "ELSE is required";
  1187. return {};
  1188. }
  1189. TNodePtr caseExpr;
  1190. if (alt.HasBlock2()) {
  1191. TSqlExpression expr(Ctx, Mode);
  1192. caseExpr = expr.Build(alt.GetBlock2().GetRule_expr1());
  1193. if (!caseExpr) {
  1194. return {};
  1195. }
  1196. }
  1197. TVector<TCaseBranch> branches;
  1198. for (size_t i = 0; i < alt.Block3Size(); ++i) {
  1199. branches.emplace_back();
  1200. const auto& block = alt.GetBlock3(i).GetRule_when_expr1();
  1201. Token(block.GetToken1());
  1202. TSqlExpression condExpr(Ctx, Mode);
  1203. branches.back().Pred = condExpr.Build(block.GetRule_expr2());
  1204. if (caseExpr) {
  1205. branches.back().Pred = BuildBinaryOp(Ctx, Ctx.Pos(), "==", caseExpr->Clone(), branches.back().Pred);
  1206. }
  1207. if (!branches.back().Pred) {
  1208. return {};
  1209. }
  1210. Token(block.GetToken3());
  1211. TSqlExpression thenExpr(Ctx, Mode);
  1212. branches.back().Value = thenExpr.Build(block.GetRule_expr4());
  1213. if (!branches.back().Value) {
  1214. return {};
  1215. }
  1216. }
  1217. auto final = ReduceCaseBranches(branches.begin(), branches.end());
  1218. return BuildBuiltinFunc(Ctx, Ctx.Pos(), "If", { final.Pred, final.Value, elseExpr });
  1219. }
  1220. TMaybe<TExprOrIdent> TSqlExpression::AtomExpr(const TRule_atom_expr& node, const TTrailingQuestions& tail) {
  1221. // atom_expr:
  1222. // literal_value
  1223. // | bind_parameter
  1224. // | lambda
  1225. // | cast_expr
  1226. // | exists_expr
  1227. // | case_expr
  1228. // | an_id_or_type NAMESPACE (id_or_type | STRING_VALUE)
  1229. // | value_constructor
  1230. // | bitcast_expr
  1231. // | list_literal
  1232. // | dict_literal
  1233. // | struct_literal
  1234. // ;
  1235. if (node.Alt_case() != TRule_atom_expr::kAltAtomExpr2 && tail.Count) {
  1236. UnexpectedQuestionToken(tail);
  1237. return {};
  1238. }
  1239. MaybeUnnamedSmartParenOnTop = MaybeUnnamedSmartParenOnTop && (node.Alt_case() == TRule_atom_expr::kAltAtomExpr3);
  1240. TExprOrIdent result;
  1241. switch (node.Alt_case()) {
  1242. case TRule_atom_expr::kAltAtomExpr1:
  1243. Ctx.IncrementMonCounter("sql_features", "LiteralExpr");
  1244. return LiteralExpr(node.GetAlt_atom_expr1().GetRule_literal_value1());
  1245. case TRule_atom_expr::kAltAtomExpr2:
  1246. result.Expr = BindParameterRule(node.GetAlt_atom_expr2().GetRule_bind_parameter1(), tail);
  1247. break;
  1248. case TRule_atom_expr::kAltAtomExpr3:
  1249. result.Expr = LambdaRule(node.GetAlt_atom_expr3().GetRule_lambda1());
  1250. break;
  1251. case TRule_atom_expr::kAltAtomExpr4:
  1252. result.Expr = CastRule(node.GetAlt_atom_expr4().GetRule_cast_expr1());
  1253. break;
  1254. case TRule_atom_expr::kAltAtomExpr5:
  1255. result.Expr = ExistsRule(node.GetAlt_atom_expr5().GetRule_exists_expr1());
  1256. break;
  1257. case TRule_atom_expr::kAltAtomExpr6:
  1258. result.Expr = CaseRule(node.GetAlt_atom_expr6().GetRule_case_expr1());
  1259. break;
  1260. case TRule_atom_expr::kAltAtomExpr7: {
  1261. const auto& alt = node.GetAlt_atom_expr7();
  1262. TString module(Id(alt.GetRule_an_id_or_type1(), *this));
  1263. TPosition pos(Ctx.Pos());
  1264. TString name;
  1265. switch (alt.GetBlock3().Alt_case()) {
  1266. case TRule_atom_expr::TAlt7::TBlock3::kAlt1:
  1267. name = Id(alt.GetBlock3().GetAlt1().GetRule_id_or_type1(), *this);
  1268. break;
  1269. case TRule_atom_expr::TAlt7::TBlock3::kAlt2: {
  1270. name = Token(alt.GetBlock3().GetAlt2().GetToken1());
  1271. if (Ctx.AnsiQuotedIdentifiers && name.StartsWith('"')) {
  1272. // same as previous case
  1273. name = IdContentFromString(Ctx, name);
  1274. } else {
  1275. module = "@" + module;
  1276. }
  1277. break;
  1278. }
  1279. case TRule_atom_expr::TAlt7::TBlock3::ALT_NOT_SET:
  1280. Y_ABORT("Unsigned number: you should change implementation according to grammar changes");
  1281. }
  1282. result.Expr = BuildCallable(pos, module, name, {});
  1283. break;
  1284. }
  1285. case TRule_atom_expr::kAltAtomExpr8: {
  1286. result.Expr = ValueConstructor(node.GetAlt_atom_expr8().GetRule_value_constructor1());
  1287. break;
  1288. }
  1289. case TRule_atom_expr::kAltAtomExpr9:
  1290. result.Expr = BitCastRule(node.GetAlt_atom_expr9().GetRule_bitcast_expr1());
  1291. break;
  1292. case TRule_atom_expr::kAltAtomExpr10:
  1293. result.Expr = ListLiteral(node.GetAlt_atom_expr10().GetRule_list_literal1());
  1294. break;
  1295. case TRule_atom_expr::kAltAtomExpr11:
  1296. result.Expr = DictLiteral(node.GetAlt_atom_expr11().GetRule_dict_literal1());
  1297. break;
  1298. case TRule_atom_expr::kAltAtomExpr12:
  1299. result.Expr = StructLiteral(node.GetAlt_atom_expr12().GetRule_struct_literal1());
  1300. break;
  1301. case TRule_atom_expr::ALT_NOT_SET:
  1302. AltNotImplemented("atom_expr", node);
  1303. }
  1304. if (!result.Expr) {
  1305. return {};
  1306. }
  1307. return result;
  1308. }
  1309. TMaybe<TExprOrIdent> TSqlExpression::InAtomExpr(const TRule_in_atom_expr& node, const TTrailingQuestions& tail) {
  1310. // in_atom_expr:
  1311. // literal_value
  1312. // | bind_parameter
  1313. // | lambda
  1314. // | cast_expr
  1315. // | case_expr
  1316. // | an_id_or_type NAMESPACE (id_or_type | STRING_VALUE)
  1317. // | LPAREN select_stmt RPAREN
  1318. // | value_constructor
  1319. // | bitcast_expr
  1320. // | list_literal
  1321. // | dict_literal
  1322. // | struct_literal
  1323. // ;
  1324. if (node.Alt_case() != TRule_in_atom_expr::kAltInAtomExpr2 && tail.Count) {
  1325. UnexpectedQuestionToken(tail);
  1326. return {};
  1327. }
  1328. TExprOrIdent result;
  1329. switch (node.Alt_case()) {
  1330. case TRule_in_atom_expr::kAltInAtomExpr1:
  1331. Ctx.IncrementMonCounter("sql_features", "LiteralExpr");
  1332. return LiteralExpr(node.GetAlt_in_atom_expr1().GetRule_literal_value1());
  1333. case TRule_in_atom_expr::kAltInAtomExpr2:
  1334. result.Expr = BindParameterRule(node.GetAlt_in_atom_expr2().GetRule_bind_parameter1(), tail);
  1335. break;
  1336. case TRule_in_atom_expr::kAltInAtomExpr3:
  1337. result.Expr = LambdaRule(node.GetAlt_in_atom_expr3().GetRule_lambda1());
  1338. break;
  1339. case TRule_in_atom_expr::kAltInAtomExpr4:
  1340. result.Expr = CastRule(node.GetAlt_in_atom_expr4().GetRule_cast_expr1());
  1341. break;
  1342. case TRule_in_atom_expr::kAltInAtomExpr5:
  1343. result.Expr = CaseRule(node.GetAlt_in_atom_expr5().GetRule_case_expr1());
  1344. break;
  1345. case TRule_in_atom_expr::kAltInAtomExpr6: {
  1346. const auto& alt = node.GetAlt_in_atom_expr6();
  1347. TString module(Id(alt.GetRule_an_id_or_type1(), *this));
  1348. TPosition pos(Ctx.Pos());
  1349. TString name;
  1350. switch (alt.GetBlock3().Alt_case()) {
  1351. case TRule_in_atom_expr::TAlt6::TBlock3::kAlt1:
  1352. name = Id(alt.GetBlock3().GetAlt1().GetRule_id_or_type1(), *this);
  1353. break;
  1354. case TRule_in_atom_expr::TAlt6::TBlock3::kAlt2: {
  1355. name = Token(alt.GetBlock3().GetAlt2().GetToken1());
  1356. if (Ctx.AnsiQuotedIdentifiers && name.StartsWith('"')) {
  1357. // same as previous case
  1358. name = IdContentFromString(Ctx, name);
  1359. } else {
  1360. module = "@" + module;
  1361. }
  1362. break;
  1363. }
  1364. case TRule_in_atom_expr::TAlt6::TBlock3::ALT_NOT_SET:
  1365. Y_ABORT("You should change implementation according to grammar changes");
  1366. }
  1367. result.Expr = BuildCallable(pos, module, name, {});
  1368. break;
  1369. }
  1370. case TRule_in_atom_expr::kAltInAtomExpr7: {
  1371. Token(node.GetAlt_in_atom_expr7().GetToken1());
  1372. // reset column reference scope (select will reenable it where needed)
  1373. TColumnRefScope scope(Ctx, EColumnRefState::Deny);
  1374. TSqlSelect select(Ctx, Mode);
  1375. TPosition pos;
  1376. auto source = select.Build(node.GetAlt_in_atom_expr7().GetRule_select_stmt2(), pos);
  1377. if (!source) {
  1378. Ctx.IncrementMonCounter("sql_errors", "BadSource");
  1379. return {};
  1380. }
  1381. Ctx.IncrementMonCounter("sql_features", "InSubquery");
  1382. const auto alias = Ctx.MakeName("subquerynode");
  1383. const auto ref = Ctx.MakeName("subquery");
  1384. auto& blocks = Ctx.GetCurrentBlocks();
  1385. blocks.push_back(BuildSubquery(std::move(source), alias, Mode == NSQLTranslation::ESqlMode::SUBQUERY, -1, Ctx.Scoped));
  1386. blocks.back()->SetLabel(ref);
  1387. result.Expr = BuildSubqueryRef(blocks.back(), ref, -1);
  1388. break;
  1389. }
  1390. case TRule_in_atom_expr::kAltInAtomExpr8: {
  1391. result.Expr = ValueConstructor(node.GetAlt_in_atom_expr8().GetRule_value_constructor1());
  1392. break;
  1393. }
  1394. case TRule_in_atom_expr::kAltInAtomExpr9:
  1395. result.Expr = BitCastRule(node.GetAlt_in_atom_expr9().GetRule_bitcast_expr1());
  1396. break;
  1397. case TRule_in_atom_expr::kAltInAtomExpr10:
  1398. result.Expr = ListLiteral(node.GetAlt_in_atom_expr10().GetRule_list_literal1());
  1399. break;
  1400. case TRule_in_atom_expr::kAltInAtomExpr11:
  1401. result.Expr = DictLiteral(node.GetAlt_in_atom_expr11().GetRule_dict_literal1());
  1402. break;
  1403. case TRule_in_atom_expr::kAltInAtomExpr12:
  1404. result.Expr = StructLiteral(node.GetAlt_in_atom_expr12().GetRule_struct_literal1());
  1405. break;
  1406. case TRule_in_atom_expr::ALT_NOT_SET:
  1407. AltNotImplemented("in_atom_expr", node);
  1408. }
  1409. if (!result.Expr) {
  1410. return {};
  1411. }
  1412. return result;
  1413. }
  1414. bool TSqlExpression::SqlLambdaParams(const TNodePtr& node, TVector<TSymbolNameWithPos>& args, ui32& optionalArgumentsCount) {
  1415. args.clear();
  1416. optionalArgumentsCount = 0;
  1417. auto errMsg = TStringBuf("Invalid lambda arguments syntax. Lambda arguments should start with '$' as named value.");
  1418. auto tupleNodePtr = node->GetTupleNode();;
  1419. if (!tupleNodePtr) {
  1420. Ctx.Error(node->GetPos()) << errMsg;
  1421. return false;
  1422. }
  1423. THashSet<TString> dupArgsChecker;
  1424. for (const auto& argPtr: tupleNodePtr->Elements()) {
  1425. auto contentPtr = argPtr->GetAtomContent();
  1426. if (!contentPtr || !contentPtr->StartsWith("$")) {
  1427. Ctx.Error(argPtr->GetPos()) << errMsg;
  1428. return false;
  1429. }
  1430. if (argPtr->IsOptionalArg()) {
  1431. ++optionalArgumentsCount;
  1432. } else if (optionalArgumentsCount > 0) {
  1433. Ctx.Error(argPtr->GetPos()) << "Non-optional argument can not follow optional one";
  1434. return false;
  1435. }
  1436. if (!IsAnonymousName(*contentPtr) && !dupArgsChecker.insert(*contentPtr).second) {
  1437. Ctx.Error(argPtr->GetPos()) << "Duplicate lambda argument parametr: '" << *contentPtr << "'.";
  1438. return false;
  1439. }
  1440. args.push_back(TSymbolNameWithPos{*contentPtr, argPtr->GetPos()});
  1441. }
  1442. return true;
  1443. }
  1444. bool TSqlExpression::SqlLambdaExprBody(TContext& ctx, const TRule_expr& node, TVector<TNodePtr>& exprSeq) {
  1445. TSqlExpression expr(ctx, ctx.Settings.Mode);
  1446. TNodePtr nodeExpr = expr.Build(node);
  1447. if (!nodeExpr) {
  1448. return false;
  1449. }
  1450. exprSeq.push_back(nodeExpr);
  1451. return true;
  1452. }
  1453. bool TSqlExpression::SqlLambdaExprBody(TContext& ctx, const TRule_lambda_body& node, TVector<TNodePtr>& exprSeq) {
  1454. TSqlExpression expr(ctx, ctx.Settings.Mode);
  1455. TVector<TString> localNames;
  1456. bool hasError = false;
  1457. for (auto& block: node.GetBlock2()) {
  1458. const auto& rule = block.GetRule_lambda_stmt1();
  1459. switch (rule.Alt_case()) {
  1460. case TRule_lambda_stmt::kAltLambdaStmt1: {
  1461. TVector<TSymbolNameWithPos> names;
  1462. auto nodeExpr = NamedNode(rule.GetAlt_lambda_stmt1().GetRule_named_nodes_stmt1(), names);
  1463. if (!nodeExpr) {
  1464. hasError = true;
  1465. continue;
  1466. } else if (nodeExpr->GetSource()) {
  1467. ctx.Error() << "SELECT is not supported inside lambda body";
  1468. hasError = true;
  1469. continue;
  1470. }
  1471. if (names.size() > 1) {
  1472. auto ref = ctx.MakeName("tie");
  1473. exprSeq.push_back(nodeExpr->Y("EnsureTupleSize", nodeExpr, nodeExpr->Q(ToString(names.size()))));
  1474. exprSeq.back()->SetLabel(ref);
  1475. for (size_t i = 0; i < names.size(); ++i) {
  1476. TNodePtr nthExpr = nodeExpr->Y("Nth", ref, nodeExpr->Q(ToString(i)));
  1477. names[i].Name = PushNamedAtom(names[i].Pos, names[i].Name);
  1478. nthExpr->SetLabel(names[i].Name);
  1479. localNames.push_back(names[i].Name);
  1480. exprSeq.push_back(nthExpr);
  1481. }
  1482. } else {
  1483. auto& symbol = names.front();
  1484. symbol.Name = PushNamedAtom(symbol.Pos, symbol.Name);
  1485. nodeExpr->SetLabel(symbol.Name);
  1486. localNames.push_back(symbol.Name);
  1487. exprSeq.push_back(nodeExpr);
  1488. }
  1489. break;
  1490. }
  1491. case TRule_lambda_stmt::kAltLambdaStmt2: {
  1492. if (!ImportStatement(rule.GetAlt_lambda_stmt2().GetRule_import_stmt1(), &localNames)) {
  1493. hasError = true;
  1494. }
  1495. break;
  1496. }
  1497. case TRule_lambda_stmt::ALT_NOT_SET:
  1498. Y_ABORT("SampleClause: does not correspond to grammar changes");
  1499. }
  1500. }
  1501. TNodePtr nodeExpr;
  1502. if (!hasError) {
  1503. nodeExpr = expr.Build(node.GetRule_expr4());
  1504. }
  1505. for (const auto& name : localNames) {
  1506. PopNamedNode(name);
  1507. }
  1508. if (!nodeExpr) {
  1509. return false;
  1510. }
  1511. exprSeq.push_back(nodeExpr);
  1512. return true;
  1513. }
  1514. TNodePtr TSqlExpression::SubExpr(const TRule_con_subexpr& node, const TTrailingQuestions& tail) {
  1515. // con_subexpr: unary_subexpr | unary_op unary_subexpr;
  1516. switch (node.Alt_case()) {
  1517. case TRule_con_subexpr::kAltConSubexpr1:
  1518. return UnaryExpr(node.GetAlt_con_subexpr1().GetRule_unary_subexpr1(), tail);
  1519. case TRule_con_subexpr::kAltConSubexpr2: {
  1520. MaybeUnnamedSmartParenOnTop = false;
  1521. Ctx.IncrementMonCounter("sql_features", "UnaryOperation");
  1522. TString opName;
  1523. auto token = node.GetAlt_con_subexpr2().GetRule_unary_op1().GetToken1();
  1524. Token(token);
  1525. TPosition pos(Ctx.Pos());
  1526. auto tokenId = token.GetId();
  1527. if (IS_TOKEN(tokenId, NOT)) {
  1528. opName = "Not";
  1529. } else if (IS_TOKEN(tokenId, PLUS)) {
  1530. opName = "Plus";
  1531. } else if (IS_TOKEN(tokenId, MINUS)) {
  1532. opName = Ctx.Scoped->PragmaCheckedOps ? "CheckedMinus" : "Minus";
  1533. } else if (IS_TOKEN(tokenId, TILDA)) {
  1534. opName = "BitNot";
  1535. } else {
  1536. Ctx.IncrementMonCounter("sql_errors", "UnsupportedUnaryOperation");
  1537. Error() << "Unsupported unary operation: " << token.GetValue();
  1538. return nullptr;
  1539. }
  1540. Ctx.IncrementMonCounter("sql_unary_operations", opName);
  1541. auto expr = UnaryExpr(node.GetAlt_con_subexpr2().GetRule_unary_subexpr2(), tail);
  1542. return expr ? expr->ApplyUnaryOp(Ctx, pos, opName) : expr;
  1543. }
  1544. case TRule_con_subexpr::ALT_NOT_SET:
  1545. Y_ABORT("You should change implementation according to grammar changes");
  1546. }
  1547. return nullptr;
  1548. }
  1549. TNodePtr TSqlExpression::SubExpr(const TRule_xor_subexpr& node, const TTrailingQuestions& tail) {
  1550. // xor_subexpr: eq_subexpr cond_expr?;
  1551. MaybeUnnamedSmartParenOnTop = MaybeUnnamedSmartParenOnTop && !node.HasBlock2();
  1552. TNodePtr res(SubExpr(node.GetRule_eq_subexpr1(), node.HasBlock2() ? TTrailingQuestions{} : tail));
  1553. if (!res) {
  1554. return {};
  1555. }
  1556. TPosition pos(Ctx.Pos());
  1557. if (node.HasBlock2()) {
  1558. auto cond = node.GetBlock2().GetRule_cond_expr1();
  1559. switch (cond.Alt_case()) {
  1560. case TRule_cond_expr::kAltCondExpr1: {
  1561. const auto& matchOp = cond.GetAlt_cond_expr1();
  1562. const bool notMatch = matchOp.HasBlock1();
  1563. const TCiString& opName = Token(matchOp.GetRule_match_op2().GetToken1());
  1564. const auto& pattern = SubExpr(cond.GetAlt_cond_expr1().GetRule_eq_subexpr3(), matchOp.HasBlock4() ? TTrailingQuestions{} : tail);
  1565. if (!pattern) {
  1566. return {};
  1567. }
  1568. TNodePtr isMatch;
  1569. if (opName == "like" || opName == "ilike") {
  1570. const TString* escapeLiteral = nullptr;
  1571. TNodePtr escapeNode;
  1572. const auto& escaper = BuildUdf(Ctx, pos, "Re2", "PatternFromLike", {});
  1573. TVector<TNodePtr> escaperArgs({ escaper, pattern });
  1574. if (matchOp.HasBlock4()) {
  1575. const auto& escapeBlock = matchOp.GetBlock4();
  1576. TNodePtr escapeExpr = SubExpr(escapeBlock.GetRule_eq_subexpr2(), tail);
  1577. if (!escapeExpr) {
  1578. return {};
  1579. }
  1580. escapeLiteral = escapeExpr->GetLiteral("String");
  1581. escapeNode = escapeExpr;
  1582. if (escapeLiteral) {
  1583. Ctx.IncrementMonCounter("sql_features", "LikeEscape");
  1584. if (escapeLiteral->size() != 1) {
  1585. Ctx.IncrementMonCounter("sql_errors", "LikeMultiCharEscape");
  1586. Error() << "ESCAPE clause requires single character argument";
  1587. return nullptr;
  1588. }
  1589. if (escapeLiteral[0] == "%" || escapeLiteral[0] == "_" || escapeLiteral[0] == "\\") {
  1590. Ctx.IncrementMonCounter("sql_errors", "LikeUnsupportedEscapeChar");
  1591. Error() << "'%', '_' and '\\' are currently not supported in ESCAPE clause, ";
  1592. Error() << "please choose any other character";
  1593. return nullptr;
  1594. }
  1595. if (!IsAscii(escapeLiteral->front())) {
  1596. Ctx.IncrementMonCounter("sql_errors", "LikeUnsupportedEscapeChar");
  1597. Error() << "Non-ASCII symbols are not supported in ESCAPE clause, ";
  1598. Error() << "please choose ASCII character";
  1599. return nullptr;
  1600. }
  1601. escaperArgs.push_back(BuildLiteralRawString(pos, *escapeLiteral));
  1602. } else {
  1603. Ctx.IncrementMonCounter("sql_errors", "LikeNotLiteralEscape");
  1604. Error() << "ESCAPE clause requires String literal argument";
  1605. return nullptr;
  1606. }
  1607. }
  1608. auto re2options = BuildUdf(Ctx, pos, "Re2", "Options", {});
  1609. if (opName == "ilike") {
  1610. Ctx.IncrementMonCounter("sql_features", "CaseInsensitiveLike");
  1611. }
  1612. auto csModeLiteral = BuildLiteralBool(pos, opName != "ilike");
  1613. csModeLiteral->SetLabel("CaseSensitive");
  1614. auto csOption = BuildStructure(pos, { csModeLiteral });
  1615. auto optionsApply = new TCallNodeImpl(pos, "NamedApply", { re2options, BuildTuple(pos, {}), csOption });
  1616. const TNodePtr escapedPattern = new TCallNodeImpl(pos, "Apply", { escaperArgs });
  1617. auto list = new TAstListNodeImpl(pos, { escapedPattern, optionsApply });
  1618. auto runConfig = new TAstListNodeImpl(pos, { new TAstAtomNodeImpl(pos, "quote", 0), list });
  1619. const TNodePtr matcher = new TCallNodeImpl(pos, "AssumeStrict", { BuildUdf(Ctx, pos, "Re2", "Match", { runConfig }) });
  1620. isMatch = new TCallNodeImpl(pos, "Apply", { matcher, res });
  1621. bool isUtf8 = false;
  1622. const TString* literalPattern = pattern->GetLiteral("String");
  1623. if (!literalPattern) {
  1624. literalPattern = pattern->GetLiteral("Utf8");
  1625. isUtf8 = literalPattern != nullptr;
  1626. }
  1627. if (literalPattern) {
  1628. bool inEscape = false;
  1629. TMaybe<char> escape;
  1630. if (escapeLiteral) {
  1631. escape = escapeLiteral->front();
  1632. }
  1633. bool mayIgnoreCase;
  1634. TVector<TPatternComponent<char>> components;
  1635. if (isUtf8) {
  1636. auto splitResult = SplitPattern(UTF8ToUTF32<false>(*literalPattern), escape, inEscape);
  1637. for (const auto& component : splitResult) {
  1638. TPatternComponent<char> converted;
  1639. converted.IsSimple = component.IsSimple;
  1640. converted.Prefix = WideToUTF8(component.Prefix);
  1641. converted.Suffix = WideToUTF8(component.Suffix);
  1642. components.push_back(std::move(converted));
  1643. }
  1644. mayIgnoreCase = ToLowerUTF8(*literalPattern) == ToUpperUTF8(*literalPattern);
  1645. } else {
  1646. components = SplitPattern(*literalPattern, escape, inEscape);
  1647. mayIgnoreCase = WithoutAlpha(*literalPattern);
  1648. }
  1649. if (inEscape) {
  1650. Ctx.IncrementMonCounter("sql_errors", "LikeEscapeSymbolEnd");
  1651. Error() << "LIKE pattern should not end with escape symbol";
  1652. return nullptr;
  1653. }
  1654. if (opName == "like" || mayIgnoreCase) {
  1655. // TODO: expand LIKE in optimizers - we can analyze argument types there
  1656. YQL_ENSURE(!components.empty());
  1657. const auto& first = components.front();
  1658. if (components.size() == 1 && first.IsSimple) {
  1659. // no '%'s and '_'s in pattern
  1660. YQL_ENSURE(first.Prefix == first.Suffix);
  1661. isMatch = BuildBinaryOp(Ctx, pos, "==", res, BuildLiteralRawString(pos, first.Suffix, isUtf8));
  1662. } else if (!first.Prefix.empty()) {
  1663. const TString& prefix = first.Prefix;
  1664. TNodePtr prefixMatch;
  1665. if (Ctx.EmitStartsWith) {
  1666. prefixMatch = BuildBinaryOp(Ctx, pos, "StartsWith", res, BuildLiteralRawString(pos, prefix, isUtf8));
  1667. } else {
  1668. prefixMatch = BuildBinaryOp(Ctx, pos, ">=", res, BuildLiteralRawString(pos, prefix, isUtf8));
  1669. auto upperBound = isUtf8 ? NextValidUtf8(prefix) : NextLexicographicString(prefix);
  1670. if (upperBound) {
  1671. prefixMatch = BuildBinaryOp(
  1672. Ctx,
  1673. pos,
  1674. "And",
  1675. prefixMatch,
  1676. BuildBinaryOp(Ctx, pos, "<", res, BuildLiteralRawString(pos, TString(*upperBound), isUtf8))
  1677. );
  1678. }
  1679. }
  1680. if (Ctx.AnsiLike && first.IsSimple && components.size() == 2 && components.back().IsSimple) {
  1681. const TString& suffix = components.back().Suffix;
  1682. // 'prefix%suffix'
  1683. if (suffix.empty()) {
  1684. isMatch = prefixMatch;
  1685. } else {
  1686. // len(str) >= len(prefix) + len(suffix) && StartsWith(str, prefix) && EndsWith(str, suffix)
  1687. TNodePtr sizePred = BuildBinaryOp(Ctx, pos, ">=",
  1688. TNodePtr(new TCallNodeImpl(pos, "Size", { res })),
  1689. TNodePtr(new TLiteralNumberNode<ui32>(pos, "Uint32", ToString(prefix.size() + suffix.size()))));
  1690. TNodePtr suffixMatch = BuildBinaryOp(Ctx, pos, "EndsWith", res, BuildLiteralRawString(pos, suffix, isUtf8));
  1691. isMatch = new TCallNodeImpl(pos, "And", {
  1692. sizePred,
  1693. prefixMatch,
  1694. suffixMatch
  1695. });
  1696. }
  1697. } else {
  1698. isMatch = BuildBinaryOp(Ctx, pos, "And", prefixMatch, isMatch);
  1699. }
  1700. } else if (Ctx.AnsiLike && AllOf(components, [](const auto& comp) { return comp.IsSimple; })) {
  1701. YQL_ENSURE(first.Prefix.empty());
  1702. if (components.size() == 3 && components.back().Prefix.empty()) {
  1703. // '%foo%'
  1704. YQL_ENSURE(!components[1].Prefix.empty());
  1705. isMatch = BuildBinaryOp(Ctx, pos, "StringContains", res, BuildLiteralRawString(pos, components[1].Prefix, isUtf8));
  1706. } else if (components.size() == 2) {
  1707. // '%foo'
  1708. isMatch = BuildBinaryOp(Ctx, pos, "EndsWith", res, BuildLiteralRawString(pos, components[1].Prefix, isUtf8));
  1709. }
  1710. } else if (Ctx.AnsiLike && !components.back().Suffix.empty()) {
  1711. const TString& suffix = components.back().Suffix;
  1712. TNodePtr suffixMatch = BuildBinaryOp(Ctx, pos, "EndsWith", res, BuildLiteralRawString(pos, suffix, isUtf8));
  1713. isMatch = BuildBinaryOp(Ctx, pos, "And", suffixMatch, isMatch);
  1714. }
  1715. // TODO: more StringContains/StartsWith/EndsWith cases?
  1716. }
  1717. }
  1718. Ctx.IncrementMonCounter("sql_features", notMatch ? "NotLike" : "Like");
  1719. } else if (opName == "regexp" || opName == "rlike" || opName == "match") {
  1720. if (matchOp.HasBlock4()) {
  1721. Ctx.IncrementMonCounter("sql_errors", "RegexpEscape");
  1722. TString opNameUpper(opName);
  1723. opNameUpper.to_upper();
  1724. Error() << opName << " and ESCAPE clauses should not be used together";
  1725. return nullptr;
  1726. }
  1727. if (!Ctx.PragmaRegexUseRe2) {
  1728. Ctx.Warning(pos, TIssuesIds::CORE_LEGACY_REGEX_ENGINE) << "Legacy regex engine works incorrectly with unicode. Use PRAGMA RegexUseRe2='true';";
  1729. }
  1730. const auto& matcher = Ctx.PragmaRegexUseRe2 ?
  1731. BuildUdf(Ctx, pos, "Re2", opName == "match" ? "Match" : "Grep", {BuildTuple(pos, {pattern, BuildLiteralNull(pos)})}):
  1732. BuildUdf(Ctx, pos, "Pcre", opName == "match" ? "BacktrackingMatch" : "BacktrackingGrep", { pattern });
  1733. isMatch = new TCallNodeImpl(pos, "Apply", { matcher, res });
  1734. if (opName != "match") {
  1735. Ctx.IncrementMonCounter("sql_features", notMatch ? "NotRegexp" : "Regexp");
  1736. } else {
  1737. Ctx.IncrementMonCounter("sql_features", notMatch ? "NotMatch" : "Match");
  1738. }
  1739. } else {
  1740. Ctx.IncrementMonCounter("sql_errors", "UnknownMatchOp");
  1741. AltNotImplemented("match_op", cond);
  1742. return nullptr;
  1743. }
  1744. return (notMatch && isMatch) ? isMatch->ApplyUnaryOp(Ctx, pos, "Not") : isMatch;
  1745. }
  1746. case TRule_cond_expr::kAltCondExpr2: {
  1747. // | NOT? IN COMPACT? in_expr
  1748. auto altInExpr = cond.GetAlt_cond_expr2();
  1749. const bool notIn = altInExpr.HasBlock1();
  1750. auto hints = BuildTuple(pos, {});
  1751. bool isCompact = altInExpr.HasBlock3();
  1752. if (!isCompact) {
  1753. auto sqlHints = Ctx.PullHintForToken(Ctx.TokenPosition(altInExpr.GetToken2()));
  1754. isCompact = AnyOf(sqlHints, [](const NSQLTranslation::TSQLHint& hint) { return to_lower(hint.Name) == "compact"; });
  1755. }
  1756. if (isCompact) {
  1757. Ctx.IncrementMonCounter("sql_features", "IsCompactHint");
  1758. auto sizeHint = BuildTuple(pos, { BuildQuotedAtom(pos, "isCompact", NYql::TNodeFlags::Default) });
  1759. hints = BuildTuple(pos, { sizeHint });
  1760. }
  1761. TSqlExpression inSubexpr(Ctx, Mode);
  1762. auto inRight = inSubexpr.SqlInExpr(altInExpr.GetRule_in_expr4(), tail);
  1763. auto isIn = BuildBuiltinFunc(Ctx, pos, "In", {res, inRight, hints});
  1764. Ctx.IncrementMonCounter("sql_features", notIn ? "NotIn" : "In");
  1765. return (notIn && isIn) ? isIn->ApplyUnaryOp(Ctx, pos, "Not") : isIn;
  1766. }
  1767. case TRule_cond_expr::kAltCondExpr3: {
  1768. if (tail.Count) {
  1769. UnexpectedQuestionToken(tail);
  1770. return {};
  1771. }
  1772. auto altCase = cond.GetAlt_cond_expr3().GetBlock1().Alt_case();
  1773. const bool notNoll =
  1774. altCase == TRule_cond_expr::TAlt3::TBlock1::kAlt2 ||
  1775. altCase == TRule_cond_expr::TAlt3::TBlock1::kAlt4
  1776. ;
  1777. if (altCase == TRule_cond_expr::TAlt3::TBlock1::kAlt4 &&
  1778. !cond.GetAlt_cond_expr3().GetBlock1().GetAlt4().HasBlock1())
  1779. {
  1780. Ctx.Warning(Ctx.Pos(), TIssuesIds::YQL_MISSING_IS_BEFORE_NOT_NULL) << "Missing IS keyword before NOT NULL";
  1781. }
  1782. auto isNull = BuildIsNullOp(pos, res);
  1783. Ctx.IncrementMonCounter("sql_features", notNoll ? "NotNull" : "Null");
  1784. return (notNoll && isNull) ? isNull->ApplyUnaryOp(Ctx, pos, "Not") : isNull;
  1785. }
  1786. case TRule_cond_expr::kAltCondExpr4: {
  1787. auto alt = cond.GetAlt_cond_expr4();
  1788. const bool symmetric = alt.HasBlock3() && IS_TOKEN(alt.GetBlock3().GetToken1().GetId(), SYMMETRIC);
  1789. const bool negation = alt.HasBlock1();
  1790. TNodePtr left = SubExpr(alt.GetRule_eq_subexpr4(), {});
  1791. TNodePtr right = SubExpr(alt.GetRule_eq_subexpr6(), tail);
  1792. if (!left || !right) {
  1793. return {};
  1794. }
  1795. const bool bothArgNull = left->IsNull() && right->IsNull();
  1796. const bool oneArgNull = left->IsNull() || right->IsNull();
  1797. if (res->IsNull() || bothArgNull || (symmetric && oneArgNull)) {
  1798. Ctx.Warning(pos, TIssuesIds::YQL_OPERATION_WILL_RETURN_NULL)
  1799. << "BETWEEN operation will return NULL here";
  1800. }
  1801. auto buildSubexpr = [&](const TNodePtr& left, const TNodePtr& right) {
  1802. if (negation) {
  1803. return BuildBinaryOpRaw(
  1804. pos,
  1805. "Or",
  1806. BuildBinaryOpRaw(pos, "<", res, left),
  1807. BuildBinaryOpRaw(pos, ">", res, right)
  1808. );
  1809. } else {
  1810. return BuildBinaryOpRaw(
  1811. pos,
  1812. "And",
  1813. BuildBinaryOpRaw(pos, ">=", res, left),
  1814. BuildBinaryOpRaw(pos, "<=", res, right)
  1815. );
  1816. }
  1817. };
  1818. if (symmetric) {
  1819. Ctx.IncrementMonCounter("sql_features", negation? "NotBetweenSymmetric" : "BetweenSymmetric");
  1820. return BuildBinaryOpRaw(
  1821. pos,
  1822. negation? "And" : "Or",
  1823. buildSubexpr(left, right),
  1824. buildSubexpr(right, left)
  1825. );
  1826. } else {
  1827. Ctx.IncrementMonCounter("sql_features", negation? "NotBetween" : "Between");
  1828. return buildSubexpr(left, right);
  1829. }
  1830. }
  1831. case TRule_cond_expr::kAltCondExpr5: {
  1832. auto alt = cond.GetAlt_cond_expr5();
  1833. auto getNode = [](const TRule_cond_expr::TAlt5::TBlock1& b) -> const TRule_eq_subexpr& { return b.GetRule_eq_subexpr2(); };
  1834. return BinOpList(node.GetRule_eq_subexpr1(), getNode, alt.GetBlock1().begin(), alt.GetBlock1().end(), tail);
  1835. }
  1836. case TRule_cond_expr::ALT_NOT_SET:
  1837. Ctx.IncrementMonCounter("sql_errors", "UnknownConditionExpr");
  1838. AltNotImplemented("cond_expr", cond);
  1839. return nullptr;
  1840. }
  1841. }
  1842. return res;
  1843. }
  1844. TNodePtr TSqlExpression::BinOperList(const TString& opName, TVector<TNodePtr>::const_iterator begin, TVector<TNodePtr>::const_iterator end) const {
  1845. TPosition pos(Ctx.Pos());
  1846. const size_t opCount = end - begin;
  1847. Y_DEBUG_ABORT_UNLESS(opCount >= 2);
  1848. if (opCount == 2) {
  1849. return BuildBinaryOp(Ctx, pos, opName, *begin, *(begin+1));
  1850. } if (opCount == 3) {
  1851. return BuildBinaryOp(Ctx, pos, opName, BuildBinaryOp(Ctx, pos, opName, *begin, *(begin+1)), *(begin+2));
  1852. } else {
  1853. auto mid = begin + opCount / 2;
  1854. return BuildBinaryOp(Ctx, pos, opName, BinOperList(opName, begin, mid), BinOperList(opName, mid, end));
  1855. }
  1856. }
  1857. TSqlExpression::TCaseBranch TSqlExpression::ReduceCaseBranches(TVector<TCaseBranch>::const_iterator begin, TVector<TCaseBranch>::const_iterator end) const {
  1858. YQL_ENSURE(begin < end);
  1859. const size_t branchCount = end - begin;
  1860. if (branchCount == 1) {
  1861. return *begin;
  1862. }
  1863. auto mid = begin + branchCount / 2;
  1864. auto left = ReduceCaseBranches(begin, mid);
  1865. auto right = ReduceCaseBranches(mid, end);
  1866. TVector<TNodePtr> preds;
  1867. preds.reserve(branchCount);
  1868. for (auto it = begin; it != end; ++it) {
  1869. preds.push_back(it->Pred);
  1870. }
  1871. TCaseBranch result;
  1872. result.Pred = new TCallNodeImpl(Ctx.Pos(), "Or", CloneContainer(preds));
  1873. result.Value = BuildBuiltinFunc(Ctx, Ctx.Pos(), "If", { left.Pred, left.Value, right.Value });
  1874. return result;
  1875. }
  1876. template <typename TNode, typename TGetNode, typename TIter>
  1877. TNodePtr TSqlExpression::BinOper(const TString& opName, const TNode& node, TGetNode getNode, TIter begin, TIter end, const TTrailingQuestions& tail) {
  1878. if (begin == end) {
  1879. return SubExpr(node, tail);
  1880. }
  1881. // can't have top level smart_parenthesis node if any binary operation is present
  1882. MaybeUnnamedSmartParenOnTop = false;
  1883. Ctx.IncrementMonCounter("sql_binary_operations", opName);
  1884. const size_t listSize = end - begin;
  1885. TVector<TNodePtr> nodes;
  1886. nodes.reserve(1 + listSize);
  1887. nodes.push_back(SubExpr(node, {}));
  1888. for (; begin != end; ++begin) {
  1889. nodes.push_back(SubExpr(getNode(*begin), (begin + 1 == end) ? tail : TTrailingQuestions{}));
  1890. }
  1891. return BinOperList(opName, nodes.begin(), nodes.end());
  1892. }
  1893. template <typename TNode, typename TGetNode, typename TIter>
  1894. TNodePtr TSqlExpression::BinOpList(const TNode& node, TGetNode getNode, TIter begin, TIter end, const TTrailingQuestions& tail) {
  1895. MaybeUnnamedSmartParenOnTop = MaybeUnnamedSmartParenOnTop && (begin == end);
  1896. TNodePtr partialResult = SubExpr(node, (begin == end) ? tail : TTrailingQuestions{});
  1897. while (begin != end) {
  1898. Ctx.IncrementMonCounter("sql_features", "BinaryOperation");
  1899. Token(begin->GetToken1());
  1900. TPosition pos(Ctx.Pos());
  1901. TString opName;
  1902. auto tokenId = begin->GetToken1().GetId();
  1903. if (IS_TOKEN(tokenId, LESS)) {
  1904. opName = "<";
  1905. Ctx.IncrementMonCounter("sql_binary_operations", "Less");
  1906. } else if (IS_TOKEN(tokenId, LESS_OR_EQ)) {
  1907. opName = "<=";
  1908. Ctx.IncrementMonCounter("sql_binary_operations", "LessOrEq");
  1909. } else if (IS_TOKEN(tokenId, GREATER)) {
  1910. opName = ">";
  1911. Ctx.IncrementMonCounter("sql_binary_operations", "Greater");
  1912. } else if (IS_TOKEN(tokenId, GREATER_OR_EQ)) {
  1913. opName = ">=";
  1914. Ctx.IncrementMonCounter("sql_binary_operations", "GreaterOrEq");
  1915. } else if (IS_TOKEN(tokenId, PLUS)) {
  1916. opName = Ctx.Scoped->PragmaCheckedOps ? "CheckedAdd" : "+MayWarn";
  1917. Ctx.IncrementMonCounter("sql_binary_operations", "Plus");
  1918. } else if (IS_TOKEN(tokenId, MINUS)) {
  1919. opName = Ctx.Scoped->PragmaCheckedOps ? "CheckedSub" : "-MayWarn";
  1920. Ctx.IncrementMonCounter("sql_binary_operations", "Minus");
  1921. } else if (IS_TOKEN(tokenId, ASTERISK)) {
  1922. opName = Ctx.Scoped->PragmaCheckedOps ? "CheckedMul" : "*MayWarn";
  1923. Ctx.IncrementMonCounter("sql_binary_operations", "Multiply");
  1924. } else if (IS_TOKEN(tokenId, SLASH)) {
  1925. opName = "/MayWarn";
  1926. Ctx.IncrementMonCounter("sql_binary_operations", "Divide");
  1927. if (!Ctx.Scoped->PragmaClassicDivision && partialResult) {
  1928. partialResult = new TCallNodeImpl(pos, "SafeCast", {std::move(partialResult), BuildDataType(pos, "Double")});
  1929. } else if (Ctx.Scoped->PragmaCheckedOps) {
  1930. opName = "CheckedDiv";
  1931. }
  1932. } else if (IS_TOKEN(tokenId, PERCENT)) {
  1933. opName = Ctx.Scoped->PragmaCheckedOps ? "CheckedMod" : "%MayWarn";
  1934. Ctx.IncrementMonCounter("sql_binary_operations", "Mod");
  1935. } else {
  1936. Ctx.IncrementMonCounter("sql_errors", "UnsupportedBinaryOperation");
  1937. Error() << "Unsupported binary operation token: " << tokenId;
  1938. return nullptr;
  1939. }
  1940. partialResult = BuildBinaryOp(Ctx, pos, opName, partialResult, SubExpr(getNode(*begin), (begin + 1 == end) ? tail : TTrailingQuestions{}));
  1941. ++begin;
  1942. }
  1943. return partialResult;
  1944. }
  1945. template <typename TGetNode, typename TIter>
  1946. TNodePtr TSqlExpression::BinOpList(const TRule_bit_subexpr& node, TGetNode getNode, TIter begin, TIter end, const TTrailingQuestions& tail) {
  1947. MaybeUnnamedSmartParenOnTop = MaybeUnnamedSmartParenOnTop && (begin == end);
  1948. TNodePtr partialResult = SubExpr(node, (begin == end) ? tail : TTrailingQuestions{});
  1949. while (begin != end) {
  1950. Ctx.IncrementMonCounter("sql_features", "BinaryOperation");
  1951. TString opName;
  1952. switch (begin->GetBlock1().Alt_case()) {
  1953. case TRule_neq_subexpr_TBlock2_TBlock1::kAlt1: {
  1954. Token(begin->GetBlock1().GetAlt1().GetToken1());
  1955. auto tokenId = begin->GetBlock1().GetAlt1().GetToken1().GetId();
  1956. if (!IS_TOKEN(tokenId, SHIFT_LEFT)) {
  1957. Error() << "Unsupported binary operation token: " << tokenId;
  1958. return {};
  1959. }
  1960. opName = "ShiftLeft";
  1961. Ctx.IncrementMonCounter("sql_binary_operations", "ShiftLeft");
  1962. break;
  1963. }
  1964. case TRule_neq_subexpr_TBlock2_TBlock1::kAlt2: {
  1965. opName = "ShiftRight";
  1966. Ctx.IncrementMonCounter("sql_binary_operations", "ShiftRight");
  1967. break;
  1968. }
  1969. case TRule_neq_subexpr_TBlock2_TBlock1::kAlt3: {
  1970. Token(begin->GetBlock1().GetAlt3().GetToken1());
  1971. auto tokenId = begin->GetBlock1().GetAlt3().GetToken1().GetId();
  1972. if (!IS_TOKEN(tokenId, ROT_LEFT)) {
  1973. Error() << "Unsupported binary operation token: " << tokenId;
  1974. return {};
  1975. }
  1976. opName = "RotLeft";
  1977. Ctx.IncrementMonCounter("sql_binary_operations", "RotLeft");
  1978. break;
  1979. }
  1980. case TRule_neq_subexpr_TBlock2_TBlock1::kAlt4: {
  1981. opName = "RotRight";
  1982. Ctx.IncrementMonCounter("sql_binary_operations", "RotRight");
  1983. break;
  1984. }
  1985. case TRule_neq_subexpr_TBlock2_TBlock1::kAlt5: {
  1986. Token(begin->GetBlock1().GetAlt5().GetToken1());
  1987. auto tokenId = begin->GetBlock1().GetAlt5().GetToken1().GetId();
  1988. if (!IS_TOKEN(tokenId, AMPERSAND)) {
  1989. Error() << "Unsupported binary operation token: " << tokenId;
  1990. return {};
  1991. }
  1992. opName = "BitAnd";
  1993. Ctx.IncrementMonCounter("sql_binary_operations", "BitAnd");
  1994. break;
  1995. }
  1996. case TRule_neq_subexpr_TBlock2_TBlock1::kAlt6: {
  1997. Token(begin->GetBlock1().GetAlt6().GetToken1());
  1998. auto tokenId = begin->GetBlock1().GetAlt6().GetToken1().GetId();
  1999. if (!IS_TOKEN(tokenId, PIPE)) {
  2000. Error() << "Unsupported binary operation token: " << tokenId;
  2001. return {};
  2002. }
  2003. opName = "BitOr";
  2004. Ctx.IncrementMonCounter("sql_binary_operations", "BitOr");
  2005. break;
  2006. }
  2007. case TRule_neq_subexpr_TBlock2_TBlock1::kAlt7: {
  2008. Token(begin->GetBlock1().GetAlt7().GetToken1());
  2009. auto tokenId = begin->GetBlock1().GetAlt7().GetToken1().GetId();
  2010. if (!IS_TOKEN(tokenId, CARET)) {
  2011. Error() << "Unsupported binary operation token: " << tokenId;
  2012. return {};
  2013. }
  2014. opName = "BitXor";
  2015. Ctx.IncrementMonCounter("sql_binary_operations", "BitXor");
  2016. break;
  2017. }
  2018. case TRule_neq_subexpr_TBlock2_TBlock1::ALT_NOT_SET:
  2019. Y_ABORT("You should change implementation according to grammar changes");
  2020. }
  2021. partialResult = BuildBinaryOp(Ctx, Ctx.Pos(), opName, partialResult, SubExpr(getNode(*begin), (begin + 1 == end) ? tail : TTrailingQuestions{}));
  2022. ++begin;
  2023. }
  2024. return partialResult;
  2025. }
  2026. template <typename TGetNode, typename TIter>
  2027. TNodePtr TSqlExpression::BinOpList(const TRule_eq_subexpr& node, TGetNode getNode, TIter begin, TIter end, const TTrailingQuestions& tail) {
  2028. MaybeUnnamedSmartParenOnTop = MaybeUnnamedSmartParenOnTop && (begin == end);
  2029. TNodePtr partialResult = SubExpr(node, (begin == end) ? tail : TTrailingQuestions{});
  2030. while (begin != end) {
  2031. Ctx.IncrementMonCounter("sql_features", "BinaryOperation");
  2032. TString opName;
  2033. switch (begin->GetBlock1().Alt_case()) {
  2034. case TRule_cond_expr::TAlt5::TBlock1::TBlock1::kAlt1: {
  2035. Token(begin->GetBlock1().GetAlt1().GetToken1());
  2036. auto tokenId = begin->GetBlock1().GetAlt1().GetToken1().GetId();
  2037. if (!IS_TOKEN(tokenId, EQUALS)) {
  2038. Error() << "Unsupported binary operation token: " << tokenId;
  2039. return {};
  2040. }
  2041. Ctx.IncrementMonCounter("sql_binary_operations", "Equals");
  2042. opName = "==";
  2043. break;
  2044. }
  2045. case TRule_cond_expr::TAlt5::TBlock1::TBlock1::kAlt2: {
  2046. Token(begin->GetBlock1().GetAlt2().GetToken1());
  2047. auto tokenId = begin->GetBlock1().GetAlt2().GetToken1().GetId();
  2048. if (!IS_TOKEN(tokenId, EQUALS2)) {
  2049. Error() << "Unsupported binary operation token: " << tokenId;
  2050. return {};
  2051. }
  2052. Ctx.IncrementMonCounter("sql_binary_operations", "Equals2");
  2053. opName = "==";
  2054. break;
  2055. }
  2056. case TRule_cond_expr::TAlt5::TBlock1::TBlock1::kAlt3: {
  2057. Token(begin->GetBlock1().GetAlt3().GetToken1());
  2058. auto tokenId = begin->GetBlock1().GetAlt3().GetToken1().GetId();
  2059. if (!IS_TOKEN(tokenId, NOT_EQUALS)) {
  2060. Error() << "Unsupported binary operation token: " << tokenId;
  2061. return {};
  2062. }
  2063. Ctx.IncrementMonCounter("sql_binary_operations", "NotEquals");
  2064. opName = "!=";
  2065. break;
  2066. }
  2067. case TRule_cond_expr::TAlt5::TBlock1::TBlock1::kAlt4: {
  2068. Token(begin->GetBlock1().GetAlt4().GetToken1());
  2069. auto tokenId = begin->GetBlock1().GetAlt4().GetToken1().GetId();
  2070. if (!IS_TOKEN(tokenId, NOT_EQUALS2)) {
  2071. Error() << "Unsupported binary operation token: " << tokenId;
  2072. return {};
  2073. }
  2074. Ctx.IncrementMonCounter("sql_binary_operations", "NotEquals2");
  2075. opName = "!=";
  2076. break;
  2077. }
  2078. case TRule_cond_expr::TAlt5::TBlock1::TBlock1::kAlt5: {
  2079. Token(begin->GetBlock1().GetAlt5().GetRule_distinct_from_op1().GetToken1());
  2080. opName = begin->GetBlock1().GetAlt5().GetRule_distinct_from_op1().HasBlock2() ? "IsNotDistinctFrom" : "IsDistinctFrom";
  2081. Ctx.IncrementMonCounter("sql_binary_operations", opName);
  2082. break;
  2083. }
  2084. case TRule_cond_expr::TAlt5::TBlock1::TBlock1::ALT_NOT_SET:
  2085. Y_ABORT("You should change implementation according to grammar changes");
  2086. }
  2087. partialResult = BuildBinaryOp(Ctx, Ctx.Pos(), opName, partialResult, SubExpr(getNode(*begin), (begin + 1 == end) ? tail : TTrailingQuestions{}));
  2088. ++begin;
  2089. }
  2090. return partialResult;
  2091. }
  2092. TNodePtr TSqlExpression::SqlInExpr(const TRule_in_expr& node, const TTrailingQuestions& tail) {
  2093. TSqlExpression expr(Ctx, Mode);
  2094. expr.SetSmartParenthesisMode(TSqlExpression::ESmartParenthesis::InStatement);
  2095. auto result = expr.UnaryExpr(node.GetRule_in_unary_subexpr1(), tail);
  2096. return result;
  2097. }
  2098. TNodePtr TSqlExpression::SmartParenthesis(const TRule_smart_parenthesis& node) {
  2099. TVector<TNodePtr> exprs;
  2100. Token(node.GetToken1());
  2101. const TPosition pos(Ctx.Pos());
  2102. const bool isTuple = node.HasBlock3();
  2103. bool expectTuple = SmartParenthesisMode == ESmartParenthesis::InStatement;
  2104. EExpr mode = EExpr::Regular;
  2105. if (SmartParenthesisMode == ESmartParenthesis::SqlLambdaParams) {
  2106. mode = EExpr::SqlLambdaParams;
  2107. expectTuple = true;
  2108. }
  2109. if (node.HasBlock2() && !NamedExprList(node.GetBlock2().GetRule_named_expr_list1(), exprs, mode)) {
  2110. return {};
  2111. }
  2112. bool topLevelGroupBy = MaybeUnnamedSmartParenOnTop && SmartParenthesisMode == ESmartParenthesis::GroupBy;
  2113. bool hasAliases = false;
  2114. bool hasUnnamed = false;
  2115. for (const auto& expr: exprs) {
  2116. if (expr->GetLabel()) {
  2117. hasAliases = true;
  2118. } else {
  2119. hasUnnamed = true;
  2120. }
  2121. if (hasAliases && hasUnnamed && !topLevelGroupBy) {
  2122. Ctx.IncrementMonCounter("sql_errors", "AnonymousStructMembers");
  2123. Ctx.Error(pos) << "Structure does not allow anonymous members";
  2124. return nullptr;
  2125. }
  2126. }
  2127. if (exprs.size() == 1 && hasUnnamed && !isTuple && !expectTuple) {
  2128. return exprs.back();
  2129. }
  2130. if (topLevelGroupBy) {
  2131. if (isTuple) {
  2132. Ctx.IncrementMonCounter("sql_errors", "SimpleTupleInGroupBy");
  2133. Token(node.GetBlock3().GetToken1());
  2134. Ctx.Error() << "Unexpected trailing comma in grouping elements list";
  2135. return nullptr;
  2136. }
  2137. Ctx.IncrementMonCounter("sql_features", "ListOfNamedNode");
  2138. return BuildListOfNamedNodes(pos, std::move(exprs));
  2139. }
  2140. Ctx.IncrementMonCounter("sql_features", hasUnnamed ? "SimpleTuple" : "SimpleStruct");
  2141. return (hasUnnamed || expectTuple || exprs.size() == 0) ? BuildTuple(pos, exprs) : BuildStructure(pos, exprs);
  2142. }
  2143. } // namespace NSQLTranslationV1