sql_expression.cpp 99 KB


  1. #include "sql_expression.h"
  2. #include "sql_call_expr.h"
  3. #include "sql_select.h"
  4. #include "sql_values.h"
  5. #include <yql/essentials/utils/utf8.h>
  6. #include <util/charset/wide.h>
  7. #include <util/string/ascii.h>
  8. #include <util/string/hex.h>
  9. #include "antlr_token.h"
  10. namespace NSQLTranslationV1 {
  11. using NALPDefault::SQLv1LexerTokens;
  12. using NALPDefaultAntlr4::SQLv1Antlr4Lexer;
  13. using namespace NSQLv1Generated;
  14. TNodePtr TSqlExpression::Build(const TRule_expr& node) {
  15. // expr:
  16. // or_subexpr (OR or_subexpr)*
  17. // | type_name_composite
  18. switch (node.Alt_case()) {
  19. case TRule_expr::kAltExpr1: {
  20. auto getNode = [](const TRule_expr_TAlt1_TBlock2& b) -> const TRule_or_subexpr& { return b.GetRule_or_subexpr2(); };
  21. return BinOper("Or", node.GetAlt_expr1().GetRule_or_subexpr1(), getNode,
  22. node.GetAlt_expr1().GetBlock2().begin(), node.GetAlt_expr1().GetBlock2().end(), {});
  23. }
  24. case TRule_expr::kAltExpr2: {
  25. return TypeNode(node.GetAlt_expr2().GetRule_type_name_composite1());
  26. }
  27. case TRule_expr::ALT_NOT_SET:
  28. Y_ABORT("You should change implementation according to grammar changes");
  29. }
  30. }
  31. TNodePtr TSqlExpression::Build(const TRule_lambda_or_parameter& node) {
  32. // lambda_or_parameter:
  33. // lambda
  34. // | bind_parameter
  35. switch (node.Alt_case()) {
  36. case TRule_lambda_or_parameter::kAltLambdaOrParameter1: {
  37. return LambdaRule(node.alt_lambda_or_parameter1().GetRule_lambda1());
  38. }
  39. case TRule_lambda_or_parameter::kAltLambdaOrParameter2: {
  40. TString named;
  41. if (!NamedNodeImpl(node.GetAlt_lambda_or_parameter2().GetRule_bind_parameter1(), named, *this)) {
  42. return nullptr;
  43. }
  44. auto namedNode = GetNamedNode(named);
  45. if (!namedNode) {
  46. return nullptr;
  47. }
  48. return namedNode;
  49. }
  50. case TRule_lambda_or_parameter::ALT_NOT_SET:
  51. Y_ABORT("You should change implementation according to grammar changes");
  52. }
  53. }
  54. TNodePtr TSqlExpression::SubExpr(const TRule_mul_subexpr& node, const TTrailingQuestions& tail) {
  55. // mul_subexpr: con_subexpr (DOUBLE_PIPE con_subexpr)*;
  56. auto getNode = [](const TRule_mul_subexpr::TBlock2& b) -> const TRule_con_subexpr& { return b.GetRule_con_subexpr2(); };
  57. return BinOper("Concat", node.GetRule_con_subexpr1(), getNode, node.GetBlock2().begin(), node.GetBlock2().end(), tail);
  58. }
  59. TNodePtr TSqlExpression::SubExpr(const TRule_add_subexpr& node, const TTrailingQuestions& tail) {
  60. // add_subexpr: mul_subexpr ((ASTERISK | SLASH | PERCENT) mul_subexpr)*;
  61. auto getNode = [](const TRule_add_subexpr::TBlock2& b) -> const TRule_mul_subexpr& { return b.GetRule_mul_subexpr2(); };
  62. return BinOpList(node.GetRule_mul_subexpr1(), getNode, node.GetBlock2().begin(), node.GetBlock2().end(), tail);
  63. }
  64. TNodePtr TSqlExpression::SubExpr(const TRule_bit_subexpr& node, const TTrailingQuestions& tail) {
  65. // bit_subexpr: add_subexpr ((PLUS | MINUS) add_subexpr)*;
  66. auto getNode = [](const TRule_bit_subexpr::TBlock2& b) -> const TRule_add_subexpr& { return b.GetRule_add_subexpr2(); };
  67. return BinOpList(node.GetRule_add_subexpr1(), getNode, node.GetBlock2().begin(), node.GetBlock2().end(), tail);
  68. }
  69. TNodePtr TSqlExpression::SubExpr(const TRule_neq_subexpr& node, const TTrailingQuestions& tailExternal) {
  70. //neq_subexpr: bit_subexpr ((SHIFT_LEFT | shift_right | ROT_LEFT | rot_right | AMPERSAND | PIPE | CARET) bit_subexpr)*
  71. // // trailing QUESTIONS are used in optional simple types (String?) and optional lambda args: ($x, $y?) -> ($x)
  72. // ((double_question neq_subexpr) => double_question neq_subexpr | QUESTION+)?;
  73. YQL_ENSURE(tailExternal.Count == 0);
  74. MaybeUnnamedSmartParenOnTop = MaybeUnnamedSmartParenOnTop && !node.HasBlock3();
  75. TTrailingQuestions tail;
  76. if (node.HasBlock3() && node.GetBlock3().Alt_case() == TRule_neq_subexpr::TBlock3::kAlt2) {
  77. auto& questions = node.GetBlock3().GetAlt2();
  78. tail.Count = questions.GetBlock1().size();
  79. tail.Pos = Ctx.TokenPosition(questions.GetBlock1().begin()->GetToken1());
  80. YQL_ENSURE(tail.Count > 0);
  81. }
  82. auto getNode = [](const TRule_neq_subexpr::TBlock2& b) -> const TRule_bit_subexpr& { return b.GetRule_bit_subexpr2(); };
  83. auto result = BinOpList(node.GetRule_bit_subexpr1(), getNode, node.GetBlock2().begin(), node.GetBlock2().end(), tail);
  84. if (!result) {
  85. return {};
  86. }
  87. if (node.HasBlock3()) {
  88. auto& block = node.GetBlock3();
  89. if (block.Alt_case() == TRule_neq_subexpr::TBlock3::kAlt1) {
  90. TSqlExpression altExpr(Ctx, Mode);
  91. auto altResult = SubExpr(block.GetAlt1().GetRule_neq_subexpr2(), {});
  92. if (!altResult) {
  93. return {};
  94. }
  95. const TVector<TNodePtr> args({result, altResult});
  96. Token(block.GetAlt1().GetRule_double_question1().GetToken1());
  97. result = BuildBuiltinFunc(Ctx, Ctx.Pos(), "Coalesce", args);
  98. }
  99. }
  100. return result;
  101. }
  102. TNodePtr TSqlExpression::SubExpr(const TRule_eq_subexpr& node, const TTrailingQuestions& tail) {
  103. // eq_subexpr: neq_subexpr ((LESS | LESS_OR_EQ | GREATER | GREATER_OR_EQ) neq_subexpr)*;
  104. auto getNode = [](const TRule_eq_subexpr::TBlock2& b) -> const TRule_neq_subexpr& { return b.GetRule_neq_subexpr2(); };
  105. return BinOpList(node.GetRule_neq_subexpr1(), getNode, node.GetBlock2().begin(), node.GetBlock2().end(), tail);
  106. }
  107. TNodePtr TSqlExpression::SubExpr(const TRule_or_subexpr& node, const TTrailingQuestions& tail) {
  108. // or_subexpr: and_subexpr (AND and_subexpr)*;
  109. auto getNode = [](const TRule_or_subexpr::TBlock2& b) -> const TRule_and_subexpr& { return b.GetRule_and_subexpr2(); };
  110. return BinOper("And", node.GetRule_and_subexpr1(), getNode, node.GetBlock2().begin(), node.GetBlock2().end(), tail);
  111. }
  112. TNodePtr TSqlExpression::SubExpr(const TRule_and_subexpr& node, const TTrailingQuestions& tail) {
  113. // and_subexpr: xor_subexpr (XOR xor_subexpr)*;
  114. auto getNode = [](const TRule_and_subexpr::TBlock2& b) -> const TRule_xor_subexpr& { return b.GetRule_xor_subexpr2(); };
  115. return BinOper("Xor", node.GetRule_xor_subexpr1(), getNode, node.GetBlock2().begin(), node.GetBlock2().end(), tail);
  116. }
  117. bool ChangefeedSettingsEntry(const TRule_changefeed_settings_entry& node, TSqlExpression& ctx, TChangefeedSettings& settings, bool alter) {
  118. const auto id = IdEx(node.GetRule_an_id1(), ctx);
  119. if (alter) {
  120. // currently we don't support alter settings
  121. ctx.Error() << to_upper(id.Name) << " alter is not supported";
  122. return false;
  123. }
  124. const auto& setting = node.GetRule_changefeed_setting_value3();
  125. auto exprNode = ctx.Build(setting.GetRule_expr1());
  126. if (!exprNode) {
  127. ctx.Context().Error(id.Pos) << "Invalid changefeed setting: " << id.Name;
  128. return false;
  129. }
  130. if (to_lower(id.Name) == "sink_type") {
  131. if (!exprNode->IsLiteral() || exprNode->GetLiteralType() != "String") {
  132. ctx.Context().Error() << "Literal of String type is expected for " << id.Name;
  133. return false;
  134. }
  135. const auto value = exprNode->GetLiteralValue();
  136. if (to_lower(value) == "local") {
  137. settings.SinkSettings = TChangefeedSettings::TLocalSinkSettings();
  138. } else {
  139. ctx.Context().Error() << "Unknown changefeed sink type: " << value;
  140. return false;
  141. }
  142. } else if (to_lower(id.Name) == "mode") {
  143. if (!exprNode->IsLiteral() || exprNode->GetLiteralType() != "String") {
  144. ctx.Context().Error() << "Literal of String type is expected for " << id.Name;
  145. return false;
  146. }
  147. settings.Mode = exprNode;
  148. } else if (to_lower(id.Name) == "format") {
  149. if (!exprNode->IsLiteral() || exprNode->GetLiteralType() != "String") {
  150. ctx.Context().Error() << "Literal of String type is expected for " << id.Name;
  151. return false;
  152. }
  153. settings.Format = exprNode;
  154. } else if (to_lower(id.Name) == "initial_scan") {
  155. if (!exprNode->IsLiteral() || exprNode->GetLiteralType() != "Bool") {
  156. ctx.Context().Error() << "Literal of Bool type is expected for " << id.Name;
  157. return false;
  158. }
  159. settings.InitialScan = exprNode;
  160. } else if (to_lower(id.Name) == "virtual_timestamps") {
  161. if (!exprNode->IsLiteral() || exprNode->GetLiteralType() != "Bool") {
  162. ctx.Context().Error() << "Literal of Bool type is expected for " << id.Name;
  163. return false;
  164. }
  165. settings.VirtualTimestamps = exprNode;
  166. } else if (to_lower(id.Name) == "barriers_interval" || to_lower(id.Name) == "resolved_timestamps") {
  167. if (exprNode->GetOpName() != "Interval") {
  168. ctx.Context().Error() << "Literal of Interval type is expected for " << id.Name;
  169. return false;
  170. }
  171. settings.BarriersInterval = exprNode;
  172. } else if (to_lower(id.Name) == "retention_period") {
  173. if (exprNode->GetOpName() != "Interval") {
  174. ctx.Context().Error() << "Literal of Interval type is expected for " << id.Name;
  175. return false;
  176. }
  177. settings.RetentionPeriod = exprNode;
  178. } else if (to_lower(id.Name) == "topic_auto_partitioning") {
  179. auto v = to_lower(exprNode->GetLiteralValue());
  180. if (v != "enabled" && v != "disabled") {
  181. ctx.Context().Error() << "Literal of Interval type is expected for " << id.Name;
  182. }
  183. settings.TopicAutoPartitioning = exprNode;
  184. } else if (to_lower(id.Name) == "topic_max_active_partitions") {
  185. if (!exprNode->IsIntegerLiteral()) {
  186. ctx.Context().Error() << "Literal of integer type is expected for " << id.Name;
  187. return false;
  188. }
  189. settings.TopicMaxActivePartitions = exprNode;
  190. } else if (to_lower(id.Name) == "topic_min_active_partitions") {
  191. if (!exprNode->IsIntegerLiteral()) {
  192. ctx.Context().Error() << "Literal of integer type is expected for " << id.Name;
  193. return false;
  194. }
  195. settings.TopicPartitions = exprNode;
  196. } else if (to_lower(id.Name) == "aws_region") {
  197. if (!exprNode->IsLiteral() || exprNode->GetLiteralType() != "String") {
  198. ctx.Context().Error() << "Literal of String type is expected for " << id.Name;
  199. return false;
  200. }
  201. settings.AwsRegion = exprNode;
  202. } else {
  203. ctx.Context().Error(id.Pos) << "Unknown changefeed setting: " << id.Name;
  204. return false;
  205. }
  206. return true;
  207. }
  208. bool ChangefeedSettings(const TRule_changefeed_settings& node, TSqlExpression& ctx, TChangefeedSettings& settings, bool alter) {
  209. if (!ChangefeedSettingsEntry(node.GetRule_changefeed_settings_entry1(), ctx, settings, alter)) {
  210. return false;
  211. }
  212. for (auto& block : node.GetBlock2()) {
  213. if (!ChangefeedSettingsEntry(block.GetRule_changefeed_settings_entry2(), ctx, settings, alter)) {
  214. return false;
  215. }
  216. }
  217. return true;
  218. }
  219. bool CreateChangefeed(const TRule_changefeed& node, TSqlExpression& ctx, TVector<TChangefeedDescription>& changefeeds) {
  220. changefeeds.emplace_back(IdEx(node.GetRule_an_id2(), ctx));
  221. if (!ChangefeedSettings(node.GetRule_changefeed_settings5(), ctx, changefeeds.back().Settings, false)) {
  222. return false;
  223. }
  224. return true;
  225. }
  namespace {
  // Returns true when `literal` contains neither a character that std::isalpha classifies as
  // alphabetic nor a byte with the high bit set (0x80..0xFF). An empty literal yields true.
  bool WithoutAlpha(const std::string_view &literal) {
      return std::none_of(literal.cbegin(), literal.cend(), [](char c) {
          // std::isalpha requires a value representable as unsigned char; passing a negative
          // plain char is undefined behaviour, so convert before classifying.
          const auto byte = static_cast<unsigned char>(c);
          return byte >= 0x80 || std::isalpha(byte);
      });
  }
  }
  231. bool Expr(TSqlExpression& sqlExpr, TVector<TNodePtr>& exprNodes, const TRule_expr& node) {
  232. TNodePtr exprNode = sqlExpr.Build(node);
  233. if (!exprNode) {
  234. return false;
  235. }
  236. exprNodes.push_back(exprNode);
  237. return true;
  238. }
  239. bool ExprList(TSqlExpression& sqlExpr, TVector<TNodePtr>& exprNodes, const TRule_expr_list& node) {
  240. if (!Expr(sqlExpr, exprNodes, node.GetRule_expr1())) {
  241. return false;
  242. }
  243. for (auto b: node.GetBlock2()) {
  244. sqlExpr.Token(b.GetToken1());
  245. if (!Expr(sqlExpr, exprNodes, b.GetRule_expr2())) {
  246. return false;
  247. }
  248. }
  249. return true;
  250. }
  251. bool ParseNumbers(TContext& ctx, const TString& strOrig, ui64& value, TString& suffix) {
  252. const auto str = to_lower(strOrig);
  253. const auto strLen = str.size();
  254. ui64 base = 10;
  255. if (strLen > 2 && str[0] == '0') {
  256. const auto formatChar = str[1];
  257. if (formatChar == 'x') {
  258. base = 16;
  259. } else if (formatChar == 'o') {
  260. base = 8;
  261. } else if (formatChar == 'b') {
  262. base = 2;
  263. }
  264. }
  265. if (strLen > 1) {
  266. auto iter = str.cend() - 1;
  267. if (*iter == 'l' || *iter == 's' || *iter == 't' || *iter == 's' || *iter == 'i' || *iter == 'b' || *iter == 'n') {
  268. --iter;
  269. }
  270. if (*iter == 'u' || *iter == 'p') {
  271. --iter;
  272. }
  273. suffix = TString(++iter, str.cend());
  274. }
  275. value = 0;
  276. const TString digString(str.begin() + (base == 10 ? 0 : 2), str.end() - suffix.size());
  277. for (const char& cur: digString) {
  278. const ui64 curDigit = Char2DigitTable[static_cast<int>(cur)];
  279. if (curDigit >= base) {
  280. ctx.Error(ctx.Pos()) << "Failed to parse number from string: " << strOrig << ", char: '" << cur <<
  281. "' is out of base: " << base;
  282. return false;
  283. }
  284. ui64 curValue = value;
  285. value *= base;
  286. bool overflow = ((value / base) != curValue);
  287. if (!overflow) {
  288. curValue = value;
  289. value += curDigit;
  290. overflow = value < curValue;
  291. }
  292. if (overflow) {
  293. ctx.Error(ctx.Pos()) << "Failed to parse number from string: " << strOrig << ", number limit overflow";
  294. return false;
  295. }
  296. }
  297. return true;
  298. }
  299. TNodePtr LiteralNumber(TContext& ctx, const TRule_integer& node) {
  300. const TString intergerString = ctx.Token(node.GetToken1());
  301. if (to_lower(intergerString).EndsWith("pn")) {
  302. // TODO: add validation
  303. return new TLiteralNode(ctx.Pos(), "PgNumeric", intergerString.substr(0, intergerString.size() - 2));
  304. }
  305. ui64 value;
  306. TString suffix;
  307. if (!ParseNumbers(ctx, intergerString, value, suffix)) {
  308. return {};
  309. }
  310. const bool noSpaceForInt32 = value >> 31;
  311. const bool noSpaceForInt64 = value >> 63;
  312. if (suffix == "") {
  313. bool implicitType = true;
  314. if (noSpaceForInt64) {
  315. return new TLiteralNumberNode<ui64>(ctx.Pos(), "Uint64", ToString(value), implicitType);
  316. } else if (noSpaceForInt32) {
  317. return new TLiteralNumberNode<i64>(ctx.Pos(), "Int64", ToString(value), implicitType);
  318. }
  319. return new TLiteralNumberNode<i32>(ctx.Pos(), "Int32", ToString(value), implicitType);
  320. } else if (suffix == "p") {
  321. bool implicitType = true;
  322. if (noSpaceForInt64) {
  323. ctx.Error(ctx.Pos()) << "Failed to parse number from string: " << intergerString << ", 64 bit signed integer overflow";
  324. return {};
  325. } else if (noSpaceForInt32) {
  326. return new TLiteralNumberNode<i64>(ctx.Pos(), "PgInt8", ToString(value), implicitType);
  327. }
  328. return new TLiteralNumberNode<i32>(ctx.Pos(), "PgInt4", ToString(value), implicitType);
  329. } else if (suffix == "u") {
  330. return new TLiteralNumberNode<ui32>(ctx.Pos(), "Uint32", ToString(value));
  331. } else if (suffix == "ul") {
  332. return new TLiteralNumberNode<ui64>(ctx.Pos(), "Uint64", ToString(value));
  333. } else if (suffix == "ut") {
  334. return new TLiteralNumberNode<ui8>(ctx.Pos(), "Uint8", ToString(value));
  335. } else if (suffix == "t") {
  336. return new TLiteralNumberNode<i8>(ctx.Pos(), "Int8", ToString(value));
  337. } else if (suffix == "l") {
  338. return new TLiteralNumberNode<i64>(ctx.Pos(), "Int64", ToString(value));
  339. } else if (suffix == "us") {
  340. return new TLiteralNumberNode<ui16>(ctx.Pos(), "Uint16", ToString(value));
  341. } else if (suffix == "s") {
  342. return new TLiteralNumberNode<i16>(ctx.Pos(), "Int16", ToString(value));
  343. } else if (suffix == "ps") {
  344. return new TLiteralNumberNode<i16>(ctx.Pos(), "PgInt2", ToString(value));
  345. } else if (suffix == "pi") {
  346. return new TLiteralNumberNode<i32>(ctx.Pos(), "PgInt4", ToString(value));
  347. } else if (suffix == "pb") {
  348. return new TLiteralNumberNode<i64>(ctx.Pos(), "PgInt8", ToString(value));
  349. } else {
  350. ctx.Error(ctx.Pos()) << "Failed to parse number from string: " << intergerString << ", invalid suffix: " << suffix;
  351. return {};
  352. }
  353. }
  354. TNodePtr LiteralReal(TContext& ctx, const TRule_real& node) {
  355. const TString value(ctx.Token(node.GetToken1()));
  356. YQL_ENSURE(!value.empty());
  357. auto lower = to_lower(value);
  358. if (lower.EndsWith("f")) {
  359. return new TLiteralNumberNode<float>(ctx.Pos(), "Float", value.substr(0, value.size()-1));
  360. } else if (lower.EndsWith("p")) {
  361. return new TLiteralNumberNode<float>(ctx.Pos(), "PgFloat8", value.substr(0, value.size()-1));
  362. } else if (lower.EndsWith("pf4")) {
  363. return new TLiteralNumberNode<float>(ctx.Pos(), "PgFloat4", value.substr(0, value.size()-3));
  364. } else if (lower.EndsWith("pf8")) {
  365. return new TLiteralNumberNode<float>(ctx.Pos(), "PgFloat8", value.substr(0, value.size()-3));
  366. } else if (lower.EndsWith("pn")) {
  367. return new TLiteralNode(ctx.Pos(), "PgNumeric", value.substr(0, value.size()-2));
  368. } else {
  369. return new TLiteralNumberNode<double>(ctx.Pos(), "Double", value);
  370. }
  371. }
  372. TMaybe<TExprOrIdent> TSqlExpression::LiteralExpr(const TRule_literal_value& node) {
  373. TExprOrIdent result;
  374. switch (node.Alt_case()) {
  375. case TRule_literal_value::kAltLiteralValue1: {
  376. result.Expr = LiteralNumber(Ctx, node.GetAlt_literal_value1().GetRule_integer1());
  377. break;
  378. }
  379. case TRule_literal_value::kAltLiteralValue2: {
  380. result.Expr = LiteralReal(Ctx, node.GetAlt_literal_value2().GetRule_real1());
  381. break;
  382. }
  383. case TRule_literal_value::kAltLiteralValue3: {
  384. const TString value(Token(node.GetAlt_literal_value3().GetToken1()));
  385. return BuildLiteralTypedSmartStringOrId(Ctx, value);
  386. }
  387. case TRule_literal_value::kAltLiteralValue5: {
  388. Token(node.GetAlt_literal_value5().GetToken1());
  389. result.Expr = BuildLiteralNull(Ctx.Pos());
  390. break;
  391. }
  392. case TRule_literal_value::kAltLiteralValue9: {
  393. const TString value(to_lower(Token(node.GetAlt_literal_value9().GetRule_bool_value1().GetToken1())));
  394. result.Expr = BuildLiteralBool(Ctx.Pos(), FromString<bool>(value));
  395. break;
  396. }
  397. case TRule_literal_value::kAltLiteralValue10: {
  398. result.Expr = BuildEmptyAction(Ctx.Pos());
  399. break;
  400. }
  401. case TRule_literal_value::kAltLiteralValue4:
  402. case TRule_literal_value::kAltLiteralValue6:
  403. case TRule_literal_value::kAltLiteralValue7:
  404. case TRule_literal_value::kAltLiteralValue8:
  405. case TRule_literal_value::ALT_NOT_SET:
  406. AltNotImplemented("literal_value", node);
  407. }
  408. if (!result.Expr) {
  409. return {};
  410. }
  411. return result;
  412. }
  413. template<typename TUnarySubExprType>
  414. TNodePtr TSqlExpression::UnaryExpr(const TUnarySubExprType& node, const TTrailingQuestions& tail) {
  415. if constexpr (std::is_same_v<TUnarySubExprType, TRule_unary_subexpr>) {
  416. if (node.Alt_case() == TRule_unary_subexpr::kAltUnarySubexpr1) {
  417. return UnaryCasualExpr(node.GetAlt_unary_subexpr1().GetRule_unary_casual_subexpr1(), tail);
  418. } else if (tail.Count) {
  419. UnexpectedQuestionToken(tail);
  420. return {};
  421. } else {
  422. MaybeUnnamedSmartParenOnTop = false;
  423. return JsonApiExpr(node.GetAlt_unary_subexpr2().GetRule_json_api_expr1());
  424. }
  425. } else {
  426. MaybeUnnamedSmartParenOnTop = false;
  427. if (node.Alt_case() == TRule_in_unary_subexpr::kAltInUnarySubexpr1) {
  428. return UnaryCasualExpr(node.GetAlt_in_unary_subexpr1().GetRule_in_unary_casual_subexpr1(), tail);
  429. } else if (tail.Count) {
  430. UnexpectedQuestionToken(tail);
  431. return {};
  432. } else {
  433. return JsonApiExpr(node.GetAlt_in_unary_subexpr2().GetRule_json_api_expr1());
  434. }
  435. }
  436. }
  437. TNodePtr TSqlExpression::JsonPathSpecification(const TRule_jsonpath_spec& node) {
  438. /*
  439. jsonpath_spec: STRING_VALUE;
  440. */
  441. TString value = Token(node.GetToken1());
  442. TPosition pos = Ctx.Pos();
  443. auto parsed = StringContent(Ctx, pos, value);
  444. if (!parsed) {
  445. return nullptr;
  446. }
  447. return new TCallNodeImpl(pos, "Utf8", {BuildQuotedAtom(pos, parsed->Content, parsed->Flags)});
  448. }
  449. TNodePtr TSqlExpression::JsonReturningTypeRule(const TRule_type_name_simple& node) {
  450. /*
  451. (RETURNING type_name_simple)?
  452. */
  453. return TypeSimple(node, /* onlyDataAllowed */ true);
  454. }
  455. TNodePtr TSqlExpression::JsonInputArg(const TRule_json_common_args& node) {
  456. /*
  457. json_common_args: expr COMMA jsonpath_spec (PASSING json_variables)?;
  458. */
  459. TNodePtr jsonExpr = Build(node.GetRule_expr1());
  460. if (!jsonExpr || jsonExpr->IsNull()) {
  461. jsonExpr = new TCallNodeImpl(Ctx.Pos(), "Nothing", {
  462. new TCallNodeImpl(Ctx.Pos(), "OptionalType", {BuildDataType(Ctx.Pos(), "Json")})
  463. });
  464. }
  465. return jsonExpr;
  466. }
  467. void TSqlExpression::AddJsonVariable(const TRule_json_variable& node, TVector<TNodePtr>& children) {
  468. /*
  469. json_variable: expr AS json_variable_name;
  470. */
  471. TNodePtr expr;
  472. TString rawName;
  473. TPosition namePos = Ctx.Pos();
  474. ui32 nameFlags = 0;
  475. expr = Build(node.GetRule_expr1());
  476. const auto& nameRule = node.GetRule_json_variable_name3();
  477. switch (nameRule.GetAltCase()) {
  478. case TRule_json_variable_name::kAltJsonVariableName1:
  479. rawName = Id(nameRule.GetAlt_json_variable_name1().GetRule_id_expr1(), *this);
  480. nameFlags = TNodeFlags::ArbitraryContent;
  481. break;
  482. case TRule_json_variable_name::kAltJsonVariableName2: {
  483. const auto& token = nameRule.GetAlt_json_variable_name2().GetToken1();
  484. namePos = GetPos(token);
  485. auto parsed = StringContentOrIdContent(Ctx, namePos, token.GetValue());
  486. if (!parsed) {
  487. return;
  488. }
  489. rawName = parsed->Content;
  490. nameFlags = parsed->Flags;
  491. break;
  492. }
  493. case TRule_json_variable_name::ALT_NOT_SET:
  494. Y_ABORT("You should change implementation according to grammar changes");
  495. }
  496. TNodePtr nameExpr = BuildQuotedAtom(namePos, rawName, nameFlags);
  497. children.push_back(BuildTuple(namePos, {nameExpr, expr}));
  498. }
  499. void TSqlExpression::AddJsonVariables(const TRule_json_variables& node, TVector<TNodePtr>& children) {
  500. /*
  501. json_variables: json_variable (COMMA json_variable)*;
  502. */
  503. AddJsonVariable(node.GetRule_json_variable1(), children);
  504. for (size_t i = 0; i < node.Block2Size(); i++) {
  505. AddJsonVariable(node.GetBlock2(i).GetRule_json_variable2(), children);
  506. }
  507. }
  508. TNodePtr TSqlExpression::JsonVariables(const TRule_json_common_args& node) {
  509. /*
  510. json_common_args: expr COMMA jsonpath_spec (PASSING json_variables)?;
  511. */
  512. TVector<TNodePtr> variables;
  513. TPosition pos = Ctx.Pos();
  514. if (node.HasBlock4()) {
  515. const auto& block = node.GetBlock4();
  516. pos = GetPos(block.GetToken1());
  517. AddJsonVariables(block.GetRule_json_variables2(), variables);
  518. }
  519. return new TCallNodeImpl(pos, "JsonVariables", variables);
  520. }
  521. void TSqlExpression::AddJsonCommonArgs(const TRule_json_common_args& node, TVector<TNodePtr>& children) {
  522. /*
  523. json_common_args: expr COMMA jsonpath_spec (PASSING json_variables)?;
  524. */
  525. TNodePtr jsonExpr = JsonInputArg(node);
  526. TNodePtr jsonPath = JsonPathSpecification(node.GetRule_jsonpath_spec3());
  527. TNodePtr variables = JsonVariables(node);
  528. children.push_back(jsonExpr);
  529. children.push_back(jsonPath);
  530. children.push_back(variables);
  531. }
  532. TNodePtr TSqlExpression::JsonValueCaseHandler(const TRule_json_case_handler& node, EJsonValueHandlerMode& mode) {
  533. /*
  534. json_case_handler: ERROR | NULL | (DEFAULT expr);
  535. */
  536. switch (node.GetAltCase()) {
  537. case TRule_json_case_handler::kAltJsonCaseHandler1: {
  538. const auto pos = GetPos(node.GetAlt_json_case_handler1().GetToken1());
  539. mode = EJsonValueHandlerMode::Error;
  540. return new TCallNodeImpl(pos, "Null", {});
  541. }
  542. case TRule_json_case_handler::kAltJsonCaseHandler2: {
  543. const auto pos = GetPos(node.GetAlt_json_case_handler2().GetToken1());
  544. mode = EJsonValueHandlerMode::DefaultValue;
  545. return new TCallNodeImpl(pos, "Null", {});
  546. }
  547. case TRule_json_case_handler::kAltJsonCaseHandler3:
  548. mode = EJsonValueHandlerMode::DefaultValue;
  549. return Build(node.GetAlt_json_case_handler3().GetRule_expr2());
  550. case TRule_json_case_handler::ALT_NOT_SET:
  551. Y_ABORT("You should change implementation according to grammar changes");
  552. }
  553. }
  554. void TSqlExpression::AddJsonValueCaseHandlers(const TRule_json_value& node, TVector<TNodePtr>& children) {
  555. /*
  556. json_case_handler*
  557. */
  558. if (node.Block5Size() > 2) {
  559. Ctx.Error() << "Only 1 ON EMPTY and/or 1 ON ERROR clause is expected";
  560. Ctx.IncrementMonCounter("sql_errors", "JsonValueTooManyHandleClauses");
  561. return;
  562. }
  563. TNodePtr onEmpty;
  564. EJsonValueHandlerMode onEmptyMode = EJsonValueHandlerMode::DefaultValue;
  565. TNodePtr onError;
  566. EJsonValueHandlerMode onErrorMode = EJsonValueHandlerMode::DefaultValue;
  567. for (size_t i = 0; i < node.Block5Size(); i++) {
  568. const auto block = node.GetBlock5(i);
  569. const bool isEmptyClause = to_lower(block.GetToken3().GetValue()) == "empty";
  570. if (isEmptyClause && onEmpty != nullptr) {
  571. Ctx.Error() << "Only 1 ON EMPTY clause is expected";
  572. Ctx.IncrementMonCounter("sql_errors", "JsonValueMultipleOnEmptyClauses");
  573. return;
  574. }
  575. if (!isEmptyClause && onError != nullptr) {
  576. Ctx.Error() << "Only 1 ON ERROR clause is expected";
  577. Ctx.IncrementMonCounter("sql_errors", "JsonValueMultipleOnErrorClauses");
  578. return;
  579. }
  580. if (isEmptyClause && onError != nullptr) {
  581. Ctx.Error() << "ON EMPTY clause must be before ON ERROR clause";
  582. Ctx.IncrementMonCounter("sql_errors", "JsonValueOnEmptyAfterOnError");
  583. return;
  584. }
  585. EJsonValueHandlerMode currentMode;
  586. TNodePtr currentHandler = JsonValueCaseHandler(block.GetRule_json_case_handler1(), currentMode);
  587. if (isEmptyClause) {
  588. onEmpty = currentHandler;
  589. onEmptyMode = currentMode;
  590. } else {
  591. onError = currentHandler;
  592. onErrorMode = currentMode;
  593. }
  594. }
  595. if (onEmpty == nullptr) {
  596. onEmpty = new TCallNodeImpl(Ctx.Pos(), "Null", {});
  597. }
  598. if (onError == nullptr) {
  599. onError = new TCallNodeImpl(Ctx.Pos(), "Null", {});
  600. }
  601. children.push_back(BuildQuotedAtom(Ctx.Pos(), ToString(onEmptyMode), TNodeFlags::Default));
  602. children.push_back(onEmpty);
  603. children.push_back(BuildQuotedAtom(Ctx.Pos(), ToString(onErrorMode), TNodeFlags::Default));
  604. children.push_back(onError);
  605. }
  606. TNodePtr TSqlExpression::JsonValueExpr(const TRule_json_value& node) {
  607. /*
  608. json_value: JSON_VALUE LPAREN
  609. json_common_args
  610. (RETURNING type_name_simple)?
  611. (json_case_handler ON (EMPTY | ERROR))*
  612. RPAREN;
  613. */
  614. TVector<TNodePtr> children;
  615. AddJsonCommonArgs(node.GetRule_json_common_args3(), children);
  616. AddJsonValueCaseHandlers(node, children);
  617. if (node.HasBlock4()) {
  618. auto returningType = JsonReturningTypeRule(node.GetBlock4().GetRule_type_name_simple2());
  619. if (!returningType) {
  620. return {};
  621. }
  622. children.push_back(returningType);
  623. }
  624. return new TCallNodeImpl(GetPos(node.GetToken1()), "JsonValue", children);
  625. }
  626. void TSqlExpression::AddJsonExistsHandler(const TRule_json_exists& node, TVector<TNodePtr>& children) {
  627. /*
  628. json_exists: JSON_EXISTS LPAREN
  629. json_common_args
  630. json_exists_handler?
  631. RPAREN;
  632. */
  633. auto buildJustBool = [&](const TPosition& pos, bool value) {
  634. return new TCallNodeImpl(pos, "Just", {BuildLiteralBool(pos, value)});
  635. };
  636. if (!node.HasBlock4()) {
  637. children.push_back(buildJustBool(Ctx.Pos(), false));
  638. return;
  639. }
  640. const auto& handlerRule = node.GetBlock4().GetRule_json_exists_handler1();
  641. const auto& token = handlerRule.GetToken1();
  642. const auto pos = GetPos(token);
  643. const auto mode = to_lower(token.GetValue());
  644. if (mode == "unknown") {
  645. const auto nothingNode = new TCallNodeImpl(pos, "Nothing", {
  646. new TCallNodeImpl(pos, "OptionalType", {BuildDataType(pos, "Bool")})
  647. });
  648. children.push_back(nothingNode);
  649. } else if (mode != "error") {
  650. children.push_back(buildJustBool(pos, FromString<bool>(mode)));
  651. }
  652. }
  653. TNodePtr TSqlExpression::JsonExistsExpr(const TRule_json_exists& node) {
  654. /*
  655. json_exists: JSON_EXISTS LPAREN
  656. json_common_args
  657. json_exists_handler?
  658. RPAREN;
  659. */
  660. TVector<TNodePtr> children;
  661. AddJsonCommonArgs(node.GetRule_json_common_args3(), children);
  662. AddJsonExistsHandler(node, children);
  663. return new TCallNodeImpl(GetPos(node.GetToken1()), "JsonExists", children);
  664. }
  665. EJsonQueryWrap TSqlExpression::JsonQueryWrapper(const TRule_json_query& node) {
  666. /*
  667. json_query: JSON_QUERY LPAREN
  668. json_common_args
  669. (json_query_wrapper WRAPPER)?
  670. (json_query_handler ON EMPTY)?
  671. (json_query_handler ON ERROR)?
  672. RPAREN;
  673. */
  674. // default behaviour - no wrapping
  675. if (!node.HasBlock4()) {
  676. return EJsonQueryWrap::NoWrap;
  677. }
  678. // WITHOUT ARRAY? - no wrapping
  679. const auto& wrapperRule = node.GetBlock4().GetRule_json_query_wrapper1();
  680. if (wrapperRule.GetAltCase() == TRule_json_query_wrapper::kAltJsonQueryWrapper1) {
  681. return EJsonQueryWrap::NoWrap;
  682. }
  683. // WITH (CONDITIONAL | UNCONDITIONAL)? ARRAY? - wrapping depends on 2nd token. Default is UNCONDITIONAL
  684. const auto& withWrapperRule = wrapperRule.GetAlt_json_query_wrapper2();
  685. if (!withWrapperRule.HasBlock2()) {
  686. return EJsonQueryWrap::Wrap;
  687. }
  688. const auto& token = withWrapperRule.GetBlock2().GetToken1();
  689. if (to_lower(token.GetValue()) == "conditional") {
  690. return EJsonQueryWrap::ConditionalWrap;
  691. } else {
  692. return EJsonQueryWrap::Wrap;
  693. }
  694. }
  695. EJsonQueryHandler TSqlExpression::JsonQueryHandler(const TRule_json_query_handler& node) {
  696. /*
  697. json_query_handler: ERROR | NULL | (EMPTY ARRAY) | (EMPTY OBJECT);
  698. */
  699. switch (node.GetAltCase()) {
  700. case TRule_json_query_handler::kAltJsonQueryHandler1:
  701. return EJsonQueryHandler::Error;
  702. case TRule_json_query_handler::kAltJsonQueryHandler2:
  703. return EJsonQueryHandler::Null;
  704. case TRule_json_query_handler::kAltJsonQueryHandler3:
  705. return EJsonQueryHandler::EmptyArray;
  706. case TRule_json_query_handler::kAltJsonQueryHandler4:
  707. return EJsonQueryHandler::EmptyObject;
  708. case TRule_json_query_handler::ALT_NOT_SET:
  709. Y_ABORT("You should change implementation according to grammar changes");
  710. }
  711. }
  712. TNodePtr TSqlExpression::JsonQueryExpr(const TRule_json_query& node) {
  713. /*
  714. json_query: JSON_QUERY LPAREN
  715. json_common_args
  716. (json_query_wrapper WRAPPER)?
  717. (json_query_handler ON EMPTY)?
  718. (json_query_handler ON ERROR)?
  719. RPAREN;
  720. */
  721. TVector<TNodePtr> children;
  722. AddJsonCommonArgs(node.GetRule_json_common_args3(), children);
  723. auto addChild = [&](TPosition pos, const TString& content) {
  724. children.push_back(BuildQuotedAtom(pos, content, TNodeFlags::Default));
  725. };
  726. const auto wrapMode = JsonQueryWrapper(node);
  727. addChild(Ctx.Pos(), ToString(wrapMode));
  728. auto onEmpty = EJsonQueryHandler::Null;
  729. if (node.HasBlock5()) {
  730. if (wrapMode != EJsonQueryWrap::NoWrap) {
  731. Ctx.Error() << "ON EMPTY is prohibited because WRAPPER clause is specified";
  732. Ctx.IncrementMonCounter("sql_errors", "JsonQueryOnEmptyWithWrapper");
  733. return nullptr;
  734. }
  735. onEmpty = JsonQueryHandler(node.GetBlock5().GetRule_json_query_handler1());
  736. }
  737. addChild(Ctx.Pos(), ToString(onEmpty));
  738. auto onError = EJsonQueryHandler::Null;
  739. if (node.HasBlock6()) {
  740. onError = JsonQueryHandler(node.GetBlock6().GetRule_json_query_handler1());
  741. }
  742. addChild(Ctx.Pos(), ToString(onError));
  743. return new TCallNodeImpl(GetPos(node.GetToken1()), "JsonQuery", children);
  744. }
  745. TNodePtr TSqlExpression::JsonApiExpr(const TRule_json_api_expr& node) {
  746. /*
  747. json_api_expr: json_value | json_exists | json_query;
  748. */
  749. TPosition pos = Ctx.Pos();
  750. TNodePtr result = nullptr;
  751. switch (node.GetAltCase()) {
  752. case TRule_json_api_expr::kAltJsonApiExpr1: {
  753. const auto& jsonValue = node.GetAlt_json_api_expr1().GetRule_json_value1();
  754. pos = GetPos(jsonValue.GetToken1());
  755. result = JsonValueExpr(jsonValue);
  756. break;
  757. }
  758. case TRule_json_api_expr::kAltJsonApiExpr2: {
  759. const auto& jsonExists = node.GetAlt_json_api_expr2().GetRule_json_exists1();
  760. pos = GetPos(jsonExists.GetToken1());
  761. result = JsonExistsExpr(jsonExists);
  762. break;
  763. }
  764. case TRule_json_api_expr::kAltJsonApiExpr3: {
  765. const auto& jsonQuery = node.GetAlt_json_api_expr3().GetRule_json_query1();
  766. pos = GetPos(jsonQuery.GetToken1());
  767. result = JsonQueryExpr(jsonQuery);
  768. break;
  769. }
  770. case TRule_json_api_expr::ALT_NOT_SET:
  771. Y_ABORT("You should change implementation according to grammar changes");
  772. }
  773. return result;
  774. }
  775. TNodePtr TSqlExpression::RowPatternVarAccess(TString var, const TRule_unary_subexpr_suffix_TBlock1_TBlock1_TAlt3_TBlock2 block) {
  776. switch (block.GetAltCase()) {
  777. case TRule_unary_subexpr_suffix_TBlock1_TBlock1_TAlt3_TBlock2::kAlt1:
  778. break;
  779. case TRule_unary_subexpr_suffix_TBlock1_TBlock1_TAlt3_TBlock2::kAlt2:
  780. break;
  781. case TRule_unary_subexpr_suffix_TBlock1_TBlock1_TAlt3_TBlock2::kAlt3:
  782. switch (block.GetAlt3().GetRule_an_id_or_type1().GetAltCase()) {
  783. case TRule_an_id_or_type::kAltAnIdOrType1: {
  784. const auto &idOrType = block.GetAlt3().GetRule_an_id_or_type1().GetAlt_an_id_or_type1().GetRule_id_or_type1();
  785. switch(idOrType.GetAltCase()) {
  786. case TRule_id_or_type::kAltIdOrType1: {
  787. const auto column = Id(idOrType.GetAlt_id_or_type1().GetRule_id1(), *this);
  788. return BuildMatchRecognizeColumnAccess(Ctx.Pos(), std::move(var), std::move(column));
  789. }
  790. case TRule_id_or_type::kAltIdOrType2:
  791. break;
  792. case TRule_id_or_type::ALT_NOT_SET:
  793. break;
  794. }
  795. break;
  796. }
  797. case TRule_an_id_or_type::kAltAnIdOrType2:
  798. break;
  799. case TRule_an_id_or_type::ALT_NOT_SET:
  800. break;
  801. }
  802. break;
  803. case TRule_unary_subexpr_suffix_TBlock1_TBlock1_TAlt3_TBlock2::ALT_NOT_SET:
  804. Y_ABORT("You should change implementation according to grammar changes");
  805. }
  806. return {};
  807. }
  808. template<typename TUnaryCasualExprRule>
  809. TNodePtr TSqlExpression::UnaryCasualExpr(const TUnaryCasualExprRule& node, const TTrailingQuestions& tail) {
  810. // unary_casual_subexpr: (id_expr | atom_expr) unary_subexpr_suffix;
  811. // OR
  812. // in_unary_casual_subexpr: (id_expr_in | in_atom_expr) unary_subexpr_suffix;
  813. // where
  814. // unary_subexpr_suffix: (key_expr | invoke_expr |(DOT (bind_parameter | DIGITS | id)))* (COLLATE id)?;
  815. const auto& suffix = node.GetRule_unary_subexpr_suffix2();
  816. const bool suffixIsEmpty = suffix.GetBlock1().empty() && !suffix.HasBlock2();
  817. MaybeUnnamedSmartParenOnTop = MaybeUnnamedSmartParenOnTop && suffixIsEmpty;
  818. TString name;
  819. TNodePtr expr;
  820. bool typePossible = false;
  821. auto& block = node.GetBlock1();
  822. switch (block.Alt_case()) {
  823. case TUnaryCasualExprRule::TBlock1::kAlt1: {
  824. MaybeUnnamedSmartParenOnTop = false;
  825. auto& alt = block.GetAlt1();
  826. if constexpr (std::is_same_v<TUnaryCasualExprRule, TRule_unary_casual_subexpr>) {
  827. name = Id(alt.GetRule_id_expr1(), *this);
  828. typePossible = !IsQuotedId(alt.GetRule_id_expr1(), *this);
  829. } else {
  830. // type was never possible here
  831. name = Id(alt.GetRule_id_expr_in1(), *this);
  832. }
  833. break;
  834. }
  835. case TUnaryCasualExprRule::TBlock1::kAlt2: {
  836. auto& alt = block.GetAlt2();
  837. TMaybe<TExprOrIdent> exprOrId;
  838. if constexpr (std::is_same_v<TUnaryCasualExprRule, TRule_unary_casual_subexpr>) {
  839. exprOrId = AtomExpr(alt.GetRule_atom_expr1(), suffixIsEmpty ? tail : TTrailingQuestions{});
  840. } else {
  841. MaybeUnnamedSmartParenOnTop = false;
  842. exprOrId = InAtomExpr(alt.GetRule_in_atom_expr1(), suffixIsEmpty ? tail : TTrailingQuestions{});
  843. }
  844. if (!exprOrId) {
  845. Ctx.IncrementMonCounter("sql_errors", "BadAtomExpr");
  846. return nullptr;
  847. }
  848. if (!exprOrId->Expr) {
  849. name = exprOrId->Ident;
  850. } else {
  851. expr = exprOrId->Expr;
  852. }
  853. break;
  854. }
  855. case TUnaryCasualExprRule::TBlock1::ALT_NOT_SET:
  856. Y_ABORT("You should change implementation according to grammar changes");
  857. }
  858. // bool onlyDots = true;
  859. bool isColumnRef = !expr;
  860. bool isFirstElem = true;
  861. for (auto& _b : suffix.GetBlock1()) {
  862. auto& b = _b.GetBlock1();
  863. switch (b.Alt_case()) {
  864. case TRule_unary_subexpr_suffix::TBlock1::TBlock1::kAlt1: {
  865. // key_expr
  866. // onlyDots = false;
  867. break;
  868. }
  869. case TRule_unary_subexpr_suffix::TBlock1::TBlock1::kAlt2: {
  870. // invoke_expr - cannot be a column, function name
  871. if (isFirstElem) {
  872. isColumnRef = false;
  873. }
  874. // onlyDots = false;
  875. break;
  876. }
  877. case TRule_unary_subexpr_suffix::TBlock1::TBlock1::kAlt3: {
  878. // In case of MATCH_RECOGNIZE lambdas
  879. // X.Y is treated as Var.Column access
  880. if (isColumnRef && (
  881. EColumnRefState::MatchRecognizeMeasures == Ctx.GetColumnReferenceState() ||
  882. EColumnRefState::MatchRecognizeDefine == Ctx.GetColumnReferenceState() ||
  883. EColumnRefState::MatchRecognizeDefineAggregate == Ctx.GetColumnReferenceState()
  884. )) {
  885. if (suffix.GetBlock1().size() != 1) {
  886. Ctx.Error() << "Expected Var.Column, but got chain of " << suffix.GetBlock1().size() << " column accesses";
  887. return nullptr;
  888. }
  889. return RowPatternVarAccess(std::move(name), b.GetAlt3().GetBlock2());
  890. }
  891. break;
  892. }
  893. case TRule_unary_subexpr_suffix::TBlock1::TBlock1::ALT_NOT_SET:
  894. AltNotImplemented("unary_subexpr_suffix", b);
  895. return nullptr;
  896. }
  897. isFirstElem = false;
  898. }
  899. isFirstElem = true;
  900. TVector<INode::TIdPart> ids;
  901. INode::TPtr lastExpr;
  902. if (!isColumnRef) {
  903. lastExpr = expr;
  904. } else {
  905. const bool flexibleTypes = Ctx.FlexibleTypes;
  906. bool columnOrType = false;
  907. auto columnRefsState = Ctx.GetColumnReferenceState();
  908. bool explicitPgType = columnRefsState == EColumnRefState::AsPgType;
  909. if (explicitPgType && typePossible && suffixIsEmpty) {
  910. auto pgType = BuildSimpleType(Ctx, Ctx.Pos(), name, false);
  911. if (pgType && tail.Count) {
  912. Ctx.Error() << "Optional types are not supported in this context";
  913. return {};
  914. }
  915. return pgType;
  916. }
  917. if (auto simpleType = LookupSimpleType(name, flexibleTypes, false); simpleType && typePossible && suffixIsEmpty) {
  918. if (tail.Count > 0 || columnRefsState == EColumnRefState::Deny || !flexibleTypes) {
  919. // a type
  920. return AddOptionals(BuildSimpleType(Ctx, Ctx.Pos(), name, false), tail.Count);
  921. }
  922. // type or column: ambiguity will be resolved on type annotation stage
  923. columnOrType = columnRefsState == EColumnRefState::Allow;
  924. }
  925. if (tail.Count) {
  926. UnexpectedQuestionToken(tail);
  927. return {};
  928. }
  929. if (!Ctx.CheckColumnReference(Ctx.Pos(), name)) {
  930. return nullptr;
  931. }
  932. ids.push_back(columnOrType ? BuildColumnOrType(Ctx.Pos()) : BuildColumn(Ctx.Pos()));
  933. ids.push_back(name);
  934. }
  935. TPosition pos(Ctx.Pos());
  936. for (auto& _b : suffix.GetBlock1()) {
  937. auto& b = _b.GetBlock1();
  938. switch (b.Alt_case()) {
  939. case TRule_unary_subexpr_suffix::TBlock1::TBlock1::kAlt1: {
  940. // key_expr
  941. auto keyExpr = KeyExpr(b.GetAlt1().GetRule_key_expr1());
  942. if (!keyExpr) {
  943. Ctx.IncrementMonCounter("sql_errors", "BadKeyExpr");
  944. return nullptr;
  945. }
  946. if (!lastExpr) {
  947. lastExpr = BuildAccess(pos, ids, false);
  948. ids.clear();
  949. }
  950. ids.push_back(lastExpr);
  951. ids.push_back(keyExpr);
  952. lastExpr = BuildAccess(pos, ids, true);
  953. ids.clear();
  954. break;
  955. }
  956. case TRule_unary_subexpr_suffix::TBlock1::TBlock1::kAlt2: {
  957. // invoke_expr - cannot be a column, function name
  958. TSqlCallExpr call(Ctx, Mode);
  959. if (isFirstElem && !name.empty()) {
  960. call.AllowDistinct();
  961. call.InitName(name);
  962. } else {
  963. call.InitExpr(lastExpr);
  964. }
  965. bool initRet = call.Init(b.GetAlt2().GetRule_invoke_expr1());
  966. if (initRet) {
  967. call.IncCounters();
  968. }
  969. if (!initRet) {
  970. return nullptr;
  971. }
  972. lastExpr = call.BuildCall();
  973. if (!lastExpr) {
  974. return nullptr;
  975. }
  976. break;
  977. }
  978. case TRule_unary_subexpr_suffix::TBlock1::TBlock1::kAlt3: {
  979. // dot
  980. if (lastExpr) {
  981. ids.push_back(lastExpr);
  982. }
  983. auto bb = b.GetAlt3().GetBlock2();
  984. switch (bb.Alt_case()) {
  985. case TRule_unary_subexpr_suffix_TBlock1_TBlock1_TAlt3_TBlock2::kAlt1: {
  986. TString named;
  987. if (!NamedNodeImpl(bb.GetAlt1().GetRule_bind_parameter1(), named, *this)) {
  988. return nullptr;
  989. }
  990. auto namedNode = GetNamedNode(named);
  991. if (!namedNode) {
  992. return nullptr;
  993. }
  994. ids.push_back(named);
  995. ids.back().Expr = namedNode;
  996. break;
  997. }
  998. case TRule_unary_subexpr_suffix_TBlock1_TBlock1_TAlt3_TBlock2::kAlt2: {
  999. const TString str(Token(bb.GetAlt2().GetToken1()));
  1000. ids.push_back(str);
  1001. break;
  1002. }
  1003. case TRule_unary_subexpr_suffix_TBlock1_TBlock1_TAlt3_TBlock2::kAlt3: {
  1004. ids.push_back(Id(bb.GetAlt3().GetRule_an_id_or_type1(), *this));
  1005. break;
  1006. }
  1007. case TRule_unary_subexpr_suffix_TBlock1_TBlock1_TAlt3_TBlock2::ALT_NOT_SET:
  1008. Y_ABORT("You should change implementation according to grammar changes");
  1009. }
  1010. if (lastExpr) {
  1011. lastExpr = BuildAccess(pos, ids, false);
  1012. ids.clear();
  1013. }
  1014. break;
  1015. }
  1016. case TRule_unary_subexpr_suffix::TBlock1::TBlock1::ALT_NOT_SET:
  1017. AltNotImplemented("unary_subexpr_suffix", b);
  1018. return nullptr;
  1019. }
  1020. isFirstElem = false;
  1021. }
  1022. if (!lastExpr) {
  1023. lastExpr = BuildAccess(pos, ids, false);
  1024. ids.clear();
  1025. }
  1026. if (suffix.HasBlock2()) {
  1027. Ctx.IncrementMonCounter("sql_errors", "CollateUnarySubexpr");
  1028. Error() << "unary_subexpr: COLLATE is not implemented yet";
  1029. }
  1030. return lastExpr;
  1031. }
  1032. TNodePtr TSqlExpression::BindParameterRule(const TRule_bind_parameter& rule, const TTrailingQuestions& tail) {
  1033. TString namedArg;
  1034. if (!NamedNodeImpl(rule, namedArg, *this)) {
  1035. return {};
  1036. }
  1037. if (SmartParenthesisMode == ESmartParenthesis::SqlLambdaParams) {
  1038. Ctx.IncrementMonCounter("sql_features", "LambdaArgument");
  1039. if (tail.Count > 1) {
  1040. Ctx.Error(tail.Pos) << "Expecting at most one '?' token here (for optional lambda parameters), but got " << tail.Count;
  1041. return {};
  1042. }
  1043. return BuildAtom(Ctx.Pos(), namedArg, NYql::TNodeFlags::ArbitraryContent, tail.Count != 0);
  1044. }
  1045. if (tail.Count) {
  1046. UnexpectedQuestionToken(tail);
  1047. return {};
  1048. }
  1049. Ctx.IncrementMonCounter("sql_features", "NamedNodeUseAtom");
  1050. auto ret = GetNamedNode(namedArg);
  1051. if (ret) {
  1052. ret->SetRefPos(Ctx.Pos());
  1053. }
  1054. return ret;
  1055. }
  1056. TNodePtr TSqlExpression::LambdaRule(const TRule_lambda& rule) {
  1057. const auto& alt = rule;
  1058. const bool isSqlLambda = alt.HasBlock2();
  1059. if (!isSqlLambda) {
  1060. return SmartParenthesis(alt.GetRule_smart_parenthesis1());
  1061. }
  1062. MaybeUnnamedSmartParenOnTop = false;
  1063. TNodePtr parenthesis;
  1064. {
  1065. // we allow column reference here to postpone error and report it with better description in SqlLambdaParams
  1066. TColumnRefScope scope(Ctx, EColumnRefState::Allow);
  1067. TSqlExpression expr(Ctx, Mode);
  1068. expr.SetSmartParenthesisMode(ESmartParenthesis::SqlLambdaParams);
  1069. parenthesis = expr.SmartParenthesis(alt.GetRule_smart_parenthesis1());
  1070. }
  1071. if (!parenthesis) {
  1072. return {};
  1073. }
  1074. ui32 optionalArgumentsCount = 0;
  1075. TVector<TSymbolNameWithPos> args;
  1076. if (!SqlLambdaParams(parenthesis, args, optionalArgumentsCount)) {
  1077. return {};
  1078. }
  1079. auto bodyBlock = alt.GetBlock2();
  1080. Token(bodyBlock.GetToken1());
  1081. TPosition pos(Ctx.Pos());
  1082. TVector<TNodePtr> exprSeq;
  1083. for (auto& arg: args) {
  1084. arg.Name = PushNamedAtom(arg.Pos, arg.Name);
  1085. }
  1086. bool ret = false;
  1087. TColumnRefScope scope(Ctx, EColumnRefState::Deny);
  1088. scope.SetNoColumnErrContext("in lambda function");
  1089. if (bodyBlock.GetBlock2().HasAlt1()) {
  1090. ret = SqlLambdaExprBody(Ctx, bodyBlock.GetBlock2().GetAlt1().GetRule_expr2(), exprSeq);
  1091. } else {
  1092. ret = SqlLambdaExprBody(Ctx, bodyBlock.GetBlock2().GetAlt2().GetRule_lambda_body2(), exprSeq);
  1093. }
  1094. TVector<TString> argNames;
  1095. for (const auto& arg : args) {
  1096. argNames.push_back(arg.Name);
  1097. PopNamedNode(arg.Name);
  1098. }
  1099. if (!ret) {
  1100. return {};
  1101. }
  1102. auto lambdaNode = BuildSqlLambda(pos, std::move(argNames), std::move(exprSeq));
  1103. if (optionalArgumentsCount > 0) {
  1104. lambdaNode = new TCallNodeImpl(pos, "WithOptionalArgs", {
  1105. lambdaNode,
  1106. BuildQuotedAtom(pos, ToString(optionalArgumentsCount), TNodeFlags::Default)
  1107. });
  1108. }
  1109. return lambdaNode;
  1110. }
  1111. TNodePtr TSqlExpression::CastRule(const TRule_cast_expr& rule) {
  1112. Ctx.IncrementMonCounter("sql_features", "Cast");
  1113. const auto& alt = rule;
  1114. Token(alt.GetToken1());
  1115. TPosition pos(Ctx.Pos());
  1116. TSqlExpression expr(Ctx, Mode);
  1117. auto exprNode = expr.Build(rule.GetRule_expr3());
  1118. if (!exprNode) {
  1119. return {};
  1120. }
  1121. auto type = TypeNodeOrBind(rule.GetRule_type_name_or_bind5());
  1122. if (!type) {
  1123. return {};
  1124. }
  1125. return new TCallNodeImpl(pos, "SafeCast", {exprNode, type});
  1126. }
  1127. TNodePtr TSqlExpression::BitCastRule(const TRule_bitcast_expr& rule) {
  1128. Ctx.IncrementMonCounter("sql_features", "BitCast");
  1129. const auto& alt = rule;
  1130. Token(alt.GetToken1());
  1131. TPosition pos(Ctx.Pos());
  1132. TSqlExpression expr(Ctx, Mode);
  1133. auto exprNode = expr.Build(rule.GetRule_expr3());
  1134. if (!exprNode) {
  1135. return {};
  1136. }
  1137. auto type = TypeSimple(rule.GetRule_type_name_simple5(), true);
  1138. if (!type) {
  1139. return {};
  1140. }
  1141. return new TCallNodeImpl(pos, "BitCast", {exprNode, type});
  1142. }
  1143. TNodePtr TSqlExpression::ExistsRule(const TRule_exists_expr& rule) {
  1144. Ctx.IncrementMonCounter("sql_features", "Exists");
  1145. TPosition pos;
  1146. TSourcePtr source;
  1147. Token(rule.GetToken2());
  1148. switch (rule.GetBlock3().Alt_case()) {
  1149. case TRule_exists_expr::TBlock3::kAlt1: {
  1150. const auto& alt = rule.GetBlock3().GetAlt1().GetRule_select_stmt1();
  1151. TSqlSelect select(Ctx, Mode);
  1152. source = select.Build(alt, pos);
  1153. break;
  1154. }
  1155. case TRule_exists_expr::TBlock3::kAlt2: {
  1156. const auto& alt = rule.GetBlock3().GetAlt2().GetRule_values_stmt1();
  1157. TSqlValues values(Ctx, Mode);
  1158. source = values.Build(alt, pos);
  1159. break;
  1160. }
  1161. case TRule_exists_expr::TBlock3::ALT_NOT_SET:
  1162. AltNotImplemented("exists_expr", rule.GetBlock3());
  1163. }
  1164. if (!source) {
  1165. Ctx.IncrementMonCounter("sql_errors", "BadSource");
  1166. return nullptr;
  1167. }
  1168. const bool checkExist = true;
  1169. auto select = BuildSourceNode(Ctx.Pos(), source, checkExist, Ctx.Settings.EmitReadsForExists);
  1170. return BuildBuiltinFunc(Ctx, Ctx.Pos(), "ListHasItems", {select});
  1171. }
  1172. TNodePtr TSqlExpression::CaseRule(const TRule_case_expr& rule) {
  1173. // case_expr: CASE expr? when_expr+ (ELSE expr)? END;
  1174. // when_expr: WHEN expr THEN expr;
  1175. Ctx.IncrementMonCounter("sql_features", "Case");
  1176. const auto& alt = rule;
  1177. Token(alt.GetToken1());
  1178. TNodePtr elseExpr;
  1179. if (alt.HasBlock4()) {
  1180. Token(alt.GetBlock4().GetToken1());
  1181. TSqlExpression expr(Ctx, Mode);
  1182. elseExpr = expr.Build(alt.GetBlock4().GetRule_expr2());
  1183. } else {
  1184. Ctx.IncrementMonCounter("sql_errors", "ElseIsRequired");
  1185. Error() << "ELSE is required";
  1186. return {};
  1187. }
  1188. TNodePtr caseExpr;
  1189. if (alt.HasBlock2()) {
  1190. TSqlExpression expr(Ctx, Mode);
  1191. caseExpr = expr.Build(alt.GetBlock2().GetRule_expr1());
  1192. if (!caseExpr) {
  1193. return {};
  1194. }
  1195. }
  1196. TVector<TCaseBranch> branches;
  1197. for (size_t i = 0; i < alt.Block3Size(); ++i) {
  1198. branches.emplace_back();
  1199. const auto& block = alt.GetBlock3(i).GetRule_when_expr1();
  1200. Token(block.GetToken1());
  1201. TSqlExpression condExpr(Ctx, Mode);
  1202. branches.back().Pred = condExpr.Build(block.GetRule_expr2());
  1203. if (caseExpr) {
  1204. branches.back().Pred = BuildBinaryOp(Ctx, Ctx.Pos(), "==", caseExpr->Clone(), branches.back().Pred);
  1205. }
  1206. if (!branches.back().Pred) {
  1207. return {};
  1208. }
  1209. Token(block.GetToken3());
  1210. TSqlExpression thenExpr(Ctx, Mode);
  1211. branches.back().Value = thenExpr.Build(block.GetRule_expr4());
  1212. if (!branches.back().Value) {
  1213. return {};
  1214. }
  1215. }
  1216. auto final = ReduceCaseBranches(branches.begin(), branches.end());
  1217. return BuildBuiltinFunc(Ctx, Ctx.Pos(), "If", { final.Pred, final.Value, elseExpr });
  1218. }
  1219. TMaybe<TExprOrIdent> TSqlExpression::AtomExpr(const TRule_atom_expr& node, const TTrailingQuestions& tail) {
  1220. // atom_expr:
  1221. // literal_value
  1222. // | bind_parameter
  1223. // | lambda
  1224. // | cast_expr
  1225. // | exists_expr
  1226. // | case_expr
  1227. // | an_id_or_type NAMESPACE (id_or_type | STRING_VALUE)
  1228. // | value_constructor
  1229. // | bitcast_expr
  1230. // | list_literal
  1231. // | dict_literal
  1232. // | struct_literal
  1233. // ;
  1234. if (node.Alt_case() != TRule_atom_expr::kAltAtomExpr2 && tail.Count) {
  1235. UnexpectedQuestionToken(tail);
  1236. return {};
  1237. }
  1238. MaybeUnnamedSmartParenOnTop = MaybeUnnamedSmartParenOnTop && (node.Alt_case() == TRule_atom_expr::kAltAtomExpr3);
  1239. TExprOrIdent result;
  1240. switch (node.Alt_case()) {
  1241. case TRule_atom_expr::kAltAtomExpr1:
  1242. Ctx.IncrementMonCounter("sql_features", "LiteralExpr");
  1243. return LiteralExpr(node.GetAlt_atom_expr1().GetRule_literal_value1());
  1244. case TRule_atom_expr::kAltAtomExpr2:
  1245. result.Expr = BindParameterRule(node.GetAlt_atom_expr2().GetRule_bind_parameter1(), tail);
  1246. break;
  1247. case TRule_atom_expr::kAltAtomExpr3:
  1248. result.Expr = LambdaRule(node.GetAlt_atom_expr3().GetRule_lambda1());
  1249. break;
  1250. case TRule_atom_expr::kAltAtomExpr4:
  1251. result.Expr = CastRule(node.GetAlt_atom_expr4().GetRule_cast_expr1());
  1252. break;
  1253. case TRule_atom_expr::kAltAtomExpr5:
  1254. result.Expr = ExistsRule(node.GetAlt_atom_expr5().GetRule_exists_expr1());
  1255. break;
  1256. case TRule_atom_expr::kAltAtomExpr6:
  1257. result.Expr = CaseRule(node.GetAlt_atom_expr6().GetRule_case_expr1());
  1258. break;
  1259. case TRule_atom_expr::kAltAtomExpr7: {
  1260. const auto& alt = node.GetAlt_atom_expr7();
  1261. TString module(Id(alt.GetRule_an_id_or_type1(), *this));
  1262. TPosition pos(Ctx.Pos());
  1263. TString name;
  1264. switch (alt.GetBlock3().Alt_case()) {
  1265. case TRule_atom_expr::TAlt7::TBlock3::kAlt1:
  1266. name = Id(alt.GetBlock3().GetAlt1().GetRule_id_or_type1(), *this);
  1267. break;
  1268. case TRule_atom_expr::TAlt7::TBlock3::kAlt2: {
  1269. name = Token(alt.GetBlock3().GetAlt2().GetToken1());
  1270. if (Ctx.AnsiQuotedIdentifiers && name.StartsWith('"')) {
  1271. // same as previous case
  1272. name = IdContentFromString(Ctx, name);
  1273. } else {
  1274. module = "@" + module;
  1275. }
  1276. break;
  1277. }
  1278. case TRule_atom_expr::TAlt7::TBlock3::ALT_NOT_SET:
  1279. Y_ABORT("Unsigned number: you should change implementation according to grammar changes");
  1280. }
  1281. result.Expr = BuildCallable(pos, module, name, {});
  1282. break;
  1283. }
  1284. case TRule_atom_expr::kAltAtomExpr8: {
  1285. result.Expr = ValueConstructor(node.GetAlt_atom_expr8().GetRule_value_constructor1());
  1286. break;
  1287. }
  1288. case TRule_atom_expr::kAltAtomExpr9:
  1289. result.Expr = BitCastRule(node.GetAlt_atom_expr9().GetRule_bitcast_expr1());
  1290. break;
  1291. case TRule_atom_expr::kAltAtomExpr10:
  1292. result.Expr = ListLiteral(node.GetAlt_atom_expr10().GetRule_list_literal1());
  1293. break;
  1294. case TRule_atom_expr::kAltAtomExpr11:
  1295. result.Expr = DictLiteral(node.GetAlt_atom_expr11().GetRule_dict_literal1());
  1296. break;
  1297. case TRule_atom_expr::kAltAtomExpr12:
  1298. result.Expr = StructLiteral(node.GetAlt_atom_expr12().GetRule_struct_literal1());
  1299. break;
  1300. case TRule_atom_expr::ALT_NOT_SET:
  1301. AltNotImplemented("atom_expr", node);
  1302. }
  1303. if (!result.Expr) {
  1304. return {};
  1305. }
  1306. return result;
  1307. }
  1308. TMaybe<TExprOrIdent> TSqlExpression::InAtomExpr(const TRule_in_atom_expr& node, const TTrailingQuestions& tail) {
  1309. // in_atom_expr:
  1310. // literal_value
  1311. // | bind_parameter
  1312. // | lambda
  1313. // | cast_expr
  1314. // | case_expr
  1315. // | an_id_or_type NAMESPACE (id_or_type | STRING_VALUE)
  1316. // | LPAREN select_stmt RPAREN
  1317. // | value_constructor
  1318. // | bitcast_expr
  1319. // | list_literal
  1320. // | dict_literal
  1321. // | struct_literal
  1322. // ;
  1323. if (node.Alt_case() != TRule_in_atom_expr::kAltInAtomExpr2 && tail.Count) {
  1324. UnexpectedQuestionToken(tail);
  1325. return {};
  1326. }
  1327. TExprOrIdent result;
  1328. switch (node.Alt_case()) {
  1329. case TRule_in_atom_expr::kAltInAtomExpr1:
  1330. Ctx.IncrementMonCounter("sql_features", "LiteralExpr");
  1331. return LiteralExpr(node.GetAlt_in_atom_expr1().GetRule_literal_value1());
  1332. case TRule_in_atom_expr::kAltInAtomExpr2:
  1333. result.Expr = BindParameterRule(node.GetAlt_in_atom_expr2().GetRule_bind_parameter1(), tail);
  1334. break;
  1335. case TRule_in_atom_expr::kAltInAtomExpr3:
  1336. result.Expr = LambdaRule(node.GetAlt_in_atom_expr3().GetRule_lambda1());
  1337. break;
  1338. case TRule_in_atom_expr::kAltInAtomExpr4:
  1339. result.Expr = CastRule(node.GetAlt_in_atom_expr4().GetRule_cast_expr1());
  1340. break;
  1341. case TRule_in_atom_expr::kAltInAtomExpr5:
  1342. result.Expr = CaseRule(node.GetAlt_in_atom_expr5().GetRule_case_expr1());
  1343. break;
  1344. case TRule_in_atom_expr::kAltInAtomExpr6: {
  1345. const auto& alt = node.GetAlt_in_atom_expr6();
  1346. TString module(Id(alt.GetRule_an_id_or_type1(), *this));
  1347. TPosition pos(Ctx.Pos());
  1348. TString name;
  1349. switch (alt.GetBlock3().Alt_case()) {
  1350. case TRule_in_atom_expr::TAlt6::TBlock3::kAlt1:
  1351. name = Id(alt.GetBlock3().GetAlt1().GetRule_id_or_type1(), *this);
  1352. break;
  1353. case TRule_in_atom_expr::TAlt6::TBlock3::kAlt2: {
  1354. name = Token(alt.GetBlock3().GetAlt2().GetToken1());
  1355. if (Ctx.AnsiQuotedIdentifiers && name.StartsWith('"')) {
  1356. // same as previous case
  1357. name = IdContentFromString(Ctx, name);
  1358. } else {
  1359. module = "@" + module;
  1360. }
  1361. break;
  1362. }
  1363. case TRule_in_atom_expr::TAlt6::TBlock3::ALT_NOT_SET:
  1364. Y_ABORT("You should change implementation according to grammar changes");
  1365. }
  1366. result.Expr = BuildCallable(pos, module, name, {});
  1367. break;
  1368. }
  1369. case TRule_in_atom_expr::kAltInAtomExpr7: {
  1370. Token(node.GetAlt_in_atom_expr7().GetToken1());
  1371. // reset column reference scope (select will reenable it where needed)
  1372. TColumnRefScope scope(Ctx, EColumnRefState::Deny);
  1373. TSqlSelect select(Ctx, Mode);
  1374. TPosition pos;
  1375. auto source = select.Build(node.GetAlt_in_atom_expr7().GetRule_select_stmt2(), pos);
  1376. if (!source) {
  1377. Ctx.IncrementMonCounter("sql_errors", "BadSource");
  1378. return {};
  1379. }
  1380. Ctx.IncrementMonCounter("sql_features", "InSubquery");
  1381. const auto alias = Ctx.MakeName("subquerynode");
  1382. const auto ref = Ctx.MakeName("subquery");
  1383. auto& blocks = Ctx.GetCurrentBlocks();
  1384. blocks.push_back(BuildSubquery(std::move(source), alias, Mode == NSQLTranslation::ESqlMode::SUBQUERY, -1, Ctx.Scoped));
  1385. blocks.back()->SetLabel(ref);
  1386. result.Expr = BuildSubqueryRef(blocks.back(), ref, -1);
  1387. break;
  1388. }
  1389. case TRule_in_atom_expr::kAltInAtomExpr8: {
  1390. result.Expr = ValueConstructor(node.GetAlt_in_atom_expr8().GetRule_value_constructor1());
  1391. break;
  1392. }
  1393. case TRule_in_atom_expr::kAltInAtomExpr9:
  1394. result.Expr = BitCastRule(node.GetAlt_in_atom_expr9().GetRule_bitcast_expr1());
  1395. break;
  1396. case TRule_in_atom_expr::kAltInAtomExpr10:
  1397. result.Expr = ListLiteral(node.GetAlt_in_atom_expr10().GetRule_list_literal1());
  1398. break;
  1399. case TRule_in_atom_expr::kAltInAtomExpr11:
  1400. result.Expr = DictLiteral(node.GetAlt_in_atom_expr11().GetRule_dict_literal1());
  1401. break;
  1402. case TRule_in_atom_expr::kAltInAtomExpr12:
  1403. result.Expr = StructLiteral(node.GetAlt_in_atom_expr12().GetRule_struct_literal1());
  1404. break;
  1405. case TRule_in_atom_expr::ALT_NOT_SET:
  1406. AltNotImplemented("in_atom_expr", node);
  1407. }
  1408. if (!result.Expr) {
  1409. return {};
  1410. }
  1411. return result;
  1412. }
  1413. bool TSqlExpression::SqlLambdaParams(const TNodePtr& node, TVector<TSymbolNameWithPos>& args, ui32& optionalArgumentsCount) {
  1414. args.clear();
  1415. optionalArgumentsCount = 0;
  1416. auto errMsg = TStringBuf("Invalid lambda arguments syntax. Lambda arguments should start with '$' as named value.");
  1417. auto tupleNodePtr = node->GetTupleNode();;
  1418. if (!tupleNodePtr) {
  1419. Ctx.Error(node->GetPos()) << errMsg;
  1420. return false;
  1421. }
  1422. THashSet<TString> dupArgsChecker;
  1423. for (const auto& argPtr: tupleNodePtr->Elements()) {
  1424. auto contentPtr = argPtr->GetAtomContent();
  1425. if (!contentPtr || !contentPtr->StartsWith("$")) {
  1426. Ctx.Error(argPtr->GetPos()) << errMsg;
  1427. return false;
  1428. }
  1429. if (argPtr->IsOptionalArg()) {
  1430. ++optionalArgumentsCount;
  1431. } else if (optionalArgumentsCount > 0) {
  1432. Ctx.Error(argPtr->GetPos()) << "Non-optional argument can not follow optional one";
  1433. return false;
  1434. }
  1435. if (!IsAnonymousName(*contentPtr) && !dupArgsChecker.insert(*contentPtr).second) {
  1436. Ctx.Error(argPtr->GetPos()) << "Duplicate lambda argument parametr: '" << *contentPtr << "'.";
  1437. return false;
  1438. }
  1439. args.push_back(TSymbolNameWithPos{*contentPtr, argPtr->GetPos()});
  1440. }
  1441. return true;
  1442. }
  1443. bool TSqlExpression::SqlLambdaExprBody(TContext& ctx, const TRule_expr& node, TVector<TNodePtr>& exprSeq) {
  1444. TSqlExpression expr(ctx, ctx.Settings.Mode);
  1445. TNodePtr nodeExpr = expr.Build(node);
  1446. if (!nodeExpr) {
  1447. return false;
  1448. }
  1449. exprSeq.push_back(nodeExpr);
  1450. return true;
  1451. }
  1452. bool TSqlExpression::SqlLambdaExprBody(TContext& ctx, const TRule_lambda_body& node, TVector<TNodePtr>& exprSeq) {
  1453. TSqlExpression expr(ctx, ctx.Settings.Mode);
  1454. TVector<TString> localNames;
  1455. bool hasError = false;
  1456. for (auto& block: node.GetBlock2()) {
  1457. const auto& rule = block.GetRule_lambda_stmt1();
  1458. switch (rule.Alt_case()) {
  1459. case TRule_lambda_stmt::kAltLambdaStmt1: {
  1460. TVector<TSymbolNameWithPos> names;
  1461. auto nodeExpr = NamedNode(rule.GetAlt_lambda_stmt1().GetRule_named_nodes_stmt1(), names);
  1462. if (!nodeExpr) {
  1463. hasError = true;
  1464. continue;
  1465. } else if (nodeExpr->GetSource()) {
  1466. ctx.Error() << "SELECT is not supported inside lambda body";
  1467. hasError = true;
  1468. continue;
  1469. }
  1470. if (names.size() > 1) {
  1471. auto ref = ctx.MakeName("tie");
  1472. exprSeq.push_back(nodeExpr->Y("EnsureTupleSize", nodeExpr, nodeExpr->Q(ToString(names.size()))));
  1473. exprSeq.back()->SetLabel(ref);
  1474. for (size_t i = 0; i < names.size(); ++i) {
  1475. TNodePtr nthExpr = nodeExpr->Y("Nth", ref, nodeExpr->Q(ToString(i)));
  1476. names[i].Name = PushNamedAtom(names[i].Pos, names[i].Name);
  1477. nthExpr->SetLabel(names[i].Name);
  1478. localNames.push_back(names[i].Name);
  1479. exprSeq.push_back(nthExpr);
  1480. }
  1481. } else {
  1482. auto& symbol = names.front();
  1483. symbol.Name = PushNamedAtom(symbol.Pos, symbol.Name);
  1484. nodeExpr->SetLabel(symbol.Name);
  1485. localNames.push_back(symbol.Name);
  1486. exprSeq.push_back(nodeExpr);
  1487. }
  1488. break;
  1489. }
  1490. case TRule_lambda_stmt::kAltLambdaStmt2: {
  1491. if (!ImportStatement(rule.GetAlt_lambda_stmt2().GetRule_import_stmt1(), &localNames)) {
  1492. hasError = true;
  1493. }
  1494. break;
  1495. }
  1496. case TRule_lambda_stmt::ALT_NOT_SET:
  1497. Y_ABORT("SampleClause: does not correspond to grammar changes");
  1498. }
  1499. }
  1500. TNodePtr nodeExpr;
  1501. if (!hasError) {
  1502. nodeExpr = expr.Build(node.GetRule_expr4());
  1503. }
  1504. for (const auto& name : localNames) {
  1505. PopNamedNode(name);
  1506. }
  1507. if (!nodeExpr) {
  1508. return false;
  1509. }
  1510. exprSeq.push_back(nodeExpr);
  1511. return true;
  1512. }
  1513. TNodePtr TSqlExpression::SubExpr(const TRule_con_subexpr& node, const TTrailingQuestions& tail) {
  1514. // con_subexpr: unary_subexpr | unary_op unary_subexpr;
  1515. switch (node.Alt_case()) {
  1516. case TRule_con_subexpr::kAltConSubexpr1:
  1517. return UnaryExpr(node.GetAlt_con_subexpr1().GetRule_unary_subexpr1(), tail);
  1518. case TRule_con_subexpr::kAltConSubexpr2: {
  1519. MaybeUnnamedSmartParenOnTop = false;
  1520. Ctx.IncrementMonCounter("sql_features", "UnaryOperation");
  1521. TString opName;
  1522. auto token = node.GetAlt_con_subexpr2().GetRule_unary_op1().GetToken1();
  1523. Token(token);
  1524. TPosition pos(Ctx.Pos());
  1525. auto tokenId = token.GetId();
  1526. if (IS_TOKEN(Ctx.Settings.Antlr4Parser, tokenId, NOT)) {
  1527. opName = "Not";
  1528. } else if (IS_TOKEN(Ctx.Settings.Antlr4Parser, tokenId, PLUS)) {
  1529. opName = "Plus";
  1530. } else if (IS_TOKEN(Ctx.Settings.Antlr4Parser, tokenId, MINUS)) {
  1531. opName = Ctx.Scoped->PragmaCheckedOps ? "CheckedMinus" : "Minus";
  1532. } else if (IS_TOKEN(Ctx.Settings.Antlr4Parser, tokenId, TILDA)) {
  1533. opName = "BitNot";
  1534. } else {
  1535. Ctx.IncrementMonCounter("sql_errors", "UnsupportedUnaryOperation");
  1536. Error() << "Unsupported unary operation: " << token.GetValue();
  1537. return nullptr;
  1538. }
  1539. Ctx.IncrementMonCounter("sql_unary_operations", opName);
  1540. auto expr = UnaryExpr(node.GetAlt_con_subexpr2().GetRule_unary_subexpr2(), tail);
  1541. return expr ? expr->ApplyUnaryOp(Ctx, pos, opName) : expr;
  1542. }
  1543. case TRule_con_subexpr::ALT_NOT_SET:
  1544. Y_ABORT("You should change implementation according to grammar changes");
  1545. }
  1546. return nullptr;
  1547. }
  1548. TNodePtr TSqlExpression::SubExpr(const TRule_xor_subexpr& node, const TTrailingQuestions& tail) {
  1549. // xor_subexpr: eq_subexpr cond_expr?;
  1550. MaybeUnnamedSmartParenOnTop = MaybeUnnamedSmartParenOnTop && !node.HasBlock2();
  1551. TNodePtr res(SubExpr(node.GetRule_eq_subexpr1(), node.HasBlock2() ? TTrailingQuestions{} : tail));
  1552. if (!res) {
  1553. return {};
  1554. }
  1555. TPosition pos(Ctx.Pos());
  1556. if (node.HasBlock2()) {
  1557. auto cond = node.GetBlock2().GetRule_cond_expr1();
  1558. switch (cond.Alt_case()) {
  1559. case TRule_cond_expr::kAltCondExpr1: {
  1560. const auto& matchOp = cond.GetAlt_cond_expr1();
  1561. const bool notMatch = matchOp.HasBlock1();
  1562. const TCiString& opName = Token(matchOp.GetRule_match_op2().GetToken1());
  1563. const auto& pattern = SubExpr(cond.GetAlt_cond_expr1().GetRule_eq_subexpr3(), matchOp.HasBlock4() ? TTrailingQuestions{} : tail);
  1564. if (!pattern) {
  1565. return {};
  1566. }
  1567. TNodePtr isMatch;
  1568. if (opName == "like" || opName == "ilike") {
  1569. const TString* escapeLiteral = nullptr;
  1570. TNodePtr escapeNode;
  1571. const auto& escaper = BuildUdf(Ctx, pos, "Re2", "PatternFromLike", {});
  1572. TVector<TNodePtr> escaperArgs({ escaper, pattern });
  1573. if (matchOp.HasBlock4()) {
  1574. const auto& escapeBlock = matchOp.GetBlock4();
  1575. TNodePtr escapeExpr = SubExpr(escapeBlock.GetRule_eq_subexpr2(), tail);
  1576. if (!escapeExpr) {
  1577. return {};
  1578. }
  1579. escapeLiteral = escapeExpr->GetLiteral("String");
  1580. escapeNode = escapeExpr;
  1581. if (escapeLiteral) {
  1582. Ctx.IncrementMonCounter("sql_features", "LikeEscape");
  1583. if (escapeLiteral->size() != 1) {
  1584. Ctx.IncrementMonCounter("sql_errors", "LikeMultiCharEscape");
  1585. Error() << "ESCAPE clause requires single character argument";
  1586. return nullptr;
  1587. }
  1588. if (escapeLiteral[0] == "%" || escapeLiteral[0] == "_" || escapeLiteral[0] == "\\") {
  1589. Ctx.IncrementMonCounter("sql_errors", "LikeUnsupportedEscapeChar");
  1590. Error() << "'%', '_' and '\\' are currently not supported in ESCAPE clause, ";
  1591. Error() << "please choose any other character";
  1592. return nullptr;
  1593. }
  1594. if (!IsAscii(escapeLiteral->front())) {
  1595. Ctx.IncrementMonCounter("sql_errors", "LikeUnsupportedEscapeChar");
  1596. Error() << "Non-ASCII symbols are not supported in ESCAPE clause, ";
  1597. Error() << "please choose ASCII character";
  1598. return nullptr;
  1599. }
  1600. escaperArgs.push_back(BuildLiteralRawString(pos, *escapeLiteral));
  1601. } else {
  1602. Ctx.IncrementMonCounter("sql_errors", "LikeNotLiteralEscape");
  1603. Error() << "ESCAPE clause requires String literal argument";
  1604. return nullptr;
  1605. }
  1606. }
  1607. auto re2options = BuildUdf(Ctx, pos, "Re2", "Options", {});
  1608. if (opName == "ilike") {
  1609. Ctx.IncrementMonCounter("sql_features", "CaseInsensitiveLike");
  1610. }
  1611. auto csModeLiteral = BuildLiteralBool(pos, opName != "ilike");
  1612. csModeLiteral->SetLabel("CaseSensitive");
  1613. auto csOption = BuildStructure(pos, { csModeLiteral });
  1614. auto optionsApply = new TCallNodeImpl(pos, "NamedApply", { re2options, BuildTuple(pos, {}), csOption });
  1615. const TNodePtr escapedPattern = new TCallNodeImpl(pos, "Apply", { escaperArgs });
  1616. auto list = new TAstListNodeImpl(pos, { escapedPattern, optionsApply });
  1617. auto runConfig = new TAstListNodeImpl(pos, { new TAstAtomNodeImpl(pos, "quote", 0), list });
  1618. const TNodePtr matcher = new TCallNodeImpl(pos, "AssumeStrict", { BuildUdf(Ctx, pos, "Re2", "Match", { runConfig }) });
  1619. isMatch = new TCallNodeImpl(pos, "Apply", { matcher, res });
  1620. bool isUtf8 = false;
  1621. const TString* literalPattern = pattern->GetLiteral("String");
  1622. if (!literalPattern) {
  1623. literalPattern = pattern->GetLiteral("Utf8");
  1624. isUtf8 = literalPattern != nullptr;
  1625. }
  1626. if (literalPattern) {
  1627. bool inEscape = false;
  1628. TMaybe<char> escape;
  1629. if (escapeLiteral) {
  1630. escape = escapeLiteral->front();
  1631. }
  1632. bool mayIgnoreCase;
  1633. TVector<TPatternComponent<char>> components;
  1634. if (isUtf8) {
  1635. auto splitResult = SplitPattern(UTF8ToUTF32<false>(*literalPattern), escape, inEscape);
  1636. for (const auto& component : splitResult) {
  1637. TPatternComponent<char> converted;
  1638. converted.IsSimple = component.IsSimple;
  1639. converted.Prefix = WideToUTF8(component.Prefix);
  1640. converted.Suffix = WideToUTF8(component.Suffix);
  1641. components.push_back(std::move(converted));
  1642. }
  1643. mayIgnoreCase = ToLowerUTF8(*literalPattern) == ToUpperUTF8(*literalPattern);
  1644. } else {
  1645. components = SplitPattern(*literalPattern, escape, inEscape);
  1646. mayIgnoreCase = WithoutAlpha(*literalPattern);
  1647. }
  1648. if (inEscape) {
  1649. Ctx.IncrementMonCounter("sql_errors", "LikeEscapeSymbolEnd");
  1650. Error() << "LIKE pattern should not end with escape symbol";
  1651. return nullptr;
  1652. }
  1653. if (opName == "like" || mayIgnoreCase) {
  1654. // TODO: expand LIKE in optimizers - we can analyze argument types there
  1655. YQL_ENSURE(!components.empty());
  1656. const auto& first = components.front();
  1657. if (components.size() == 1 && first.IsSimple) {
  1658. // no '%'s and '_'s in pattern
  1659. YQL_ENSURE(first.Prefix == first.Suffix);
  1660. isMatch = BuildBinaryOp(Ctx, pos, "==", res, BuildLiteralRawString(pos, first.Suffix, isUtf8));
  1661. } else if (!first.Prefix.empty()) {
  1662. const TString& prefix = first.Prefix;
  1663. TNodePtr prefixMatch;
  1664. if (Ctx.EmitStartsWith) {
  1665. prefixMatch = BuildBinaryOp(Ctx, pos, "StartsWith", res, BuildLiteralRawString(pos, prefix, isUtf8));
  1666. } else {
  1667. prefixMatch = BuildBinaryOp(Ctx, pos, ">=", res, BuildLiteralRawString(pos, prefix, isUtf8));
  1668. auto upperBound = isUtf8 ? NextValidUtf8(prefix) : NextLexicographicString(prefix);
  1669. if (upperBound) {
  1670. prefixMatch = BuildBinaryOp(
  1671. Ctx,
  1672. pos,
  1673. "And",
  1674. prefixMatch,
  1675. BuildBinaryOp(Ctx, pos, "<", res, BuildLiteralRawString(pos, TString(*upperBound), isUtf8))
  1676. );
  1677. }
  1678. }
  1679. if (Ctx.AnsiLike && first.IsSimple && components.size() == 2 && components.back().IsSimple) {
  1680. const TString& suffix = components.back().Suffix;
  1681. // 'prefix%suffix'
  1682. if (suffix.empty()) {
  1683. isMatch = prefixMatch;
  1684. } else {
  1685. // len(str) >= len(prefix) + len(suffix) && StartsWith(str, prefix) && EndsWith(str, suffix)
  1686. TNodePtr sizePred = BuildBinaryOp(Ctx, pos, ">=",
  1687. TNodePtr(new TCallNodeImpl(pos, "Size", { res })),
  1688. TNodePtr(new TLiteralNumberNode<ui32>(pos, "Uint32", ToString(prefix.size() + suffix.size()))));
  1689. TNodePtr suffixMatch = BuildBinaryOp(Ctx, pos, "EndsWith", res, BuildLiteralRawString(pos, suffix, isUtf8));
  1690. isMatch = new TCallNodeImpl(pos, "And", {
  1691. sizePred,
  1692. prefixMatch,
  1693. suffixMatch
  1694. });
  1695. }
  1696. } else {
  1697. isMatch = BuildBinaryOp(Ctx, pos, "And", prefixMatch, isMatch);
  1698. }
  1699. } else if (Ctx.AnsiLike && AllOf(components, [](const auto& comp) { return comp.IsSimple; })) {
  1700. YQL_ENSURE(first.Prefix.empty());
  1701. if (components.size() == 3 && components.back().Prefix.empty()) {
  1702. // '%foo%'
  1703. YQL_ENSURE(!components[1].Prefix.empty());
  1704. isMatch = BuildBinaryOp(Ctx, pos, "StringContains", res, BuildLiteralRawString(pos, components[1].Prefix, isUtf8));
  1705. } else if (components.size() == 2) {
  1706. // '%foo'
  1707. isMatch = BuildBinaryOp(Ctx, pos, "EndsWith", res, BuildLiteralRawString(pos, components[1].Prefix, isUtf8));
  1708. }
  1709. } else if (Ctx.AnsiLike && !components.back().Suffix.empty()) {
  1710. const TString& suffix = components.back().Suffix;
  1711. TNodePtr suffixMatch = BuildBinaryOp(Ctx, pos, "EndsWith", res, BuildLiteralRawString(pos, suffix, isUtf8));
  1712. isMatch = BuildBinaryOp(Ctx, pos, "And", suffixMatch, isMatch);
  1713. }
  1714. // TODO: more StringContains/StartsWith/EndsWith cases?
  1715. }
  1716. }
  1717. Ctx.IncrementMonCounter("sql_features", notMatch ? "NotLike" : "Like");
  1718. } else if (opName == "regexp" || opName == "rlike" || opName == "match") {
  1719. if (matchOp.HasBlock4()) {
  1720. Ctx.IncrementMonCounter("sql_errors", "RegexpEscape");
  1721. TString opNameUpper(opName);
  1722. opNameUpper.to_upper();
  1723. Error() << opName << " and ESCAPE clauses should not be used together";
  1724. return nullptr;
  1725. }
  1726. if (!Ctx.PragmaRegexUseRe2) {
  1727. Ctx.Warning(pos, TIssuesIds::CORE_LEGACY_REGEX_ENGINE) << "Legacy regex engine works incorrectly with unicode. Use PRAGMA RegexUseRe2='true';";
  1728. }
  1729. const auto& matcher = Ctx.PragmaRegexUseRe2 ?
  1730. BuildUdf(Ctx, pos, "Re2", opName == "match" ? "Match" : "Grep", {BuildTuple(pos, {pattern, BuildLiteralNull(pos)})}):
  1731. BuildUdf(Ctx, pos, "Pcre", opName == "match" ? "BacktrackingMatch" : "BacktrackingGrep", { pattern });
  1732. isMatch = new TCallNodeImpl(pos, "Apply", { matcher, res });
  1733. if (opName != "match") {
  1734. Ctx.IncrementMonCounter("sql_features", notMatch ? "NotRegexp" : "Regexp");
  1735. } else {
  1736. Ctx.IncrementMonCounter("sql_features", notMatch ? "NotMatch" : "Match");
  1737. }
  1738. } else {
  1739. Ctx.IncrementMonCounter("sql_errors", "UnknownMatchOp");
  1740. AltNotImplemented("match_op", cond);
  1741. return nullptr;
  1742. }
  1743. return (notMatch && isMatch) ? isMatch->ApplyUnaryOp(Ctx, pos, "Not") : isMatch;
  1744. }
  1745. case TRule_cond_expr::kAltCondExpr2: {
  1746. // | NOT? IN COMPACT? in_expr
  1747. auto altInExpr = cond.GetAlt_cond_expr2();
  1748. const bool notIn = altInExpr.HasBlock1();
  1749. auto hints = BuildTuple(pos, {});
  1750. bool isCompact = altInExpr.HasBlock3();
  1751. if (!isCompact) {
  1752. auto sqlHints = Ctx.PullHintForToken(Ctx.TokenPosition(altInExpr.GetToken2()));
  1753. isCompact = AnyOf(sqlHints, [](const NSQLTranslation::TSQLHint& hint) { return to_lower(hint.Name) == "compact"; });
  1754. }
  1755. if (isCompact) {
  1756. Ctx.IncrementMonCounter("sql_features", "IsCompactHint");
  1757. auto sizeHint = BuildTuple(pos, { BuildQuotedAtom(pos, "isCompact", NYql::TNodeFlags::Default) });
  1758. hints = BuildTuple(pos, { sizeHint });
  1759. }
  1760. TSqlExpression inSubexpr(Ctx, Mode);
  1761. auto inRight = inSubexpr.SqlInExpr(altInExpr.GetRule_in_expr4(), tail);
  1762. auto isIn = BuildBuiltinFunc(Ctx, pos, "In", {res, inRight, hints});
  1763. Ctx.IncrementMonCounter("sql_features", notIn ? "NotIn" : "In");
  1764. return (notIn && isIn) ? isIn->ApplyUnaryOp(Ctx, pos, "Not") : isIn;
  1765. }
  1766. case TRule_cond_expr::kAltCondExpr3: {
  1767. if (tail.Count) {
  1768. UnexpectedQuestionToken(tail);
  1769. return {};
  1770. }
  1771. auto altCase = cond.GetAlt_cond_expr3().GetBlock1().Alt_case();
  1772. const bool notNoll =
  1773. altCase == TRule_cond_expr::TAlt3::TBlock1::kAlt2 ||
  1774. altCase == TRule_cond_expr::TAlt3::TBlock1::kAlt4
  1775. ;
  1776. if (altCase == TRule_cond_expr::TAlt3::TBlock1::kAlt4 &&
  1777. !cond.GetAlt_cond_expr3().GetBlock1().GetAlt4().HasBlock1())
  1778. {
  1779. Ctx.Warning(Ctx.Pos(), TIssuesIds::YQL_MISSING_IS_BEFORE_NOT_NULL) << "Missing IS keyword before NOT NULL";
  1780. }
  1781. auto isNull = BuildIsNullOp(pos, res);
  1782. Ctx.IncrementMonCounter("sql_features", notNoll ? "NotNull" : "Null");
  1783. return (notNoll && isNull) ? isNull->ApplyUnaryOp(Ctx, pos, "Not") : isNull;
  1784. }
  1785. case TRule_cond_expr::kAltCondExpr4: {
  1786. auto alt = cond.GetAlt_cond_expr4();
  1787. const bool symmetric = alt.HasBlock3() && IS_TOKEN(Ctx.Settings.Antlr4Parser, alt.GetBlock3().GetToken1().GetId(), SYMMETRIC);
  1788. const bool negation = alt.HasBlock1();
  1789. TNodePtr left = SubExpr(alt.GetRule_eq_subexpr4(), {});
  1790. TNodePtr right = SubExpr(alt.GetRule_eq_subexpr6(), tail);
  1791. if (!left || !right) {
  1792. return {};
  1793. }
  1794. const bool bothArgNull = left->IsNull() && right->IsNull();
  1795. const bool oneArgNull = left->IsNull() || right->IsNull();
  1796. if (res->IsNull() || bothArgNull || (symmetric && oneArgNull)) {
  1797. Ctx.Warning(pos, TIssuesIds::YQL_OPERATION_WILL_RETURN_NULL)
  1798. << "BETWEEN operation will return NULL here";
  1799. }
  1800. auto buildSubexpr = [&](const TNodePtr& left, const TNodePtr& right) {
  1801. if (negation) {
  1802. return BuildBinaryOpRaw(
  1803. pos,
  1804. "Or",
  1805. BuildBinaryOpRaw(pos, "<", res, left),
  1806. BuildBinaryOpRaw(pos, ">", res, right)
  1807. );
  1808. } else {
  1809. return BuildBinaryOpRaw(
  1810. pos,
  1811. "And",
  1812. BuildBinaryOpRaw(pos, ">=", res, left),
  1813. BuildBinaryOpRaw(pos, "<=", res, right)
  1814. );
  1815. }
  1816. };
  1817. if (symmetric) {
  1818. Ctx.IncrementMonCounter("sql_features", negation? "NotBetweenSymmetric" : "BetweenSymmetric");
  1819. return BuildBinaryOpRaw(
  1820. pos,
  1821. negation? "And" : "Or",
  1822. buildSubexpr(left, right),
  1823. buildSubexpr(right, left)
  1824. );
  1825. } else {
  1826. Ctx.IncrementMonCounter("sql_features", negation? "NotBetween" : "Between");
  1827. return buildSubexpr(left, right);
  1828. }
  1829. }
  1830. case TRule_cond_expr::kAltCondExpr5: {
  1831. auto alt = cond.GetAlt_cond_expr5();
  1832. auto getNode = [](const TRule_cond_expr::TAlt5::TBlock1& b) -> const TRule_eq_subexpr& { return b.GetRule_eq_subexpr2(); };
  1833. return BinOpList(node.GetRule_eq_subexpr1(), getNode, alt.GetBlock1().begin(), alt.GetBlock1().end(), tail);
  1834. }
  1835. case TRule_cond_expr::ALT_NOT_SET:
  1836. Ctx.IncrementMonCounter("sql_errors", "UnknownConditionExpr");
  1837. AltNotImplemented("cond_expr", cond);
  1838. return nullptr;
  1839. }
  1840. }
  1841. return res;
  1842. }
  1843. TNodePtr TSqlExpression::BinOperList(const TString& opName, TVector<TNodePtr>::const_iterator begin, TVector<TNodePtr>::const_iterator end) const {
  1844. TPosition pos(Ctx.Pos());
  1845. const size_t opCount = end - begin;
  1846. Y_DEBUG_ABORT_UNLESS(opCount >= 2);
  1847. if (opCount == 2) {
  1848. return BuildBinaryOp(Ctx, pos, opName, *begin, *(begin+1));
  1849. } if (opCount == 3) {
  1850. return BuildBinaryOp(Ctx, pos, opName, BuildBinaryOp(Ctx, pos, opName, *begin, *(begin+1)), *(begin+2));
  1851. } else {
  1852. auto mid = begin + opCount / 2;
  1853. return BuildBinaryOp(Ctx, pos, opName, BinOperList(opName, begin, mid), BinOperList(opName, mid, end));
  1854. }
  1855. }
  1856. TSqlExpression::TCaseBranch TSqlExpression::ReduceCaseBranches(TVector<TCaseBranch>::const_iterator begin, TVector<TCaseBranch>::const_iterator end) const {
  1857. YQL_ENSURE(begin < end);
  1858. const size_t branchCount = end - begin;
  1859. if (branchCount == 1) {
  1860. return *begin;
  1861. }
  1862. auto mid = begin + branchCount / 2;
  1863. auto left = ReduceCaseBranches(begin, mid);
  1864. auto right = ReduceCaseBranches(mid, end);
  1865. TVector<TNodePtr> preds;
  1866. preds.reserve(branchCount);
  1867. for (auto it = begin; it != end; ++it) {
  1868. preds.push_back(it->Pred);
  1869. }
  1870. TCaseBranch result;
  1871. result.Pred = new TCallNodeImpl(Ctx.Pos(), "Or", CloneContainer(preds));
  1872. result.Value = BuildBuiltinFunc(Ctx, Ctx.Pos(), "If", { left.Pred, left.Value, right.Value });
  1873. return result;
  1874. }
  1875. template <typename TNode, typename TGetNode, typename TIter>
  1876. TNodePtr TSqlExpression::BinOper(const TString& opName, const TNode& node, TGetNode getNode, TIter begin, TIter end, const TTrailingQuestions& tail) {
  1877. if (begin == end) {
  1878. return SubExpr(node, tail);
  1879. }
  1880. // can't have top level smart_parenthesis node if any binary operation is present
  1881. MaybeUnnamedSmartParenOnTop = false;
  1882. Ctx.IncrementMonCounter("sql_binary_operations", opName);
  1883. const size_t listSize = end - begin;
  1884. TVector<TNodePtr> nodes;
  1885. nodes.reserve(1 + listSize);
  1886. nodes.push_back(SubExpr(node, {}));
  1887. for (; begin != end; ++begin) {
  1888. nodes.push_back(SubExpr(getNode(*begin), (begin + 1 == end) ? tail : TTrailingQuestions{}));
  1889. }
  1890. return BinOperList(opName, nodes.begin(), nodes.end());
  1891. }
  1892. template <typename TNode, typename TGetNode, typename TIter>
  1893. TNodePtr TSqlExpression::BinOpList(const TNode& node, TGetNode getNode, TIter begin, TIter end, const TTrailingQuestions& tail) {
  1894. MaybeUnnamedSmartParenOnTop = MaybeUnnamedSmartParenOnTop && (begin == end);
  1895. TNodePtr partialResult = SubExpr(node, (begin == end) ? tail : TTrailingQuestions{});
  1896. while (begin != end) {
  1897. Ctx.IncrementMonCounter("sql_features", "BinaryOperation");
  1898. Token(begin->GetToken1());
  1899. TPosition pos(Ctx.Pos());
  1900. TString opName;
  1901. auto tokenId = begin->GetToken1().GetId();
  1902. if (IS_TOKEN(Ctx.Settings.Antlr4Parser, tokenId, LESS)) {
  1903. opName = "<";
  1904. Ctx.IncrementMonCounter("sql_binary_operations", "Less");
  1905. } else if (IS_TOKEN(Ctx.Settings.Antlr4Parser, tokenId, LESS_OR_EQ)) {
  1906. opName = "<=";
  1907. Ctx.IncrementMonCounter("sql_binary_operations", "LessOrEq");
  1908. } else if (IS_TOKEN(Ctx.Settings.Antlr4Parser, tokenId, GREATER)) {
  1909. opName = ">";
  1910. Ctx.IncrementMonCounter("sql_binary_operations", "Greater");
  1911. } else if (IS_TOKEN(Ctx.Settings.Antlr4Parser, tokenId, GREATER_OR_EQ)) {
  1912. opName = ">=";
  1913. Ctx.IncrementMonCounter("sql_binary_operations", "GreaterOrEq");
  1914. } else if (IS_TOKEN(Ctx.Settings.Antlr4Parser, tokenId, PLUS)) {
  1915. opName = Ctx.Scoped->PragmaCheckedOps ? "CheckedAdd" : "+MayWarn";
  1916. Ctx.IncrementMonCounter("sql_binary_operations", "Plus");
  1917. } else if (IS_TOKEN(Ctx.Settings.Antlr4Parser, tokenId, MINUS)) {
  1918. opName = Ctx.Scoped->PragmaCheckedOps ? "CheckedSub" : "-MayWarn";
  1919. Ctx.IncrementMonCounter("sql_binary_operations", "Minus");
  1920. } else if (IS_TOKEN(Ctx.Settings.Antlr4Parser, tokenId, ASTERISK)) {
  1921. opName = Ctx.Scoped->PragmaCheckedOps ? "CheckedMul" : "*MayWarn";
  1922. Ctx.IncrementMonCounter("sql_binary_operations", "Multiply");
  1923. } else if (IS_TOKEN(Ctx.Settings.Antlr4Parser, tokenId, SLASH)) {
  1924. opName = "/MayWarn";
  1925. Ctx.IncrementMonCounter("sql_binary_operations", "Divide");
  1926. if (!Ctx.Scoped->PragmaClassicDivision && partialResult) {
  1927. partialResult = new TCallNodeImpl(pos, "SafeCast", {std::move(partialResult), BuildDataType(pos, "Double")});
  1928. } else if (Ctx.Scoped->PragmaCheckedOps) {
  1929. opName = "CheckedDiv";
  1930. }
  1931. } else if (IS_TOKEN(Ctx.Settings.Antlr4Parser, tokenId, PERCENT)) {
  1932. opName = Ctx.Scoped->PragmaCheckedOps ? "CheckedMod" : "%MayWarn";
  1933. Ctx.IncrementMonCounter("sql_binary_operations", "Mod");
  1934. } else {
  1935. Ctx.IncrementMonCounter("sql_errors", "UnsupportedBinaryOperation");
  1936. Error() << "Unsupported binary operation token: " << tokenId;
  1937. return nullptr;
  1938. }
  1939. partialResult = BuildBinaryOp(Ctx, pos, opName, partialResult, SubExpr(getNode(*begin), (begin + 1 == end) ? tail : TTrailingQuestions{}));
  1940. ++begin;
  1941. }
  1942. return partialResult;
  1943. }
  1944. template <typename TGetNode, typename TIter>
  1945. TNodePtr TSqlExpression::BinOpList(const TRule_bit_subexpr& node, TGetNode getNode, TIter begin, TIter end, const TTrailingQuestions& tail) {
  1946. MaybeUnnamedSmartParenOnTop = MaybeUnnamedSmartParenOnTop && (begin == end);
  1947. TNodePtr partialResult = SubExpr(node, (begin == end) ? tail : TTrailingQuestions{});
  1948. while (begin != end) {
  1949. Ctx.IncrementMonCounter("sql_features", "BinaryOperation");
  1950. TString opName;
  1951. switch (begin->GetBlock1().Alt_case()) {
  1952. case TRule_neq_subexpr_TBlock2_TBlock1::kAlt1: {
  1953. Token(begin->GetBlock1().GetAlt1().GetToken1());
  1954. auto tokenId = begin->GetBlock1().GetAlt1().GetToken1().GetId();
  1955. if (!IS_TOKEN(Ctx.Settings.Antlr4Parser, tokenId, SHIFT_LEFT)) {
  1956. Error() << "Unsupported binary operation token: " << tokenId;
  1957. return {};
  1958. }
  1959. opName = "ShiftLeft";
  1960. Ctx.IncrementMonCounter("sql_binary_operations", "ShiftLeft");
  1961. break;
  1962. }
  1963. case TRule_neq_subexpr_TBlock2_TBlock1::kAlt2: {
  1964. opName = "ShiftRight";
  1965. Ctx.IncrementMonCounter("sql_binary_operations", "ShiftRight");
  1966. break;
  1967. }
  1968. case TRule_neq_subexpr_TBlock2_TBlock1::kAlt3: {
  1969. Token(begin->GetBlock1().GetAlt3().GetToken1());
  1970. auto tokenId = begin->GetBlock1().GetAlt3().GetToken1().GetId();
  1971. if (!IS_TOKEN(Ctx.Settings.Antlr4Parser, tokenId, ROT_LEFT)) {
  1972. Error() << "Unsupported binary operation token: " << tokenId;
  1973. return {};
  1974. }
  1975. opName = "RotLeft";
  1976. Ctx.IncrementMonCounter("sql_binary_operations", "RotLeft");
  1977. break;
  1978. }
  1979. case TRule_neq_subexpr_TBlock2_TBlock1::kAlt4: {
  1980. opName = "RotRight";
  1981. Ctx.IncrementMonCounter("sql_binary_operations", "RotRight");
  1982. break;
  1983. }
  1984. case TRule_neq_subexpr_TBlock2_TBlock1::kAlt5: {
  1985. Token(begin->GetBlock1().GetAlt5().GetToken1());
  1986. auto tokenId = begin->GetBlock1().GetAlt5().GetToken1().GetId();
  1987. if (!IS_TOKEN(Ctx.Settings.Antlr4Parser, tokenId, AMPERSAND)) {
  1988. Error() << "Unsupported binary operation token: " << tokenId;
  1989. return {};
  1990. }
  1991. opName = "BitAnd";
  1992. Ctx.IncrementMonCounter("sql_binary_operations", "BitAnd");
  1993. break;
  1994. }
  1995. case TRule_neq_subexpr_TBlock2_TBlock1::kAlt6: {
  1996. Token(begin->GetBlock1().GetAlt6().GetToken1());
  1997. auto tokenId = begin->GetBlock1().GetAlt6().GetToken1().GetId();
  1998. if (!IS_TOKEN(Ctx.Settings.Antlr4Parser, tokenId, PIPE)) {
  1999. Error() << "Unsupported binary operation token: " << tokenId;
  2000. return {};
  2001. }
  2002. opName = "BitOr";
  2003. Ctx.IncrementMonCounter("sql_binary_operations", "BitOr");
  2004. break;
  2005. }
  2006. case TRule_neq_subexpr_TBlock2_TBlock1::kAlt7: {
  2007. Token(begin->GetBlock1().GetAlt7().GetToken1());
  2008. auto tokenId = begin->GetBlock1().GetAlt7().GetToken1().GetId();
  2009. if (!IS_TOKEN(Ctx.Settings.Antlr4Parser, tokenId, CARET)) {
  2010. Error() << "Unsupported binary operation token: " << tokenId;
  2011. return {};
  2012. }
  2013. opName = "BitXor";
  2014. Ctx.IncrementMonCounter("sql_binary_operations", "BitXor");
  2015. break;
  2016. }
  2017. case TRule_neq_subexpr_TBlock2_TBlock1::ALT_NOT_SET:
  2018. Y_ABORT("You should change implementation according to grammar changes");
  2019. }
  2020. partialResult = BuildBinaryOp(Ctx, Ctx.Pos(), opName, partialResult, SubExpr(getNode(*begin), (begin + 1 == end) ? tail : TTrailingQuestions{}));
  2021. ++begin;
  2022. }
  2023. return partialResult;
  2024. }
  2025. template <typename TGetNode, typename TIter>
  2026. TNodePtr TSqlExpression::BinOpList(const TRule_eq_subexpr& node, TGetNode getNode, TIter begin, TIter end, const TTrailingQuestions& tail) {
  2027. MaybeUnnamedSmartParenOnTop = MaybeUnnamedSmartParenOnTop && (begin == end);
  2028. TNodePtr partialResult = SubExpr(node, (begin == end) ? tail : TTrailingQuestions{});
  2029. while (begin != end) {
  2030. Ctx.IncrementMonCounter("sql_features", "BinaryOperation");
  2031. TString opName;
  2032. switch (begin->GetBlock1().Alt_case()) {
  2033. case TRule_cond_expr::TAlt5::TBlock1::TBlock1::kAlt1: {
  2034. Token(begin->GetBlock1().GetAlt1().GetToken1());
  2035. auto tokenId = begin->GetBlock1().GetAlt1().GetToken1().GetId();
  2036. if (!IS_TOKEN(Ctx.Settings.Antlr4Parser, tokenId, EQUALS)) {
  2037. Error() << "Unsupported binary operation token: " << tokenId;
  2038. return {};
  2039. }
  2040. Ctx.IncrementMonCounter("sql_binary_operations", "Equals");
  2041. opName = "==";
  2042. break;
  2043. }
  2044. case TRule_cond_expr::TAlt5::TBlock1::TBlock1::kAlt2: {
  2045. Token(begin->GetBlock1().GetAlt2().GetToken1());
  2046. auto tokenId = begin->GetBlock1().GetAlt2().GetToken1().GetId();
  2047. if (!IS_TOKEN(Ctx.Settings.Antlr4Parser, tokenId, EQUALS2)) {
  2048. Error() << "Unsupported binary operation token: " << tokenId;
  2049. return {};
  2050. }
  2051. Ctx.IncrementMonCounter("sql_binary_operations", "Equals2");
  2052. opName = "==";
  2053. break;
  2054. }
  2055. case TRule_cond_expr::TAlt5::TBlock1::TBlock1::kAlt3: {
  2056. Token(begin->GetBlock1().GetAlt3().GetToken1());
  2057. auto tokenId = begin->GetBlock1().GetAlt3().GetToken1().GetId();
  2058. if (!IS_TOKEN(Ctx.Settings.Antlr4Parser, tokenId, NOT_EQUALS)) {
  2059. Error() << "Unsupported binary operation token: " << tokenId;
  2060. return {};
  2061. }
  2062. Ctx.IncrementMonCounter("sql_binary_operations", "NotEquals");
  2063. opName = "!=";
  2064. break;
  2065. }
  2066. case TRule_cond_expr::TAlt5::TBlock1::TBlock1::kAlt4: {
  2067. Token(begin->GetBlock1().GetAlt4().GetToken1());
  2068. auto tokenId = begin->GetBlock1().GetAlt4().GetToken1().GetId();
  2069. if (!IS_TOKEN(Ctx.Settings.Antlr4Parser, tokenId, NOT_EQUALS2)) {
  2070. Error() << "Unsupported binary operation token: " << tokenId;
  2071. return {};
  2072. }
  2073. Ctx.IncrementMonCounter("sql_binary_operations", "NotEquals2");
  2074. opName = "!=";
  2075. break;
  2076. }
  2077. case TRule_cond_expr::TAlt5::TBlock1::TBlock1::kAlt5: {
  2078. Token(begin->GetBlock1().GetAlt5().GetRule_distinct_from_op1().GetToken1());
  2079. opName = begin->GetBlock1().GetAlt5().GetRule_distinct_from_op1().HasBlock2() ? "IsNotDistinctFrom" : "IsDistinctFrom";
  2080. Ctx.IncrementMonCounter("sql_binary_operations", opName);
  2081. break;
  2082. }
  2083. case TRule_cond_expr::TAlt5::TBlock1::TBlock1::ALT_NOT_SET:
  2084. Y_ABORT("You should change implementation according to grammar changes");
  2085. }
  2086. partialResult = BuildBinaryOp(Ctx, Ctx.Pos(), opName, partialResult, SubExpr(getNode(*begin), (begin + 1 == end) ? tail : TTrailingQuestions{}));
  2087. ++begin;
  2088. }
  2089. return partialResult;
  2090. }
  2091. TNodePtr TSqlExpression::SqlInExpr(const TRule_in_expr& node, const TTrailingQuestions& tail) {
  2092. TSqlExpression expr(Ctx, Mode);
  2093. expr.SetSmartParenthesisMode(TSqlExpression::ESmartParenthesis::InStatement);
  2094. auto result = expr.UnaryExpr(node.GetRule_in_unary_subexpr1(), tail);
  2095. return result;
  2096. }
  2097. TNodePtr TSqlExpression::SmartParenthesis(const TRule_smart_parenthesis& node) {
  2098. TVector<TNodePtr> exprs;
  2099. Token(node.GetToken1());
  2100. const TPosition pos(Ctx.Pos());
  2101. const bool isTuple = node.HasBlock3();
  2102. bool expectTuple = SmartParenthesisMode == ESmartParenthesis::InStatement;
  2103. EExpr mode = EExpr::Regular;
  2104. if (SmartParenthesisMode == ESmartParenthesis::SqlLambdaParams) {
  2105. mode = EExpr::SqlLambdaParams;
  2106. expectTuple = true;
  2107. }
  2108. if (node.HasBlock2() && !NamedExprList(node.GetBlock2().GetRule_named_expr_list1(), exprs, mode)) {
  2109. return {};
  2110. }
  2111. bool topLevelGroupBy = MaybeUnnamedSmartParenOnTop && SmartParenthesisMode == ESmartParenthesis::GroupBy;
  2112. bool hasAliases = false;
  2113. bool hasUnnamed = false;
  2114. for (const auto& expr: exprs) {
  2115. if (expr->GetLabel()) {
  2116. hasAliases = true;
  2117. } else {
  2118. hasUnnamed = true;
  2119. }
  2120. if (hasAliases && hasUnnamed && !topLevelGroupBy) {
  2121. Ctx.IncrementMonCounter("sql_errors", "AnonymousStructMembers");
  2122. Ctx.Error(pos) << "Structure does not allow anonymous members";
  2123. return nullptr;
  2124. }
  2125. }
  2126. if (exprs.size() == 1 && hasUnnamed && !isTuple && !expectTuple) {
  2127. return exprs.back();
  2128. }
  2129. if (topLevelGroupBy) {
  2130. if (isTuple) {
  2131. Ctx.IncrementMonCounter("sql_errors", "SimpleTupleInGroupBy");
  2132. Token(node.GetBlock3().GetToken1());
  2133. Ctx.Error() << "Unexpected trailing comma in grouping elements list";
  2134. return nullptr;
  2135. }
  2136. Ctx.IncrementMonCounter("sql_features", "ListOfNamedNode");
  2137. return BuildListOfNamedNodes(pos, std::move(exprs));
  2138. }
  2139. Ctx.IncrementMonCounter("sql_features", hasUnnamed ? "SimpleTuple" : "SimpleStruct");
  2140. return (hasUnnamed || expectTuple || exprs.size() == 0) ? BuildTuple(pos, exprs) : BuildStructure(pos, exprs);
  2141. }
  2142. } // namespace NSQLTranslationV1