// sql_select.cpp (56 KB)
  1. #include "sql_select.h"
  2. #include "sql_call_expr.h"
  3. #include "sql_expression.h"
  4. #include "sql_group_by.h"
  5. #include "sql_values.h"
  6. #include "sql_match_recognize.h"
  7. namespace NSQLTranslationV1 {
  8. using namespace NSQLv1Generated;
  9. namespace {
  10. bool IsColumnsOnly(const TVector<TSortSpecificationPtr>& container) {
  11. for (const auto& elem: container) {
  12. if (!elem->OrderExpr->GetColumnName()) {
  13. return false;
  14. }
  15. }
  16. return true;
  17. }
  18. bool CollectJoinLinkSettings(TPosition pos, TJoinLinkSettings& linkSettings, TContext& ctx) {
  19. linkSettings = {};
  20. auto hints = ctx.PullHintForToken(pos);
  21. for (const auto& hint: hints) {
  22. const auto canonizedName = to_lower(hint.Name);
  23. auto newStrategy = TJoinLinkSettings::EStrategy::Default;
  24. if (canonizedName == "merge") {
  25. newStrategy = TJoinLinkSettings::EStrategy::SortedMerge;
  26. } else if (canonizedName == "streamlookup") {
  27. newStrategy = TJoinLinkSettings::EStrategy::StreamLookup;
  28. } else if (canonizedName == "map") {
  29. newStrategy = TJoinLinkSettings::EStrategy::ForceMap;
  30. } else if (canonizedName == "grace") {
  31. newStrategy = TJoinLinkSettings::EStrategy::ForceGrace;
  32. } else if (canonizedName == "compact") {
  33. linkSettings.Compact = true;
  34. continue;
  35. } else {
  36. ctx.Warning(hint.Pos, TIssuesIds::YQL_UNUSED_HINT) << "Unsupported join hint: " << hint.Name;
  37. }
  38. if (TJoinLinkSettings::EStrategy::Default == linkSettings.Strategy) {
  39. linkSettings.Strategy = newStrategy;
  40. } else if (newStrategy == linkSettings.Strategy) {
  41. ctx.Error() << "Duplicate join strategy hint";
  42. return false;
  43. } else {
  44. ctx.Error() << "Conflicting join strategy hints";
  45. return false;
  46. }
  47. }
  48. return true;
  49. }
  50. } // namespace
  51. bool TSqlSelect::JoinOp(ISource* join, const TRule_join_source::TBlock3& block, TMaybe<TPosition> anyPos) {
  52. // block: (join_op (ANY)? flatten_source join_constraint?)
  53. // join_op:
  54. // COMMA
  55. // | (NATURAL)? ((LEFT (ONLY | SEMI)? | RIGHT (ONLY | SEMI)? | EXCLUSION | FULL)? (OUTER)? | INNER | CROSS) JOIN
  56. //;
  57. const auto& node = block.GetRule_join_op1();
  58. TString joinOp("Inner");
  59. TJoinLinkSettings linkSettings;
  60. switch (node.Alt_case()) {
  61. case TRule_join_op::kAltJoinOp1: {
  62. joinOp = "Cross";
  63. if (!Ctx.AnsiImplicitCrossJoin) {
  64. Error() << "Cartesian product of tables is disabled. Please use "
  65. "explicit CROSS JOIN or enable it via PRAGMA AnsiImplicitCrossJoin";
  66. return false;
  67. }
  68. auto alt = node.GetAlt_join_op1();
  69. if (!CollectJoinLinkSettings(Ctx.TokenPosition(alt.GetToken1()), linkSettings, Ctx)) {
  70. return false;
  71. }
  72. Ctx.IncrementMonCounter("sql_join_operations", "CartesianProduct");
  73. break;
  74. }
  75. case TRule_join_op::kAltJoinOp2: {
  76. auto alt = node.GetAlt_join_op2();
  77. if (alt.HasBlock1()) {
  78. Ctx.IncrementMonCounter("sql_join_operations", "Natural");
  79. Error() << "Natural join is not implemented yet";
  80. return false;
  81. }
  82. if (!CollectJoinLinkSettings(Ctx.TokenPosition(alt.GetToken3()), linkSettings, Ctx)) {
  83. return false;
  84. }
  85. switch (alt.GetBlock2().Alt_case()) {
  86. case TRule_join_op::TAlt2::TBlock2::kAlt1:
  87. if (alt.GetBlock2().GetAlt1().HasBlock1()) {
  88. auto block = alt.GetBlock2().GetAlt1().GetBlock1();
  89. switch (block.Alt_case()) {
  90. case TRule_join_op_TAlt2_TBlock2_TAlt1_TBlock1::kAlt1:
  91. // left
  92. joinOp = Token(block.GetAlt1().GetToken1());
  93. if (block.GetAlt1().HasBlock2()) {
  94. joinOp += " " + Token(block.GetAlt1().GetBlock2().GetToken1());
  95. }
  96. break;
  97. case TRule_join_op_TAlt2_TBlock2_TAlt1_TBlock1::kAlt2:
  98. // right
  99. joinOp = Token(block.GetAlt2().GetToken1());
  100. if (block.GetAlt2().HasBlock2()) {
  101. joinOp += " " + Token(block.GetAlt2().GetBlock2().GetToken1());
  102. }
  103. break;
  104. case TRule_join_op_TAlt2_TBlock2_TAlt1_TBlock1::kAlt3:
  105. // exclusion
  106. joinOp = Token(block.GetAlt3().GetToken1());
  107. break;
  108. case TRule_join_op_TAlt2_TBlock2_TAlt1_TBlock1::kAlt4:
  109. // full
  110. joinOp = Token(block.GetAlt4().GetToken1());
  111. break;
  112. case TRule_join_op_TAlt2_TBlock2_TAlt1_TBlock1::ALT_NOT_SET:
  113. Ctx.IncrementMonCounter("sql_errors", "UnknownJoinOperation");
  114. AltNotImplemented("join_op", node);
  115. return false;
  116. }
  117. }
  118. if (alt.GetBlock2().GetAlt1().HasBlock2()) {
  119. TString normalizedOp = alt.GetBlock2().GetAlt1().HasBlock1() ? joinOp : "";
  120. normalizedOp.to_upper();
  121. if (!(normalizedOp == "LEFT" || normalizedOp == "RIGHT" || normalizedOp == "FULL")) {
  122. Token(alt.GetBlock2().GetAlt1().GetBlock2().GetToken1());
  123. Error() << "Invalid join type: " << normalizedOp << (normalizedOp.empty() ? "" : " ") << "OUTER JOIN. "
  124. << "OUTER keyword is optional and can only be used after LEFT, RIGHT or FULL";
  125. Ctx.IncrementMonCounter("sql_errors", "BadJoinType");
  126. return false;
  127. }
  128. }
  129. break;
  130. case TRule_join_op::TAlt2::TBlock2::kAlt2:
  131. joinOp = Token(alt.GetBlock2().GetAlt2().GetToken1());
  132. break;
  133. case TRule_join_op::TAlt2::TBlock2::kAlt3:
  134. joinOp = Token(alt.GetBlock2().GetAlt3().GetToken1());
  135. break;
  136. case TRule_join_op::TAlt2::TBlock2::ALT_NOT_SET:
  137. Ctx.IncrementMonCounter("sql_errors", "UnknownJoinOperation");
  138. AltNotImplemented("join_op", node);
  139. return false;
  140. }
  141. Ctx.IncrementMonCounter("sql_features", "Join");
  142. Ctx.IncrementMonCounter("sql_join_operations", joinOp);
  143. break;
  144. }
  145. case TRule_join_op::ALT_NOT_SET:
  146. Ctx.IncrementMonCounter("sql_errors", "UnknownJoinOperation2");
  147. AltNotImplemented("join_op", node);
  148. return false;
  149. }
  150. joinOp = NormalizeJoinOp(joinOp);
  151. if (linkSettings.Strategy != TJoinLinkSettings::EStrategy::Default && joinOp == "Cross") {
  152. Ctx.Warning(Ctx.Pos(), TIssuesIds::YQL_UNUSED_HINT) << "Non-default join strategy will not be used for CROSS JOIN";
  153. linkSettings.Strategy = TJoinLinkSettings::EStrategy::Default;
  154. }
  155. TNodePtr joinKeyExpr;
  156. if (block.HasBlock4()) {
  157. if (joinOp == "Cross") {
  158. Error() << "Cross join should not have ON or USING expression";
  159. Ctx.IncrementMonCounter("sql_errors", "BadJoinExpr");
  160. return false;
  161. }
  162. joinKeyExpr = JoinExpr(join, block.GetBlock4().GetRule_join_constraint1());
  163. if (!joinKeyExpr) {
  164. Ctx.IncrementMonCounter("sql_errors", "BadJoinExpr");
  165. return false;
  166. }
  167. }
  168. else {
  169. if (joinOp != "Cross") {
  170. Error() << "Expected ON or USING expression";
  171. Ctx.IncrementMonCounter("sql_errors", "BadJoinExpr");
  172. return false;
  173. }
  174. }
  175. if (joinOp == "Cross" && anyPos) {
  176. Ctx.Error(*anyPos) << "ANY should not be used with Cross JOIN";
  177. Ctx.IncrementMonCounter("sql_errors", "BadJoinAny");
  178. return false;
  179. }
  180. Y_DEBUG_ABORT_UNLESS(join->GetJoin());
  181. join->GetJoin()->SetupJoin(joinOp, joinKeyExpr, linkSettings);
  182. return true;
  183. }
  184. TNodePtr TSqlSelect::JoinExpr(ISource* join, const TRule_join_constraint& node) {
  185. switch (node.Alt_case()) {
  186. case TRule_join_constraint::kAltJoinConstraint1: {
  187. auto& alt = node.GetAlt_join_constraint1();
  188. Token(alt.GetToken1());
  189. TColumnRefScope scope(Ctx, EColumnRefState::Allow);
  190. TSqlExpression expr(Ctx, Mode);
  191. return expr.Build(alt.GetRule_expr2());
  192. }
  193. case TRule_join_constraint::kAltJoinConstraint2: {
  194. auto& alt = node.GetAlt_join_constraint2();
  195. Token(alt.GetToken1());
  196. TPosition pos(Ctx.Pos());
  197. TVector<TDeferredAtom> names;
  198. if (!PureColumnOrNamedListStr(alt.GetRule_pure_column_or_named_list2(), *this, names)) {
  199. return nullptr;
  200. }
  201. Y_DEBUG_ABORT_UNLESS(join->GetJoin());
  202. return join->GetJoin()->BuildJoinKeys(Ctx, names);
  203. }
  204. case TRule_join_constraint::ALT_NOT_SET:
  205. Ctx.IncrementMonCounter("sql_errors", "UnknownJoinConstraint");
  206. AltNotImplemented("join_constraint", node);
  207. break;
  208. }
  209. return nullptr;
  210. }
  211. bool TSqlSelect::FlattenByArg(const TString& sourceLabel, TVector<TNodePtr>& flattenByColumns, TVector<TNodePtr>& flattenByExprs,
  212. const TRule_flatten_by_arg& node)
  213. {
  214. // flatten_by_arg:
  215. // named_column
  216. // | LPAREN named_expr_list COMMA? RPAREN
  217. // ;
  218. flattenByColumns.clear();
  219. flattenByExprs.clear();
  220. TVector<TNodePtr> namedExprs;
  221. switch (node.Alt_case()) {
  222. case TRule_flatten_by_arg::kAltFlattenByArg1: {
  223. TVector<TNodePtr> columns;
  224. if (!NamedColumn(columns, node.GetAlt_flatten_by_arg1().GetRule_named_column1())) {
  225. return false;
  226. }
  227. YQL_ENSURE(columns.size() == 1);
  228. auto& column = columns.back();
  229. auto columnNamePtr = column->GetColumnName();
  230. YQL_ENSURE(columnNamePtr && *columnNamePtr);
  231. auto sourcePtr = column->GetSourceName();
  232. const bool isEmptySource = !sourcePtr || !*sourcePtr;
  233. if (isEmptySource || *sourcePtr == sourceLabel) {
  234. // select * from T flatten by x
  235. // select * from T as s flatten by x
  236. // select * from T as s flatten by s.x
  237. flattenByColumns.emplace_back(std::move(column));
  238. } else {
  239. // select * from T as s flatten by x.y as z
  240. if (!column->GetLabel()) {
  241. Ctx.Error(column->GetPos()) << "Unnamed expression after FLATTEN BY is not allowed";
  242. return false;
  243. }
  244. flattenByColumns.emplace_back(BuildColumn(column->GetPos(), column->GetLabel()));
  245. TVector<INode::TIdPart> ids;
  246. ids.push_back(BuildColumn(column->GetPos()));
  247. ids.push_back(*sourcePtr);
  248. ids.push_back(*columnNamePtr);
  249. auto node = BuildAccess(column->GetPos(), ids, false);
  250. node->SetLabel(column->GetLabel());
  251. flattenByExprs.emplace_back(std::move(node));
  252. }
  253. break;
  254. }
  255. case TRule_flatten_by_arg::kAltFlattenByArg2: {
  256. TColumnRefScope scope(Ctx, EColumnRefState::Allow);
  257. if (!NamedExprList(node.GetAlt_flatten_by_arg2().GetRule_named_expr_list2(), namedExprs) || Ctx.HasPendingErrors) {
  258. return false;
  259. }
  260. for (auto& namedExprNode : namedExprs) {
  261. YQL_ENSURE(!namedExprNode->ContentListPtr());
  262. auto sourcePtr = namedExprNode->GetSourceName();
  263. const bool isEmptySource = !sourcePtr || !*sourcePtr;
  264. auto columnNamePtr = namedExprNode->GetColumnName();
  265. if (columnNamePtr && (isEmptySource || *sourcePtr == sourceLabel)) {
  266. namedExprNode->AssumeColumn();
  267. flattenByColumns.emplace_back(std::move(namedExprNode));
  268. } else {
  269. auto nodeLabel = namedExprNode->GetLabel();
  270. if (!nodeLabel) {
  271. Ctx.Error(namedExprNode->GetPos()) << "Unnamed expression after FLATTEN BY is not allowed";
  272. return false;
  273. }
  274. flattenByColumns.emplace_back(BuildColumn(namedExprNode->GetPos(), nodeLabel));
  275. flattenByExprs.emplace_back(std::move(namedExprNode));
  276. }
  277. }
  278. break;
  279. }
  280. case TRule_flatten_by_arg::ALT_NOT_SET:
  281. Ctx.IncrementMonCounter("sql_errors", "UnknownFlattenByArg");
  282. AltNotImplemented("flatten_by_arg", node);
  283. return false;
  284. }
  285. return true;
  286. }
  287. TSourcePtr TSqlSelect::FlattenSource(const TRule_flatten_source& node) {
  288. auto source = NamedSingleSource(node.GetRule_named_single_source1(), true);
  289. if (!source) {
  290. return nullptr;
  291. }
  292. if (node.HasBlock2()) {
  293. auto flatten = node.GetBlock2();
  294. auto flatten2 = flatten.GetBlock2();
  295. switch (flatten2.Alt_case()) {
  296. case TRule_flatten_source::TBlock2::TBlock2::kAlt1: {
  297. TString mode = "auto";
  298. if (flatten2.GetAlt1().HasBlock1()) {
  299. mode = to_lower(Token(flatten2.GetAlt1().GetBlock1().GetToken1()));
  300. }
  301. TVector<TNodePtr> flattenByColumns;
  302. TVector<TNodePtr> flattenByExprs;
  303. if (!FlattenByArg(source->GetLabel(), flattenByColumns, flattenByExprs, flatten2.GetAlt1().GetRule_flatten_by_arg3())) {
  304. return nullptr;
  305. }
  306. Ctx.IncrementMonCounter("sql_features", "FlattenByColumns");
  307. if (!source->AddExpressions(Ctx, flattenByColumns, EExprSeat::FlattenBy)) {
  308. return nullptr;
  309. }
  310. if (!source->AddExpressions(Ctx, flattenByExprs, EExprSeat::FlattenByExpr)) {
  311. return nullptr;
  312. }
  313. source->SetFlattenByMode(mode);
  314. break;
  315. }
  316. case TRule_flatten_source::TBlock2::TBlock2::kAlt2: {
  317. Ctx.IncrementMonCounter("sql_features", "FlattenColumns");
  318. source->MarkFlattenColumns();
  319. break;
  320. }
  321. case TRule_flatten_source::TBlock2::TBlock2::ALT_NOT_SET:
  322. Ctx.IncrementMonCounter("sql_errors", "UnknownOrdinaryNamedColumn");
  323. AltNotImplemented("flatten_source", flatten2);
  324. }
  325. }
  326. return source;
  327. }
  328. TSourcePtr TSqlSelect::JoinSource(const TRule_join_source& node) {
  329. // join_source: (ANY)? flatten_source (join_op (ANY)? flatten_source join_constraint?)*;
  330. if (node.HasBlock1() && !node.Block3Size()) {
  331. Error() << "ANY is not allowed without JOIN";
  332. return nullptr;
  333. }
  334. TSourcePtr source(FlattenSource(node.GetRule_flatten_source2()));
  335. if (!source) {
  336. return nullptr;
  337. }
  338. if (node.Block3Size()) {
  339. TPosition pos(Ctx.Pos());
  340. TVector<TSourcePtr> sources;
  341. TVector<TMaybe<TPosition>> anyPositions;
  342. TVector<bool> anyFlags;
  343. sources.emplace_back(std::move(source));
  344. anyPositions.emplace_back(node.HasBlock1() ? Ctx.TokenPosition(node.GetBlock1().GetToken1()) : TMaybe<TPosition>());
  345. anyFlags.push_back(bool(anyPositions.back()));
  346. for (auto& block: node.GetBlock3()) {
  347. sources.emplace_back(FlattenSource(block.GetRule_flatten_source3()));
  348. if (!sources.back()) {
  349. Ctx.IncrementMonCounter("sql_errors", "NoJoinWith");
  350. return nullptr;
  351. }
  352. anyPositions.emplace_back(block.HasBlock2() ? Ctx.TokenPosition(block.GetBlock2().GetToken1()) : TMaybe<TPosition>());
  353. anyFlags.push_back(bool(anyPositions.back()));
  354. }
  355. source = BuildEquiJoin(pos, std::move(sources), std::move(anyFlags), Ctx.Scoped->StrictJoinKeyTypes);
  356. size_t idx = 1;
  357. for (auto& block: node.GetBlock3()) {
  358. YQL_ENSURE(idx < anyPositions.size());
  359. TMaybe<TPosition> leftAny = (idx == 1) ? anyPositions[0] : Nothing();
  360. TMaybe<TPosition> rightAny = anyPositions[idx];
  361. if (!JoinOp(source.Get(), block, leftAny ? leftAny : rightAny)) {
  362. Ctx.IncrementMonCounter("sql_errors", "NoJoinOp");
  363. return nullptr;
  364. }
  365. ++idx;
  366. }
  367. }
  368. return source;
  369. }
  370. bool TSqlSelect::SelectTerm(TVector<TNodePtr>& terms, const TRule_result_column& node) {
  371. // result_column:
  372. // opt_id_prefix ASTERISK
  373. // | expr ((AS an_id) | an_id_pure)?
  374. // ;
  375. switch (node.Alt_case()) {
  376. case TRule_result_column::kAltResultColumn1: {
  377. auto alt = node.GetAlt_result_column1();
  378. Token(alt.GetToken2());
  379. auto idAsteriskQualify = OptIdPrefixAsStr(alt.GetRule_opt_id_prefix1(), *this);
  380. Ctx.IncrementMonCounter("sql_features", idAsteriskQualify ? "QualifyAsterisk" : "Asterisk");
  381. terms.push_back(BuildColumn(Ctx.Pos(), "*", idAsteriskQualify));
  382. break;
  383. }
  384. case TRule_result_column::kAltResultColumn2: {
  385. auto alt = node.GetAlt_result_column2();
  386. TColumnRefScope scope(Ctx, EColumnRefState::Allow);
  387. TSqlExpression expr(Ctx, Mode);
  388. TNodePtr term(expr.Build(alt.GetRule_expr1()));
  389. if (!term) {
  390. Ctx.IncrementMonCounter("sql_errors", "NoTerm");
  391. return false;
  392. }
  393. if (alt.HasBlock2()) {
  394. TString label;
  395. bool implicitLabel = false;
  396. switch (alt.GetBlock2().Alt_case()) {
  397. case TRule_result_column_TAlt2_TBlock2::kAlt1:
  398. label = Id(alt.GetBlock2().GetAlt1().GetRule_an_id_or_type2(), *this);
  399. break;
  400. case TRule_result_column_TAlt2_TBlock2::kAlt2:
  401. label = Id(alt.GetBlock2().GetAlt2().GetRule_an_id_as_compat1(), *this);
  402. if (!Ctx.AnsiOptionalAs) {
  403. // AS is mandatory
  404. Ctx.Error() << "Expecting mandatory AS here. Did you miss comma? Please add PRAGMA AnsiOptionalAs; for ANSI compatibility";
  405. return false;
  406. }
  407. implicitLabel = true;
  408. break;
  409. case TRule_result_column_TAlt2_TBlock2::ALT_NOT_SET:
  410. Y_ABORT("You should change implementation according to grammar changes");
  411. }
  412. term->SetLabel(label, Ctx.Pos());
  413. term->MarkImplicitLabel(implicitLabel);
  414. }
  415. terms.push_back(term);
  416. break;
  417. }
  418. case TRule_result_column::ALT_NOT_SET:
  419. Ctx.IncrementMonCounter("sql_errors", "UnknownResultColumn");
  420. AltNotImplemented("result_column", node);
  421. return false;
  422. }
  423. return true;
  424. }
  425. bool TSqlSelect::ValidateSelectColumns(const TVector<TNodePtr>& terms) {
  426. TSet<TString> labels;
  427. TSet<TString> asteriskSources;
  428. for (const auto& term: terms) {
  429. const auto& label = term->GetLabel();
  430. if (!Ctx.PragmaAllowDotInAlias && label.find('.') != TString::npos) {
  431. Ctx.Error(term->GetPos()) << "Unable to use '.' in column name. Invalid column name: " << label;
  432. return false;
  433. }
  434. if (!label.empty()) {
  435. if (!labels.insert(label).second) {
  436. Ctx.Error(term->GetPos()) << "Unable to use duplicate column names. Collision in name: " << label;
  437. return false;
  438. }
  439. }
  440. if (term->IsAsterisk()) {
  441. const auto& source = *term->GetSourceName();
  442. if (source.empty() && terms.ysize() > 1) {
  443. Ctx.Error(term->GetPos()) << "Unable to use plain '*' with other projection items. Please use qualified asterisk instead: '<table>.*' (<table> can be either table name or table alias).";
  444. return false;
  445. } else if (!asteriskSources.insert(source).second) {
  446. Ctx.Error(term->GetPos()) << "Unable to use twice same quialified asterisk. Invalid source: " << source;
  447. return false;
  448. }
  449. } else if (label.empty()) {
  450. const auto* column = term->GetColumnName();
  451. if (column && !column->empty()) {
  452. const auto& source = *term->GetSourceName();
  453. const auto usedName = source.empty() ? *column : source + '.' + *column;
  454. if (!labels.insert(usedName).second) {
  455. Ctx.Error(term->GetPos()) << "Unable to use duplicate column names. Collision in name: " << usedName;
  456. return false;
  457. }
  458. }
  459. }
  460. }
  461. return true;
  462. }
  463. TSourcePtr TSqlSelect::SingleSource(const TRule_single_source& node, const TVector<TString>& derivedColumns, TPosition derivedColumnsPos, bool unorderedSubquery) {
  464. switch (node.Alt_case()) {
  465. case TRule_single_source::kAltSingleSource1: {
  466. const auto& alt = node.GetAlt_single_source1();
  467. const auto& table_ref = alt.GetRule_table_ref1();
  468. if (auto maybeSource = AsTableImpl(table_ref)) {
  469. auto source = *maybeSource;
  470. if (!source) {
  471. return nullptr;
  472. }
  473. return source;
  474. } else {
  475. TTableRef table;
  476. if (!TableRefImpl(alt.GetRule_table_ref1(), table, unorderedSubquery)) {
  477. return nullptr;
  478. }
  479. if (table.Source) {
  480. return table.Source;
  481. }
  482. TPosition pos(Ctx.Pos());
  483. Ctx.IncrementMonCounter("sql_select_clusters", table.Cluster.GetLiteral() ? *table.Cluster.GetLiteral() : "unknown");
  484. return BuildTableSource(pos, table);
  485. }
  486. }
  487. case TRule_single_source::kAltSingleSource2: {
  488. const auto& alt = node.GetAlt_single_source2();
  489. Token(alt.GetToken1());
  490. TSqlSelect innerSelect(Ctx, Mode);
  491. TPosition pos;
  492. auto source = innerSelect.Build(alt.GetRule_select_stmt2(), pos);
  493. if (!source) {
  494. return nullptr;
  495. }
  496. return BuildInnerSource(pos, BuildSourceNode(pos, std::move(source)), Ctx.Scoped->CurrService, Ctx.Scoped->CurrCluster);
  497. }
  498. case TRule_single_source::kAltSingleSource3: {
  499. const auto& alt = node.GetAlt_single_source3();
  500. TPosition pos;
  501. return TSqlValues(Ctx, Mode).Build(alt.GetRule_values_stmt2(), pos, derivedColumns, derivedColumnsPos);
  502. }
  503. case TRule_single_source::ALT_NOT_SET:
  504. AltNotImplemented("single_source", node);
  505. Ctx.IncrementMonCounter("sql_errors", "UnknownSingleSource");
  506. return nullptr;
  507. }
  508. }
  509. TSourcePtr TSqlSelect::NamedSingleSource(const TRule_named_single_source& node, bool unorderedSubquery) {
  510. // named_single_source: single_source match_recognize_clause? (((AS an_id) | an_id_as_compat) pure_column_list?)? (sample_clause | tablesample_clause)?;
  511. TVector<TString> derivedColumns;
  512. TPosition derivedColumnsPos;
  513. if (node.HasBlock3() && node.GetBlock3().HasBlock2()) {
  514. const auto& columns = node.GetBlock3().GetBlock2().GetRule_pure_column_list1();
  515. Token(columns.GetToken1());
  516. derivedColumnsPos = Ctx.Pos();
  517. if (node.GetRule_single_source1().Alt_case() != TRule_single_source::kAltSingleSource3) {
  518. Error() << "Derived column list is only supported for VALUES";
  519. return nullptr;
  520. }
  521. PureColumnListStr(columns, *this, derivedColumns);
  522. }
  523. auto singleSource = SingleSource(node.GetRule_single_source1(), derivedColumns, derivedColumnsPos, unorderedSubquery);
  524. if (!singleSource) {
  525. return nullptr;
  526. }
  527. if (node.HasBlock2()) {
  528. if (node.HasBlock4()) {
  529. //CAN/CSA-ISO/IEC 9075-2:18 7.6 <table reference>
  530. //4) TF shall not simply contain both a <sample clause> and a <row pattern recognition clause and name>.
  531. Ctx.Error() << "Source shall not simply contain both a sample clause and a row pattern recognition clause";
  532. return {};
  533. }
  534. auto matchRecognizeClause = TSqlMatchRecognizeClause(Ctx, Mode);
  535. auto matchRecognize = matchRecognizeClause.CreateBuilder(node.GetBlock2().GetRule_row_pattern_recognition_clause1());
  536. singleSource->SetMatchRecognize(matchRecognize);
  537. }
  538. if (node.HasBlock3()) {
  539. TString label;
  540. switch (node.GetBlock3().GetBlock1().Alt_case()) {
  541. case TRule_named_single_source_TBlock3_TBlock1::kAlt1:
  542. label = Id(node.GetBlock3().GetBlock1().GetAlt1().GetRule_an_id2(), *this);
  543. break;
  544. case TRule_named_single_source_TBlock3_TBlock1::kAlt2:
  545. label = Id(node.GetBlock3().GetBlock1().GetAlt2().GetRule_an_id_as_compat1(), *this);
  546. if (!Ctx.AnsiOptionalAs) {
  547. // AS is mandatory
  548. Ctx.Error() << "Expecting mandatory AS here. Did you miss comma? Please add PRAGMA AnsiOptionalAs; for ANSI compatibility";
  549. return {};
  550. }
  551. break;
  552. case TRule_named_single_source_TBlock3_TBlock1::ALT_NOT_SET:
  553. Y_ABORT("You should change implementation according to grammar changes");
  554. }
  555. singleSource->SetLabel(label);
  556. }
  557. if (node.HasBlock4()) {
  558. ESampleClause sampleClause;
  559. ESampleMode mode;
  560. TSqlExpression expr(Ctx, Mode);
  561. TNodePtr samplingRateNode;
  562. TNodePtr samplingSeedNode;
  563. const auto& sampleBlock = node.GetBlock4();
  564. TPosition pos;
  565. switch (sampleBlock.Alt_case()) {
  566. case TRule_named_single_source::TBlock4::kAlt1:
  567. {
  568. sampleClause = ESampleClause::Sample;
  569. mode = ESampleMode::Bernoulli;
  570. const auto& sampleExpr = sampleBlock.GetAlt1().GetRule_sample_clause1().GetRule_expr2();
  571. samplingRateNode = expr.Build(sampleExpr);
  572. if (!samplingRateNode) {
  573. return nullptr;
  574. }
  575. pos = GetPos(sampleBlock.GetAlt1().GetRule_sample_clause1().GetToken1());
  576. Ctx.IncrementMonCounter("sql_features", "SampleClause");
  577. }
  578. break;
  579. case TRule_named_single_source::TBlock4::kAlt2:
  580. {
  581. sampleClause = ESampleClause::TableSample;
  582. const auto& tableSampleClause = sampleBlock.GetAlt2().GetRule_tablesample_clause1();
  583. const auto& modeToken = tableSampleClause.GetRule_sampling_mode2().GetToken1();
  584. const TCiString& token = Token(modeToken);
  585. if (token == "system") {
  586. mode = ESampleMode::System;
  587. } else if (token == "bernoulli") {
  588. mode = ESampleMode::Bernoulli;
  589. } else {
  590. Ctx.Error(GetPos(modeToken)) << "Unsupported sampling mode: " << token;
  591. Ctx.IncrementMonCounter("sql_errors", "UnsupportedSamplingMode");
  592. return nullptr;
  593. }
  594. const auto& tableSampleExpr = tableSampleClause.GetRule_expr4();
  595. samplingRateNode = expr.Build(tableSampleExpr);
  596. if (!samplingRateNode) {
  597. return nullptr;
  598. }
  599. if (tableSampleClause.HasBlock6()) {
  600. const auto& repeatableExpr = tableSampleClause.GetBlock6().GetRule_repeatable_clause1().GetRule_expr3();
  601. samplingSeedNode = expr.Build(repeatableExpr);
  602. if (!samplingSeedNode) {
  603. return nullptr;
  604. }
  605. }
  606. pos = GetPos(sampleBlock.GetAlt2().GetRule_tablesample_clause1().GetToken1());
  607. Ctx.IncrementMonCounter("sql_features", "SampleClause");
  608. }
  609. break;
  610. case TRule_named_single_source::TBlock4::ALT_NOT_SET:
  611. Y_ABORT("SampleClause: does not corresond to grammar changes");
  612. }
  613. if (!singleSource->SetSamplingOptions(Ctx, pos, sampleClause, mode, samplingRateNode, samplingSeedNode)) {
  614. Ctx.IncrementMonCounter("sql_errors", "IncorrectSampleClause");
  615. return nullptr;
  616. }
  617. }
  618. return singleSource;
  619. }
  620. bool TSqlSelect::ColumnName(TVector<TNodePtr>& keys, const TRule_column_name& node) {
  621. const auto sourceName = OptIdPrefixAsStr(node.GetRule_opt_id_prefix1(), *this);
  622. const auto columnName = Id(node.GetRule_an_id2(), *this);
  623. if (columnName.empty()) {
  624. // TDOD: Id() should return TMaybe<TString>
  625. if (!Ctx.HasPendingErrors) {
  626. Ctx.Error() << "Empty column name is not allowed";
  627. }
  628. return false;
  629. }
  630. keys.push_back(BuildColumn(Ctx.Pos(), columnName, sourceName));
  631. return true;
  632. }
  633. bool TSqlSelect::ColumnName(TVector<TNodePtr>& keys, const TRule_without_column_name& node) {
  634. // without_column_name: (an_id DOT an_id) | an_id_without;
  635. TString sourceName;
  636. TString columnName;
  637. switch (node.Alt_case()) {
  638. case TRule_without_column_name::kAltWithoutColumnName1:
  639. sourceName = Id(node.GetAlt_without_column_name1().GetRule_an_id1(), *this);
  640. columnName = Id(node.GetAlt_without_column_name1().GetRule_an_id3(), *this);
  641. break;
  642. case TRule_without_column_name::kAltWithoutColumnName2:
  643. columnName = Id(node.GetAlt_without_column_name2().GetRule_an_id_without1(), *this);
  644. break;
  645. case TRule_without_column_name::ALT_NOT_SET:
  646. Y_ABORT("You should change implementation according to grammar changes");
  647. }
  648. if (columnName.empty()) {
  649. // TDOD: Id() should return TMaybe<TString>
  650. if (!Ctx.HasPendingErrors) {
  651. Ctx.Error() << "Empty column name is not allowed";
  652. }
  653. return false;
  654. }
  655. keys.push_back(BuildColumn(Ctx.Pos(), columnName, sourceName));
  656. return true;
  657. }
  658. template<typename TRule>
  659. bool TSqlSelect::ColumnList(TVector<TNodePtr>& keys, const TRule& node) {
  660. bool result;
  661. if constexpr (std::is_same_v<TRule, TRule_column_list>) {
  662. result = ColumnName(keys, node.GetRule_column_name1());
  663. } else {
  664. result = ColumnName(keys, node.GetRule_without_column_name1());
  665. }
  666. if (!result) {
  667. return false;
  668. }
  669. for (auto b: node.GetBlock2()) {
  670. Token(b.GetToken1());
  671. if constexpr (std::is_same_v<TRule, TRule_column_list>) {
  672. result = ColumnName(keys, b.GetRule_column_name2());
  673. } else {
  674. result = ColumnName(keys, b.GetRule_without_column_name2());
  675. }
  676. if (!result) {
  677. return false;
  678. }
  679. }
  680. return true;
  681. }
  682. bool TSqlSelect::NamedColumn(TVector<TNodePtr>& columnList, const TRule_named_column& node) {
  683. if (!ColumnName(columnList, node.GetRule_column_name1())) {
  684. return false;
  685. }
  686. if (node.HasBlock2()) {
  687. const auto label = Id(node.GetBlock2().GetRule_an_id2(), *this);
  688. columnList.back()->SetLabel(label);
  689. }
  690. return true;
  691. }
  692. TSourcePtr TSqlSelect::ProcessCore(const TRule_process_core& node, const TWriteSettings& settings, TPosition& selectPos) {
  693. // PROCESS STREAM? named_single_source (COMMA named_single_source)* (USING using_call_expr (AS an_id)?
  694. // (WITH external_call_settings)?
  695. // (WHERE expr)? (HAVING expr)? (ASSUME order_by_clause)?)?
  696. Token(node.GetToken1());
  697. TPosition startPos(Ctx.Pos());
  698. if (!selectPos) {
  699. selectPos = startPos;
  700. }
  701. const bool hasUsing = node.HasBlock5();
  702. const bool unorderedSubquery = hasUsing;
  703. TSourcePtr source(NamedSingleSource(node.GetRule_named_single_source3(), unorderedSubquery));
  704. if (!source) {
  705. return nullptr;
  706. }
  707. if (node.GetBlock4().size()) {
  708. TVector<TSourcePtr> sources(1, source);
  709. for (auto& s: node.GetBlock4()) {
  710. sources.push_back(NamedSingleSource(s.GetRule_named_single_source2(), unorderedSubquery));
  711. if (!sources.back()) {
  712. return nullptr;
  713. }
  714. }
  715. auto pos = source->GetPos();
  716. source = BuildMuxSource(pos, std::move(sources));
  717. }
  718. const bool processStream = node.HasBlock2();
  719. if (!hasUsing) {
  720. return BuildProcess(startPos, std::move(source), nullptr, false, {}, false, processStream, settings, {});
  721. }
  722. const auto& block5 = node.GetBlock5();
  723. if (block5.HasBlock5()) {
  724. TSqlExpression expr(Ctx, Mode);
  725. TColumnRefScope scope(Ctx, EColumnRefState::Allow);
  726. TNodePtr where = expr.Build(block5.GetBlock5().GetRule_expr2());
  727. if (!where || !source->AddFilter(Ctx, where)) {
  728. return nullptr;
  729. }
  730. Ctx.IncrementMonCounter("sql_features", "ProcessWhere");
  731. } else {
  732. Ctx.IncrementMonCounter("sql_features", processStream ? "ProcessStream" : "Process");
  733. }
  734. if (block5.HasBlock6()) {
  735. Ctx.Error() << "PROCESS does not allow HAVING yet! You may request it on yql@ maillist.";
  736. return nullptr;
  737. }
  738. bool listCall = false;
  739. TSqlCallExpr call(Ctx, Mode);
  740. bool initRet = call.Init(block5.GetRule_using_call_expr2());
  741. if (initRet) {
  742. call.IncCounters();
  743. }
  744. if (!initRet) {
  745. return nullptr;
  746. }
  747. auto args = call.GetArgs();
  748. for (auto& arg: args) {
  749. if (auto placeholder = dynamic_cast<TTableRows*>(arg.Get())) {
  750. if (listCall) {
  751. Ctx.Error() << "Only one TableRows() argument is allowed.";
  752. return nullptr;
  753. }
  754. listCall = true;
  755. }
  756. }
  757. if (!call.IsExternal() && block5.HasBlock4()) {
  758. Ctx.Error() << "PROCESS without USING EXTERNAL FUNCTION doesn't allow WITH block";
  759. return nullptr;
  760. }
  761. if (block5.HasBlock4()) {
  762. const auto& block54 = block5.GetBlock4();
  763. if (!call.ConfigureExternalCall(block54.GetRule_external_call_settings2())) {
  764. return nullptr;
  765. }
  766. }
  767. TSqlCallExpr finalCall(call, args);
  768. TNodePtr with(finalCall.IsExternal() ? finalCall.BuildCall() : finalCall.BuildUdf(/* forReduce = */ false));
  769. if (!with) {
  770. return {};
  771. }
  772. args = finalCall.GetArgs();
  773. if (call.IsExternal())
  774. listCall = true;
  775. if (block5.HasBlock3()) {
  776. with->SetLabel(Id(block5.GetBlock3().GetRule_an_id2(), *this));
  777. }
  778. if (call.IsExternal() && block5.HasBlock7()) {
  779. Ctx.Error() << "PROCESS with USING EXTERNAL FUNCTION doesn't allow ASSUME block";
  780. return nullptr;
  781. }
  782. TVector<TSortSpecificationPtr> assumeOrderBy;
  783. if (block5.HasBlock7()) {
  784. if (!OrderByClause(block5.GetBlock7().GetRule_order_by_clause2(), assumeOrderBy)) {
  785. return nullptr;
  786. }
  787. Ctx.IncrementMonCounter("sql_features", IsColumnsOnly(assumeOrderBy) ? "AssumeOrderBy" : "AssumeOrderByExpr");
  788. }
  789. return BuildProcess(startPos, std::move(source), with, finalCall.IsExternal(), std::move(args), listCall, processStream, settings, assumeOrderBy);
  790. }
  791. TSourcePtr TSqlSelect::ReduceCore(const TRule_reduce_core& node, const TWriteSettings& settings, TPosition& selectPos) {
  792. // REDUCE named_single_source (COMMA named_single_source)* (PRESORT sort_specification_list)?
  793. // ON column_list USING ALL? using_call_expr (AS an_id)?
  794. // (WHERE expr)? (HAVING expr)? (ASSUME order_by_clause)?
  795. Token(node.GetToken1());
  796. TPosition startPos(Ctx.Pos());
  797. if (!selectPos) {
  798. selectPos = startPos;
  799. }
  800. TSourcePtr source(NamedSingleSource(node.GetRule_named_single_source2(), true));
  801. if (!source) {
  802. return {};
  803. }
  804. if (node.GetBlock3().size()) {
  805. TVector<TSourcePtr> sources(1, source);
  806. for (auto& s: node.GetBlock3()) {
  807. sources.push_back(NamedSingleSource(s.GetRule_named_single_source2(), true));
  808. if (!sources.back()) {
  809. return nullptr;
  810. }
  811. }
  812. auto pos = source->GetPos();
  813. source = BuildMuxSource(pos, std::move(sources));
  814. }
  815. TVector<TSortSpecificationPtr> orderBy;
  816. if (node.HasBlock4()) {
  817. if (!SortSpecificationList(node.GetBlock4().GetRule_sort_specification_list2(), orderBy)) {
  818. return {};
  819. }
  820. }
  821. TVector<TNodePtr> keys;
  822. if (!ColumnList(keys, node.GetRule_column_list6())) {
  823. return nullptr;
  824. }
  825. if (node.HasBlock11()) {
  826. TColumnRefScope scope(Ctx, EColumnRefState::Allow);
  827. TSqlExpression expr(Ctx, Mode);
  828. TNodePtr where = expr.Build(node.GetBlock11().GetRule_expr2());
  829. if (!where || !source->AddFilter(Ctx, where)) {
  830. return nullptr;
  831. }
  832. Ctx.IncrementMonCounter("sql_features", "ReduceWhere");
  833. } else {
  834. Ctx.IncrementMonCounter("sql_features", "Reduce");
  835. }
  836. TNodePtr having;
  837. if (node.HasBlock12()) {
  838. TColumnRefScope scope(Ctx, EColumnRefState::Allow);
  839. TSqlExpression expr(Ctx, Mode);
  840. having = expr.Build(node.GetBlock12().GetRule_expr2());
  841. if (!having) {
  842. return nullptr;
  843. }
  844. }
  845. bool listCall = false;
  846. TSqlCallExpr call(Ctx, Mode);
  847. bool initRet = call.Init(node.GetRule_using_call_expr9());
  848. if (initRet) {
  849. call.IncCounters();
  850. }
  851. if (!initRet) {
  852. return nullptr;
  853. }
  854. auto args = call.GetArgs();
  855. for (auto& arg: args) {
  856. if (auto placeholder = dynamic_cast<TTableRows*>(arg.Get())) {
  857. if (listCall) {
  858. Ctx.Error() << "Only one TableRows() argument is allowed.";
  859. return nullptr;
  860. }
  861. listCall = true;
  862. }
  863. }
  864. TSqlCallExpr finalCall(call, args);
  865. TNodePtr udf(finalCall.BuildUdf(/* forReduce = */ true));
  866. if (!udf) {
  867. return {};
  868. }
  869. if (node.HasBlock10()) {
  870. udf->SetLabel(Id(node.GetBlock10().GetRule_an_id2(), *this));
  871. }
  872. const auto reduceMode = node.HasBlock8() ? ReduceMode::ByAll : ReduceMode::ByPartition;
  873. TVector<TSortSpecificationPtr> assumeOrderBy;
  874. if (node.HasBlock13()) {
  875. if (!OrderByClause(node.GetBlock13().GetRule_order_by_clause2(), assumeOrderBy)) {
  876. return nullptr;
  877. }
  878. Ctx.IncrementMonCounter("sql_features", IsColumnsOnly(assumeOrderBy) ? "AssumeOrderBy" : "AssumeOrderByExpr");
  879. }
  880. return BuildReduce(startPos, reduceMode, std::move(source), std::move(orderBy), std::move(keys), std::move(args), udf, having,
  881. settings, assumeOrderBy, listCall);
  882. }
  883. TSourcePtr TSqlSelect::SelectCore(const TRule_select_core& node, const TWriteSettings& settings, TPosition& selectPos,
  884. TMaybe<TSelectKindPlacement> placement, TVector<TSortSpecificationPtr>& selectOpOrderBy, bool& selectOpAssumeOrderBy)
  885. {
  886. // (FROM join_source)? SELECT STREAM? opt_set_quantifier result_column (COMMA result_column)* COMMA? (WITHOUT column_list)? (FROM join_source)? (WHERE expr)?
  887. // group_by_clause? (HAVING expr)? window_clause? ext_order_by_clause?
  888. selectOpOrderBy = {};
  889. selectOpAssumeOrderBy = false;
  890. if (node.HasBlock1()) {
  891. Token(node.GetBlock1().GetToken1());
  892. } else {
  893. Token(node.GetToken2());
  894. }
  895. TPosition startPos(Ctx.Pos());
  896. if (!selectPos) {
  897. selectPos = Ctx.Pos();
  898. }
  899. const auto hints = Ctx.PullHintForToken(selectPos);
  900. TColumnsSets uniqueSets, distinctSets;
  901. for (const auto& hint : hints) {
  902. if (const auto& name = to_lower(hint.Name); name == "unique")
  903. uniqueSets.insert_unique(NSorted::TSimpleSet<TString>(hint.Values.cbegin(), hint.Values.cend()));
  904. else if (name == "distinct") {
  905. uniqueSets.insert_unique(NSorted::TSimpleSet<TString>(hint.Values.cbegin(), hint.Values.cend()));
  906. distinctSets.insert_unique(NSorted::TSimpleSet<TString>(hint.Values.cbegin(), hint.Values.cend()));
  907. } else {
  908. Ctx.Warning(hint.Pos, TIssuesIds::YQL_UNUSED_HINT) << "Hint " << hint.Name << " will not be used";
  909. }
  910. }
  911. const bool distinct = IsDistinctOptSet(node.GetRule_opt_set_quantifier4());
  912. if (distinct) {
  913. Ctx.IncrementMonCounter("sql_features", "DistinctInSelect");
  914. }
  915. TSourcePtr source(BuildFakeSource(selectPos, /* missingFrom = */ true, Mode == NSQLTranslation::ESqlMode::SUBQUERY));
  916. if (node.HasBlock1() && node.HasBlock9()) {
  917. Token(node.GetBlock9().GetToken1());
  918. Ctx.IncrementMonCounter("sql_errors", "DoubleFrom");
  919. Ctx.Error() << "Only one FROM clause is allowed";
  920. return nullptr;
  921. }
  922. if (node.HasBlock1()) {
  923. source = JoinSource(node.GetBlock1().GetRule_join_source2());
  924. Ctx.IncrementMonCounter("sql_features", "FromInFront");
  925. } else if (node.HasBlock9()) {
  926. source = JoinSource(node.GetBlock9().GetRule_join_source2());
  927. }
  928. if (!source) {
  929. return nullptr;
  930. }
  931. const bool selectStream = node.HasBlock3();
  932. TVector<TNodePtr> without;
  933. if (node.HasBlock8()) {
  934. if (!ColumnList(without, node.GetBlock8().GetRule_without_column_list2())) {
  935. return nullptr;
  936. }
  937. }
  938. if (node.HasBlock10()) {
  939. auto block = node.GetBlock10();
  940. Token(block.GetToken1());
  941. TPosition pos(Ctx.Pos());
  942. TNodePtr where;
  943. {
  944. TColumnRefScope scope(Ctx, EColumnRefState::Allow);
  945. TSqlExpression expr(Ctx, Mode);
  946. where = expr.Build(block.GetRule_expr2());
  947. }
  948. if (!where) {
  949. Ctx.IncrementMonCounter("sql_errors", "WhereInvalid");
  950. return nullptr;
  951. }
  952. if (!source->AddFilter(Ctx, where)) {
  953. Ctx.IncrementMonCounter("sql_errors", "WhereNotSupportedBySource");
  954. return nullptr;
  955. }
  956. Ctx.IncrementMonCounter("sql_features", "Where");
  957. }
  958. /// \todo merge gtoupByExpr and groupBy in one
  959. TVector<TNodePtr> groupByExpr, groupBy;
  960. TLegacyHoppingWindowSpecPtr legacyHoppingWindowSpec;
  961. bool compactGroupBy = false;
  962. TString groupBySuffix;
  963. if (node.HasBlock11()) {
  964. TGroupByClause clause(Ctx, Mode);
  965. if (!clause.Build(node.GetBlock11().GetRule_group_by_clause1())) {
  966. return nullptr;
  967. }
  968. bool hasHopping = (bool)clause.GetLegacyHoppingWindow();
  969. for (const auto& exprAlias: clause.Aliases()) {
  970. YQL_ENSURE(exprAlias.first == exprAlias.second->GetLabel());
  971. groupByExpr.emplace_back(exprAlias.second);
  972. hasHopping |= (bool)dynamic_cast<THoppingWindow*>(exprAlias.second.Get());
  973. }
  974. groupBy = std::move(clause.Content());
  975. clause.SetFeatures("sql_features");
  976. legacyHoppingWindowSpec = clause.GetLegacyHoppingWindow();
  977. compactGroupBy = clause.IsCompactGroupBy();
  978. groupBySuffix = clause.GetSuffix();
  979. if (source->IsStream() && !hasHopping) {
  980. Ctx.Error() << "Streaming group by query must have a hopping window specification.";
  981. return nullptr;
  982. }
  983. }
  984. TNodePtr having;
  985. if (node.HasBlock12()) {
  986. TSqlExpression expr(Ctx, Mode);
  987. TColumnRefScope scope(Ctx, EColumnRefState::Allow);
  988. having = expr.Build(node.GetBlock12().GetRule_expr2());
  989. if (!having) {
  990. return nullptr;
  991. }
  992. Ctx.IncrementMonCounter("sql_features", "Having");
  993. }
  994. TWinSpecs windowSpec;
  995. if (node.HasBlock13()) {
  996. if (source->IsStream()) {
  997. Ctx.Error() << "WINDOW is not allowed in streaming queries";
  998. return nullptr;
  999. }
  1000. if (!WindowClause(node.GetBlock13().GetRule_window_clause1(), windowSpec)) {
  1001. return nullptr;
  1002. }
  1003. Ctx.IncrementMonCounter("sql_features", "WindowClause");
  1004. }
  1005. bool assumeSorted = false;
  1006. TVector<TSortSpecificationPtr> orderBy;
  1007. if (node.HasBlock14()) {
  1008. auto& orderBlock = node.GetBlock14().GetRule_ext_order_by_clause1();
  1009. assumeSorted = orderBlock.HasBlock1();
  1010. Token(orderBlock.GetRule_order_by_clause2().GetToken1());
  1011. if (source->IsStream()) {
  1012. Ctx.Error() << "ORDER BY is not allowed in streaming queries";
  1013. return nullptr;
  1014. }
  1015. if (!ValidateLimitOrderByWithSelectOp(placement, "ORDER BY")) {
  1016. return nullptr;
  1017. }
  1018. if (!OrderByClause(orderBlock.GetRule_order_by_clause2(), orderBy)) {
  1019. return nullptr;
  1020. }
  1021. Ctx.IncrementMonCounter("sql_features", IsColumnsOnly(orderBy)
  1022. ? (assumeSorted ? "AssumeOrderBy" : "OrderBy")
  1023. : (assumeSorted ? "AssumeOrderByExpr" : "OrderByExpr")
  1024. );
  1025. if (!NeedPassLimitOrderByToUnderlyingSelect(placement)) {
  1026. selectOpOrderBy.swap(orderBy);
  1027. std::swap(selectOpAssumeOrderBy, assumeSorted);
  1028. }
  1029. }
  1030. TVector<TNodePtr> terms;
  1031. {
  1032. class TScopedWinSpecs {
  1033. public:
  1034. TScopedWinSpecs(TContext& ctx, TWinSpecs& specs)
  1035. : Ctx(ctx)
  1036. {
  1037. Ctx.WinSpecsScopes.push_back(std::ref(specs));
  1038. }
  1039. ~TScopedWinSpecs() {
  1040. Ctx.WinSpecsScopes.pop_back();
  1041. }
  1042. private:
  1043. TContext& Ctx;
  1044. };
  1045. TScopedWinSpecs scoped(Ctx, windowSpec);
  1046. if (!SelectTerm(terms, node.GetRule_result_column5())) {
  1047. return nullptr;
  1048. }
  1049. for (auto block: node.GetBlock6()) {
  1050. if (!SelectTerm(terms, block.GetRule_result_column2())) {
  1051. return nullptr;
  1052. }
  1053. }
  1054. }
  1055. if (!ValidateSelectColumns(terms)) {
  1056. return nullptr;
  1057. }
  1058. return BuildSelectCore(Ctx, startPos, std::move(source), groupByExpr, groupBy, compactGroupBy, groupBySuffix, assumeSorted, orderBy, having,
  1059. std::move(windowSpec), legacyHoppingWindowSpec, std::move(terms), distinct, std::move(without), selectStream, settings, std::move(uniqueSets), std::move(distinctSets));
  1060. }
  1061. bool TSqlSelect::WindowDefinition(const TRule_window_definition& rule, TWinSpecs& winSpecs) {
  1062. const TString windowName = Id(rule.GetRule_new_window_name1().GetRule_window_name1().GetRule_an_id_window1(), *this);
  1063. if (winSpecs.contains(windowName)) {
  1064. Ctx.Error() << "Unable to declare window with same name: " << windowName;
  1065. return false;
  1066. }
  1067. auto windowSpec = WindowSpecification(rule.GetRule_window_specification3().GetRule_window_specification_details2());
  1068. if (!windowSpec) {
  1069. return false;
  1070. }
  1071. winSpecs.emplace(windowName, std::move(windowSpec));
  1072. return true;
  1073. }
  1074. bool TSqlSelect::WindowClause(const TRule_window_clause& rule, TWinSpecs& winSpecs) {
  1075. auto windowList = rule.GetRule_window_definition_list2();
  1076. if (!WindowDefinition(windowList.GetRule_window_definition1(), winSpecs)) {
  1077. return false;
  1078. }
  1079. for (auto& block: windowList.GetBlock2()) {
  1080. if (!WindowDefinition(block.GetRule_window_definition2(), winSpecs)) {
  1081. return false;
  1082. }
  1083. }
  1084. return true;
  1085. }
  1086. bool TSqlTranslation::OrderByClause(const TRule_order_by_clause& node, TVector<TSortSpecificationPtr>& orderBy) {
  1087. return SortSpecificationList(node.GetRule_sort_specification_list3(), orderBy);
  1088. }
  1089. bool TSqlSelect::ValidateLimitOrderByWithSelectOp(TMaybe<TSelectKindPlacement> placement, TStringBuf what) {
  1090. if (!placement.Defined()) {
  1091. // not in select_op chain
  1092. return true;
  1093. }
  1094. if (!placement->IsLastInSelectOp) {
  1095. Ctx.Error() << what << " within UNION ALL is only allowed after last subquery";
  1096. return false;
  1097. }
  1098. return true;
  1099. }
  1100. bool TSqlSelect::NeedPassLimitOrderByToUnderlyingSelect(TMaybe<TSelectKindPlacement> placement) {
  1101. return !placement.Defined() || !placement->IsLastInSelectOp;
  1102. }
  1103. TSqlSelect::TSelectKindResult TSqlSelect::SelectKind(const TRule_select_kind_partial& node, TPosition& selectPos,
  1104. TMaybe<TSelectKindPlacement> placement)
  1105. {
  1106. auto res = SelectKind(node.GetRule_select_kind1(), selectPos, placement);
  1107. if (!res) {
  1108. return {};
  1109. }
  1110. TPosition startPos(Ctx.Pos());
  1111. /// LIMIT INTEGER block
  1112. TNodePtr skipTake;
  1113. if (node.HasBlock2()) {
  1114. auto block = node.GetBlock2();
  1115. Token(block.GetToken1());
  1116. TPosition pos(Ctx.Pos());
  1117. if (!ValidateLimitOrderByWithSelectOp(placement, "LIMIT")) {
  1118. return {};
  1119. }
  1120. TSqlExpression takeExpr(Ctx, Mode);
  1121. auto take = takeExpr.Build(block.GetRule_expr2());
  1122. if (!take) {
  1123. return{};
  1124. }
  1125. TNodePtr skip;
  1126. if (block.HasBlock3()) {
  1127. TSqlExpression skipExpr(Ctx, Mode);
  1128. skip = skipExpr.Build(block.GetBlock3().GetRule_expr2());
  1129. if (!skip) {
  1130. return {};
  1131. }
  1132. if (Token(block.GetBlock3().GetToken1()) == ",") {
  1133. // LIMIT skip, take
  1134. skip.Swap(take);
  1135. Ctx.IncrementMonCounter("sql_features", "LimitSkipTake");
  1136. } else {
  1137. Ctx.IncrementMonCounter("sql_features", "LimitOffset");
  1138. }
  1139. }
  1140. auto st = BuildSkipTake(pos, skip, take);
  1141. if (NeedPassLimitOrderByToUnderlyingSelect(placement)) {
  1142. skipTake = st;
  1143. } else {
  1144. res.SelectOpSkipTake = st;
  1145. }
  1146. Ctx.IncrementMonCounter("sql_features", "Limit");
  1147. }
  1148. res.Source = BuildSelect(startPos, std::move(res.Source), skipTake);
  1149. return res;
  1150. }
  1151. TSqlSelect::TSelectKindResult TSqlSelect::SelectKind(const TRule_select_kind& node, TPosition& selectPos,
  1152. TMaybe<TSelectKindPlacement> placement)
  1153. {
  1154. const bool discard = node.HasBlock1();
  1155. const bool hasLabel = node.HasBlock3();
  1156. if (hasLabel && (Mode == NSQLTranslation::ESqlMode::LIMITED_VIEW || Mode == NSQLTranslation::ESqlMode::SUBQUERY)) {
  1157. Ctx.Error() << "INTO RESULT is not allowed in current mode";
  1158. return {};
  1159. }
  1160. if (discard && hasLabel) {
  1161. Ctx.Error() << "DISCARD and INTO RESULT cannot be used at the same time";
  1162. return {};
  1163. }
  1164. if (discard && !selectPos) {
  1165. selectPos = Ctx.TokenPosition(node.GetBlock1().GetToken1());
  1166. }
  1167. TWriteSettings settings;
  1168. settings.Discard = discard;
  1169. if (hasLabel) {
  1170. settings.Label = PureColumnOrNamed(node.GetBlock3().GetRule_pure_column_or_named3(), *this);
  1171. }
  1172. TSelectKindResult res;
  1173. if (placement.Defined()) {
  1174. if (placement->IsFirstInSelectOp) {
  1175. res.Settings.Discard = settings.Discard;
  1176. } else if (settings.Discard) {
  1177. auto discardPos = Ctx.TokenPosition(node.GetBlock1().GetToken1());
  1178. Ctx.Error(discardPos) << "DISCARD within UNION ALL is only allowed before first subquery";
  1179. return {};
  1180. }
  1181. if (placement->IsLastInSelectOp) {
  1182. res.Settings.Label = settings.Label;
  1183. } else if (!settings.Label.Empty()) {
  1184. auto labelPos = Ctx.TokenPosition(node.GetBlock3().GetToken1());
  1185. Ctx.Error(labelPos) << "INTO RESULT within UNION ALL is only allowed after last subquery";
  1186. return {};
  1187. }
  1188. settings = {};
  1189. }
  1190. switch (node.GetBlock2().Alt_case()) {
  1191. case TRule_select_kind_TBlock2::kAlt1:
  1192. res.Source = ProcessCore(node.GetBlock2().GetAlt1().GetRule_process_core1(), settings, selectPos);
  1193. break;
  1194. case TRule_select_kind_TBlock2::kAlt2:
  1195. res.Source = ReduceCore(node.GetBlock2().GetAlt2().GetRule_reduce_core1(), settings, selectPos);
  1196. break;
  1197. case TRule_select_kind_TBlock2::kAlt3: {
  1198. res.Source = SelectCore(node.GetBlock2().GetAlt3().GetRule_select_core1(), settings, selectPos,
  1199. placement, res.SelectOpOrderBy, res.SelectOpAssumeOrderBy);
  1200. break;
  1201. }
  1202. case TRule_select_kind_TBlock2::ALT_NOT_SET:
  1203. Y_ABORT("You should change implementation according to grammar changes");
  1204. }
  1205. return res;
  1206. }
  1207. TSqlSelect::TSelectKindResult TSqlSelect::SelectKind(const TRule_select_kind_parenthesis& node, TPosition& selectPos,
  1208. TMaybe<TSelectKindPlacement> placement)
  1209. {
  1210. if (node.Alt_case() == TRule_select_kind_parenthesis::kAltSelectKindParenthesis1) {
  1211. return SelectKind(node.GetAlt_select_kind_parenthesis1().GetRule_select_kind_partial1(), selectPos, placement);
  1212. } else {
  1213. return SelectKind(node.GetAlt_select_kind_parenthesis2().GetRule_select_kind_partial2(), selectPos, {});
  1214. }
  1215. }
  1216. template<typename TRule>
  1217. TSourcePtr TSqlSelect::Build(const TRule& node, TPosition pos, TSelectKindResult&& first) {
  1218. if (node.GetBlock2().empty()) {
  1219. return std::move(first.Source);
  1220. }
  1221. auto blocks = node.GetBlock2();
  1222. TPosition unionPos = pos; // Position of first select
  1223. TVector<TSortSpecificationPtr> orderBy;
  1224. bool assumeOrderBy = false;
  1225. TNodePtr skipTake;
  1226. TWriteSettings outermostSettings;
  1227. outermostSettings.Discard = first.Settings.Discard;
  1228. TVector<TSourcePtr> sources{ std::move(first.Source)};
  1229. bool currentQuantifier = false;
  1230. for (int i = 0; i < blocks.size(); ++i) {
  1231. auto& b = blocks[i];
  1232. const bool second = (i == 0);
  1233. const bool last = (i + 1 == blocks.size());
  1234. TSelectKindPlacement placement;
  1235. placement.IsLastInSelectOp = last;
  1236. TSelectKindResult next = SelectKind(b.GetRule_select_kind_parenthesis2(), pos, placement);
  1237. if (!next) {
  1238. return nullptr;
  1239. }
  1240. if (last) {
  1241. orderBy = next.SelectOpOrderBy;
  1242. assumeOrderBy = next.SelectOpAssumeOrderBy;
  1243. skipTake = next.SelectOpSkipTake;
  1244. outermostSettings.Label = next.Settings.Label;
  1245. }
  1246. switch (b.GetRule_select_op1().Alt_case()) {
  1247. case TRule_select_op::kAltSelectOp1:
  1248. break;
  1249. case TRule_select_op::kAltSelectOp2:
  1250. case TRule_select_op::kAltSelectOp3:
  1251. Ctx.Error() << "INTERSECT and EXCEPT are not implemented yet";
  1252. return nullptr;
  1253. case TRule_select_op::ALT_NOT_SET:
  1254. Y_ABORT("You should change implementation according to grammar changes");
  1255. }
  1256. const bool quantifier = b.GetRule_select_op1().GetAlt_select_op1().HasBlock2();
  1257. if (!second && quantifier != currentQuantifier) {
  1258. auto source = BuildUnion(pos, std::move(sources), currentQuantifier, {});
  1259. sources.clear();
  1260. sources.emplace_back(std::move(source));
  1261. }
  1262. sources.emplace_back(std::move(next.Source));
  1263. currentQuantifier = quantifier;
  1264. }
  1265. auto result = BuildUnion(pos, std::move(sources), currentQuantifier, outermostSettings);
  1266. if (orderBy) {
  1267. TVector<TNodePtr> groupByExpr;
  1268. TVector<TNodePtr> groupBy;
  1269. bool compactGroupBy = false;
  1270. TString groupBySuffix = "";
  1271. TNodePtr having;
  1272. TWinSpecs winSpecs;
  1273. TLegacyHoppingWindowSpecPtr legacyHoppingWindowSpec;
  1274. bool distinct = false;
  1275. TVector<TNodePtr> without;
  1276. bool stream = false;
  1277. TVector<TNodePtr> terms;
  1278. terms.push_back(BuildColumn(unionPos, "*", ""));
  1279. result = BuildSelectCore(Ctx, unionPos, std::move(result), groupByExpr, groupBy, compactGroupBy, groupBySuffix,
  1280. assumeOrderBy, orderBy, having, std::move(winSpecs), legacyHoppingWindowSpec, std::move(terms),
  1281. distinct, std::move(without), stream, outermostSettings, {}, {});
  1282. result = BuildSelect(unionPos, std::move(result), skipTake);
  1283. } else if (skipTake) {
  1284. result = BuildSelect(unionPos, std::move(result), skipTake);
  1285. }
  1286. return result;
  1287. }
  1288. TSourcePtr TSqlSelect::Build(const TRule_select_stmt& node, TPosition& selectPos) {
  1289. TMaybe<TSelectKindPlacement> placement;
  1290. if (!node.GetBlock2().empty()) {
  1291. placement.ConstructInPlace();
  1292. placement->IsFirstInSelectOp = true;
  1293. }
  1294. auto res = SelectKind(node.GetRule_select_kind_parenthesis1(), selectPos, placement);
  1295. if (!res) {
  1296. return nullptr;
  1297. }
  1298. return Build(node, selectPos, std::move(res));
  1299. }
  1300. TSourcePtr TSqlSelect::Build(const TRule_select_unparenthesized_stmt& node, TPosition& selectPos) {
  1301. TMaybe<TSelectKindPlacement> placement;
  1302. if (!node.GetBlock2().empty()) {
  1303. placement.ConstructInPlace();
  1304. placement->IsFirstInSelectOp = true;
  1305. }
  1306. auto res = SelectKind(node.GetRule_select_kind_partial1(), selectPos, placement);
  1307. if (!res) {
  1308. return nullptr;
  1309. }
  1310. return Build(node, selectPos, std::move(res));
  1311. }
  1312. } // namespace NSQLTranslationV1