sql_select.cpp 56 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798991001011021031041051061071081091101111121131141151161171181191201211221231241251261271281291301311321331341351361371381391401411421431441451461471481491501511521531541551561571581591601611621631641651661671681691701711721731741751761771781791801811821831841851861871881891901911921931941951961971981992002012022032042052062072082092102112122132142152162172182192202212222232242252262272282292302312322332342352362372382392402412422432442452462472482492502512522532542552562572582592602612622632642652662672682692702712722732742752762772782792802812822832842852862872882892902912922932942952962972982993003013023033043053063073083093103113123133143153163173183193203213223233243253263273283293303313323333343353363373383393403413423433443453463473483493503513523533543553563573583593603613623633643653663673683693703713723733743753763773783793803813823833843853863873883893903913923933943953963973983994004014024034044054064074084094104114124134144154164174184194204214224234244254264274284294304314324334344354364374384394404414424434444454464474484494504514524534544554564574584594604614624634644654664674684694704714724734744754764774784794804814824834844854864874884894904914924934944954964974984995005015025035045055065075085095105115125135145155165175185195205215225235245255265275285295305315325335345355365375385395405415425435445455465475485495505515525535545555565575585595605615625635645655665675685695705715725735745755765775785795805815825835845855865875885895905915925935945955965975985996006016026036046056066076086096106116126136146156166176186196206216226236246256266276286296306316326336346356366376386396406416426436446456466476486496506516526536546556566576586596606616626636646656666676686696706716726736746756766776786796806816826836846856866876886896906916926936946956966976986997007017027037047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345134613471348134913501351135213531354135513561357135813591360136113621363136413651366136713681369137013711372137313741375137613771378137913801381138213831384138513861387138813891390139113921393139413951396139713981399140014011402140314041405140614071408140914101411141214131414141514161417141814191420142114221423142414251426142714281429143014311432143314341435143614371438143914401441144214431444144514461447144814491450145114521453145414551456145714581459146014611462146314641465146614671468146914701471
  1. #include "sql_select.h"
  2. #include "sql_call_expr.h"
  3. #include "sql_expression.h"
  4. #include "sql_group_by.h"
  5. #include "sql_values.h"
  6. #include "sql_match_recognize.h"
  7. namespace NSQLTranslationV1 {
  8. using namespace NSQLv1Generated;
  9. namespace {
  10. bool IsColumnsOnly(const TVector<TSortSpecificationPtr>& container) {
  11. for (const auto& elem: container) {
  12. if (!elem->OrderExpr->GetColumnName()) {
  13. return false;
  14. }
  15. }
  16. return true;
  17. }
  18. bool CollectJoinLinkSettings(TPosition pos, TJoinLinkSettings& linkSettings, TContext& ctx) {
  19. linkSettings = {};
  20. auto hints = ctx.PullHintForToken(pos);
  21. for (const auto& hint: hints) {
  22. const auto canonizedName = to_lower(hint.Name);
  23. auto newStrategy = TJoinLinkSettings::EStrategy::Default;
  24. if (canonizedName == "merge") {
  25. newStrategy = TJoinLinkSettings::EStrategy::SortedMerge;
  26. } else if (canonizedName == "streamlookup") {
  27. newStrategy = TJoinLinkSettings::EStrategy::StreamLookup;
  28. } else if (canonizedName == "map") {
  29. newStrategy = TJoinLinkSettings::EStrategy::ForceMap;
  30. } else if (canonizedName == "grace") {
  31. newStrategy = TJoinLinkSettings::EStrategy::ForceGrace;
  32. } else if (canonizedName == "compact") {
  33. linkSettings.Compact = true;
  34. continue;
  35. } else {
  36. ctx.Warning(hint.Pos, TIssuesIds::YQL_UNUSED_HINT) << "Unsupported join hint: " << hint.Name;
  37. }
  38. if (TJoinLinkSettings::EStrategy::Default == linkSettings.Strategy) {
  39. linkSettings.Strategy = newStrategy;
  40. linkSettings.Values = hint.Values;
  41. } else if (newStrategy == linkSettings.Strategy) {
  42. ctx.Error() << "Duplicate join strategy hint";
  43. return false;
  44. } else {
  45. ctx.Error() << "Conflicting join strategy hints";
  46. return false;
  47. }
  48. }
  49. return true;
  50. }
  51. } // namespace
  52. bool TSqlSelect::JoinOp(ISource* join, const TRule_join_source::TBlock3& block, TMaybe<TPosition> anyPos) {
  53. // block: (join_op (ANY)? flatten_source join_constraint?)
  54. // join_op:
  55. // COMMA
  56. // | (NATURAL)? ((LEFT (ONLY | SEMI)? | RIGHT (ONLY | SEMI)? | EXCLUSION | FULL)? (OUTER)? | INNER | CROSS) JOIN
  57. //;
  58. const auto& node = block.GetRule_join_op1();
  59. TString joinOp("Inner");
  60. TJoinLinkSettings linkSettings;
  61. switch (node.Alt_case()) {
  62. case TRule_join_op::kAltJoinOp1: {
  63. joinOp = "Cross";
  64. if (!Ctx.AnsiImplicitCrossJoin) {
  65. Error() << "Cartesian product of tables is disabled. Please use "
  66. "explicit CROSS JOIN or enable it via PRAGMA AnsiImplicitCrossJoin";
  67. return false;
  68. }
  69. auto alt = node.GetAlt_join_op1();
  70. if (!CollectJoinLinkSettings(Ctx.TokenPosition(alt.GetToken1()), linkSettings, Ctx)) {
  71. return false;
  72. }
  73. Ctx.IncrementMonCounter("sql_join_operations", "CartesianProduct");
  74. break;
  75. }
  76. case TRule_join_op::kAltJoinOp2: {
  77. auto alt = node.GetAlt_join_op2();
  78. if (alt.HasBlock1()) {
  79. Ctx.IncrementMonCounter("sql_join_operations", "Natural");
  80. Error() << "Natural join is not implemented yet";
  81. return false;
  82. }
  83. if (!CollectJoinLinkSettings(Ctx.TokenPosition(alt.GetToken3()), linkSettings, Ctx)) {
  84. return false;
  85. }
  86. switch (alt.GetBlock2().Alt_case()) {
  87. case TRule_join_op::TAlt2::TBlock2::kAlt1:
  88. if (alt.GetBlock2().GetAlt1().HasBlock1()) {
  89. auto block = alt.GetBlock2().GetAlt1().GetBlock1();
  90. switch (block.Alt_case()) {
  91. case TRule_join_op_TAlt2_TBlock2_TAlt1_TBlock1::kAlt1:
  92. // left
  93. joinOp = Token(block.GetAlt1().GetToken1());
  94. if (block.GetAlt1().HasBlock2()) {
  95. joinOp += " " + Token(block.GetAlt1().GetBlock2().GetToken1());
  96. }
  97. break;
  98. case TRule_join_op_TAlt2_TBlock2_TAlt1_TBlock1::kAlt2:
  99. // right
  100. joinOp = Token(block.GetAlt2().GetToken1());
  101. if (block.GetAlt2().HasBlock2()) {
  102. joinOp += " " + Token(block.GetAlt2().GetBlock2().GetToken1());
  103. }
  104. break;
  105. case TRule_join_op_TAlt2_TBlock2_TAlt1_TBlock1::kAlt3:
  106. // exclusion
  107. joinOp = Token(block.GetAlt3().GetToken1());
  108. break;
  109. case TRule_join_op_TAlt2_TBlock2_TAlt1_TBlock1::kAlt4:
  110. // full
  111. joinOp = Token(block.GetAlt4().GetToken1());
  112. break;
  113. case TRule_join_op_TAlt2_TBlock2_TAlt1_TBlock1::ALT_NOT_SET:
  114. Ctx.IncrementMonCounter("sql_errors", "UnknownJoinOperation");
  115. AltNotImplemented("join_op", node);
  116. return false;
  117. }
  118. }
  119. if (alt.GetBlock2().GetAlt1().HasBlock2()) {
  120. TString normalizedOp = alt.GetBlock2().GetAlt1().HasBlock1() ? joinOp : "";
  121. normalizedOp.to_upper();
  122. if (!(normalizedOp == "LEFT" || normalizedOp == "RIGHT" || normalizedOp == "FULL")) {
  123. Token(alt.GetBlock2().GetAlt1().GetBlock2().GetToken1());
  124. Error() << "Invalid join type: " << normalizedOp << (normalizedOp.empty() ? "" : " ") << "OUTER JOIN. "
  125. << "OUTER keyword is optional and can only be used after LEFT, RIGHT or FULL";
  126. Ctx.IncrementMonCounter("sql_errors", "BadJoinType");
  127. return false;
  128. }
  129. }
  130. break;
  131. case TRule_join_op::TAlt2::TBlock2::kAlt2:
  132. joinOp = Token(alt.GetBlock2().GetAlt2().GetToken1());
  133. break;
  134. case TRule_join_op::TAlt2::TBlock2::kAlt3:
  135. joinOp = Token(alt.GetBlock2().GetAlt3().GetToken1());
  136. break;
  137. case TRule_join_op::TAlt2::TBlock2::ALT_NOT_SET:
  138. Ctx.IncrementMonCounter("sql_errors", "UnknownJoinOperation");
  139. AltNotImplemented("join_op", node);
  140. return false;
  141. }
  142. Ctx.IncrementMonCounter("sql_features", "Join");
  143. Ctx.IncrementMonCounter("sql_join_operations", joinOp);
  144. break;
  145. }
  146. case TRule_join_op::ALT_NOT_SET:
  147. Ctx.IncrementMonCounter("sql_errors", "UnknownJoinOperation2");
  148. AltNotImplemented("join_op", node);
  149. return false;
  150. }
  151. joinOp = NormalizeJoinOp(joinOp);
  152. if (linkSettings.Strategy != TJoinLinkSettings::EStrategy::Default && joinOp == "Cross") {
  153. Ctx.Warning(Ctx.Pos(), TIssuesIds::YQL_UNUSED_HINT) << "Non-default join strategy will not be used for CROSS JOIN";
  154. linkSettings.Strategy = TJoinLinkSettings::EStrategy::Default;
  155. }
  156. TNodePtr joinKeyExpr;
  157. if (block.HasBlock4()) {
  158. if (joinOp == "Cross") {
  159. Error() << "Cross join should not have ON or USING expression";
  160. Ctx.IncrementMonCounter("sql_errors", "BadJoinExpr");
  161. return false;
  162. }
  163. joinKeyExpr = JoinExpr(join, block.GetBlock4().GetRule_join_constraint1());
  164. if (!joinKeyExpr) {
  165. Ctx.IncrementMonCounter("sql_errors", "BadJoinExpr");
  166. return false;
  167. }
  168. }
  169. else {
  170. if (joinOp != "Cross") {
  171. Error() << "Expected ON or USING expression";
  172. Ctx.IncrementMonCounter("sql_errors", "BadJoinExpr");
  173. return false;
  174. }
  175. }
  176. if (joinOp == "Cross" && anyPos) {
  177. Ctx.Error(*anyPos) << "ANY should not be used with Cross JOIN";
  178. Ctx.IncrementMonCounter("sql_errors", "BadJoinAny");
  179. return false;
  180. }
  181. Y_DEBUG_ABORT_UNLESS(join->GetJoin());
  182. join->GetJoin()->SetupJoin(joinOp, joinKeyExpr, linkSettings);
  183. return true;
  184. }
  185. TNodePtr TSqlSelect::JoinExpr(ISource* join, const TRule_join_constraint& node) {
  186. switch (node.Alt_case()) {
  187. case TRule_join_constraint::kAltJoinConstraint1: {
  188. auto& alt = node.GetAlt_join_constraint1();
  189. Token(alt.GetToken1());
  190. TColumnRefScope scope(Ctx, EColumnRefState::Allow);
  191. TSqlExpression expr(Ctx, Mode);
  192. return expr.Build(alt.GetRule_expr2());
  193. }
  194. case TRule_join_constraint::kAltJoinConstraint2: {
  195. auto& alt = node.GetAlt_join_constraint2();
  196. Token(alt.GetToken1());
  197. TPosition pos(Ctx.Pos());
  198. TVector<TDeferredAtom> names;
  199. if (!PureColumnOrNamedListStr(alt.GetRule_pure_column_or_named_list2(), *this, names)) {
  200. return nullptr;
  201. }
  202. Y_DEBUG_ABORT_UNLESS(join->GetJoin());
  203. return join->GetJoin()->BuildJoinKeys(Ctx, names);
  204. }
  205. case TRule_join_constraint::ALT_NOT_SET:
  206. Ctx.IncrementMonCounter("sql_errors", "UnknownJoinConstraint");
  207. AltNotImplemented("join_constraint", node);
  208. break;
  209. }
  210. return nullptr;
  211. }
  212. bool TSqlSelect::FlattenByArg(const TString& sourceLabel, TVector<TNodePtr>& flattenByColumns, TVector<TNodePtr>& flattenByExprs,
  213. const TRule_flatten_by_arg& node)
  214. {
  215. // flatten_by_arg:
  216. // named_column
  217. // | LPAREN named_expr_list COMMA? RPAREN
  218. // ;
  219. flattenByColumns.clear();
  220. flattenByExprs.clear();
  221. TVector<TNodePtr> namedExprs;
  222. switch (node.Alt_case()) {
  223. case TRule_flatten_by_arg::kAltFlattenByArg1: {
  224. TVector<TNodePtr> columns;
  225. if (!NamedColumn(columns, node.GetAlt_flatten_by_arg1().GetRule_named_column1())) {
  226. return false;
  227. }
  228. YQL_ENSURE(columns.size() == 1);
  229. auto& column = columns.back();
  230. auto columnNamePtr = column->GetColumnName();
  231. YQL_ENSURE(columnNamePtr && *columnNamePtr);
  232. auto sourcePtr = column->GetSourceName();
  233. const bool isEmptySource = !sourcePtr || !*sourcePtr;
  234. if (isEmptySource || *sourcePtr == sourceLabel) {
  235. // select * from T flatten by x
  236. // select * from T as s flatten by x
  237. // select * from T as s flatten by s.x
  238. flattenByColumns.emplace_back(std::move(column));
  239. } else {
  240. // select * from T as s flatten by x.y as z
  241. if (!column->GetLabel()) {
  242. Ctx.Error(column->GetPos()) << "Unnamed expression after FLATTEN BY is not allowed";
  243. return false;
  244. }
  245. flattenByColumns.emplace_back(BuildColumn(column->GetPos(), column->GetLabel()));
  246. TVector<INode::TIdPart> ids;
  247. ids.push_back(BuildColumn(column->GetPos()));
  248. ids.push_back(*sourcePtr);
  249. ids.push_back(*columnNamePtr);
  250. auto node = BuildAccess(column->GetPos(), ids, false);
  251. node->SetLabel(column->GetLabel());
  252. flattenByExprs.emplace_back(std::move(node));
  253. }
  254. break;
  255. }
  256. case TRule_flatten_by_arg::kAltFlattenByArg2: {
  257. TColumnRefScope scope(Ctx, EColumnRefState::Allow);
  258. if (!NamedExprList(node.GetAlt_flatten_by_arg2().GetRule_named_expr_list2(), namedExprs) || Ctx.HasPendingErrors) {
  259. return false;
  260. }
  261. for (auto& namedExprNode : namedExprs) {
  262. YQL_ENSURE(!namedExprNode->ContentListPtr());
  263. auto sourcePtr = namedExprNode->GetSourceName();
  264. const bool isEmptySource = !sourcePtr || !*sourcePtr;
  265. auto columnNamePtr = namedExprNode->GetColumnName();
  266. if (columnNamePtr && (isEmptySource || *sourcePtr == sourceLabel)) {
  267. namedExprNode->AssumeColumn();
  268. flattenByColumns.emplace_back(std::move(namedExprNode));
  269. } else {
  270. auto nodeLabel = namedExprNode->GetLabel();
  271. if (!nodeLabel) {
  272. Ctx.Error(namedExprNode->GetPos()) << "Unnamed expression after FLATTEN BY is not allowed";
  273. return false;
  274. }
  275. flattenByColumns.emplace_back(BuildColumn(namedExprNode->GetPos(), nodeLabel));
  276. flattenByExprs.emplace_back(std::move(namedExprNode));
  277. }
  278. }
  279. break;
  280. }
  281. case TRule_flatten_by_arg::ALT_NOT_SET:
  282. Ctx.IncrementMonCounter("sql_errors", "UnknownFlattenByArg");
  283. AltNotImplemented("flatten_by_arg", node);
  284. return false;
  285. }
  286. return true;
  287. }
  288. TSourcePtr TSqlSelect::FlattenSource(const TRule_flatten_source& node) {
  289. auto source = NamedSingleSource(node.GetRule_named_single_source1(), true);
  290. if (!source) {
  291. return nullptr;
  292. }
  293. if (node.HasBlock2()) {
  294. auto flatten = node.GetBlock2();
  295. auto flatten2 = flatten.GetBlock2();
  296. switch (flatten2.Alt_case()) {
  297. case TRule_flatten_source::TBlock2::TBlock2::kAlt1: {
  298. TString mode = "auto";
  299. if (flatten2.GetAlt1().HasBlock1()) {
  300. mode = to_lower(Token(flatten2.GetAlt1().GetBlock1().GetToken1()));
  301. }
  302. TVector<TNodePtr> flattenByColumns;
  303. TVector<TNodePtr> flattenByExprs;
  304. if (!FlattenByArg(source->GetLabel(), flattenByColumns, flattenByExprs, flatten2.GetAlt1().GetRule_flatten_by_arg3())) {
  305. return nullptr;
  306. }
  307. Ctx.IncrementMonCounter("sql_features", "FlattenByColumns");
  308. if (!source->AddExpressions(Ctx, flattenByColumns, EExprSeat::FlattenBy)) {
  309. return nullptr;
  310. }
  311. if (!source->AddExpressions(Ctx, flattenByExprs, EExprSeat::FlattenByExpr)) {
  312. return nullptr;
  313. }
  314. source->SetFlattenByMode(mode);
  315. break;
  316. }
  317. case TRule_flatten_source::TBlock2::TBlock2::kAlt2: {
  318. Ctx.IncrementMonCounter("sql_features", "FlattenColumns");
  319. source->MarkFlattenColumns();
  320. break;
  321. }
  322. case TRule_flatten_source::TBlock2::TBlock2::ALT_NOT_SET:
  323. Ctx.IncrementMonCounter("sql_errors", "UnknownOrdinaryNamedColumn");
  324. AltNotImplemented("flatten_source", flatten2);
  325. }
  326. }
  327. return source;
  328. }
  329. TSourcePtr TSqlSelect::JoinSource(const TRule_join_source& node) {
  330. // join_source: (ANY)? flatten_source (join_op (ANY)? flatten_source join_constraint?)*;
  331. if (node.HasBlock1() && !node.Block3Size()) {
  332. Error() << "ANY is not allowed without JOIN";
  333. return nullptr;
  334. }
  335. TSourcePtr source(FlattenSource(node.GetRule_flatten_source2()));
  336. if (!source) {
  337. return nullptr;
  338. }
  339. if (node.Block3Size()) {
  340. TPosition pos(Ctx.Pos());
  341. TVector<TSourcePtr> sources;
  342. TVector<TMaybe<TPosition>> anyPositions;
  343. TVector<bool> anyFlags;
  344. sources.emplace_back(std::move(source));
  345. anyPositions.emplace_back(node.HasBlock1() ? Ctx.TokenPosition(node.GetBlock1().GetToken1()) : TMaybe<TPosition>());
  346. anyFlags.push_back(bool(anyPositions.back()));
  347. for (auto& block: node.GetBlock3()) {
  348. sources.emplace_back(FlattenSource(block.GetRule_flatten_source3()));
  349. if (!sources.back()) {
  350. Ctx.IncrementMonCounter("sql_errors", "NoJoinWith");
  351. return nullptr;
  352. }
  353. anyPositions.emplace_back(block.HasBlock2() ? Ctx.TokenPosition(block.GetBlock2().GetToken1()) : TMaybe<TPosition>());
  354. anyFlags.push_back(bool(anyPositions.back()));
  355. }
  356. source = BuildEquiJoin(pos, std::move(sources), std::move(anyFlags), Ctx.Scoped->StrictJoinKeyTypes);
  357. size_t idx = 1;
  358. for (auto& block: node.GetBlock3()) {
  359. YQL_ENSURE(idx < anyPositions.size());
  360. TMaybe<TPosition> leftAny = (idx == 1) ? anyPositions[0] : Nothing();
  361. TMaybe<TPosition> rightAny = anyPositions[idx];
  362. if (!JoinOp(source.Get(), block, leftAny ? leftAny : rightAny)) {
  363. Ctx.IncrementMonCounter("sql_errors", "NoJoinOp");
  364. return nullptr;
  365. }
  366. ++idx;
  367. }
  368. }
  369. return source;
  370. }
  371. bool TSqlSelect::SelectTerm(TVector<TNodePtr>& terms, const TRule_result_column& node) {
  372. // result_column:
  373. // opt_id_prefix ASTERISK
  374. // | expr ((AS an_id) | an_id_pure)?
  375. // ;
  376. switch (node.Alt_case()) {
  377. case TRule_result_column::kAltResultColumn1: {
  378. auto alt = node.GetAlt_result_column1();
  379. Token(alt.GetToken2());
  380. auto idAsteriskQualify = OptIdPrefixAsStr(alt.GetRule_opt_id_prefix1(), *this);
  381. Ctx.IncrementMonCounter("sql_features", idAsteriskQualify ? "QualifyAsterisk" : "Asterisk");
  382. terms.push_back(BuildColumn(Ctx.Pos(), "*", idAsteriskQualify));
  383. break;
  384. }
  385. case TRule_result_column::kAltResultColumn2: {
  386. auto alt = node.GetAlt_result_column2();
  387. TColumnRefScope scope(Ctx, EColumnRefState::Allow);
  388. TSqlExpression expr(Ctx, Mode);
  389. TNodePtr term(expr.Build(alt.GetRule_expr1()));
  390. if (!term) {
  391. Ctx.IncrementMonCounter("sql_errors", "NoTerm");
  392. return false;
  393. }
  394. if (alt.HasBlock2()) {
  395. TString label;
  396. bool implicitLabel = false;
  397. switch (alt.GetBlock2().Alt_case()) {
  398. case TRule_result_column_TAlt2_TBlock2::kAlt1:
  399. label = Id(alt.GetBlock2().GetAlt1().GetRule_an_id_or_type2(), *this);
  400. break;
  401. case TRule_result_column_TAlt2_TBlock2::kAlt2:
  402. label = Id(alt.GetBlock2().GetAlt2().GetRule_an_id_as_compat1(), *this);
  403. if (!Ctx.AnsiOptionalAs) {
  404. // AS is mandatory
  405. Ctx.Error() << "Expecting mandatory AS here. Did you miss comma? Please add PRAGMA AnsiOptionalAs; for ANSI compatibility";
  406. return false;
  407. }
  408. implicitLabel = true;
  409. break;
  410. case TRule_result_column_TAlt2_TBlock2::ALT_NOT_SET:
  411. Y_ABORT("You should change implementation according to grammar changes");
  412. }
  413. term->SetLabel(label, Ctx.Pos());
  414. term->MarkImplicitLabel(implicitLabel);
  415. }
  416. terms.push_back(term);
  417. break;
  418. }
  419. case TRule_result_column::ALT_NOT_SET:
  420. Ctx.IncrementMonCounter("sql_errors", "UnknownResultColumn");
  421. AltNotImplemented("result_column", node);
  422. return false;
  423. }
  424. return true;
  425. }
  426. bool TSqlSelect::ValidateSelectColumns(const TVector<TNodePtr>& terms) {
  427. TSet<TString> labels;
  428. TSet<TString> asteriskSources;
  429. for (const auto& term: terms) {
  430. const auto& label = term->GetLabel();
  431. if (!Ctx.PragmaAllowDotInAlias && label.find('.') != TString::npos) {
  432. Ctx.Error(term->GetPos()) << "Unable to use '.' in column name. Invalid column name: " << label;
  433. return false;
  434. }
  435. if (!label.empty()) {
  436. if (!labels.insert(label).second) {
  437. Ctx.Error(term->GetPos()) << "Unable to use duplicate column names. Collision in name: " << label;
  438. return false;
  439. }
  440. }
  441. if (term->IsAsterisk()) {
  442. const auto& source = *term->GetSourceName();
  443. if (source.empty() && terms.ysize() > 1) {
  444. Ctx.Error(term->GetPos()) << "Unable to use plain '*' with other projection items. Please use qualified asterisk instead: '<table>.*' (<table> can be either table name or table alias).";
  445. return false;
  446. } else if (!asteriskSources.insert(source).second) {
  447. Ctx.Error(term->GetPos()) << "Unable to use twice same quialified asterisk. Invalid source: " << source;
  448. return false;
  449. }
  450. } else if (label.empty()) {
  451. const auto* column = term->GetColumnName();
  452. if (column && !column->empty()) {
  453. const auto& source = *term->GetSourceName();
  454. const auto usedName = source.empty() ? *column : source + '.' + *column;
  455. if (!labels.insert(usedName).second) {
  456. Ctx.Error(term->GetPos()) << "Unable to use duplicate column names. Collision in name: " << usedName;
  457. return false;
  458. }
  459. }
  460. }
  461. }
  462. return true;
  463. }
  464. TSourcePtr TSqlSelect::SingleSource(const TRule_single_source& node, const TVector<TString>& derivedColumns, TPosition derivedColumnsPos, bool unorderedSubquery) {
  465. switch (node.Alt_case()) {
  466. case TRule_single_source::kAltSingleSource1: {
  467. const auto& alt = node.GetAlt_single_source1();
  468. const auto& table_ref = alt.GetRule_table_ref1();
  469. if (auto maybeSource = AsTableImpl(table_ref)) {
  470. auto source = *maybeSource;
  471. if (!source) {
  472. return nullptr;
  473. }
  474. return source;
  475. } else {
  476. TTableRef table;
  477. if (!TableRefImpl(alt.GetRule_table_ref1(), table, unorderedSubquery)) {
  478. return nullptr;
  479. }
  480. if (table.Source) {
  481. return table.Source;
  482. }
  483. TPosition pos(Ctx.Pos());
  484. Ctx.IncrementMonCounter("sql_select_clusters", table.Cluster.GetLiteral() ? *table.Cluster.GetLiteral() : "unknown");
  485. return BuildTableSource(pos, table);
  486. }
  487. }
  488. case TRule_single_source::kAltSingleSource2: {
  489. const auto& alt = node.GetAlt_single_source2();
  490. Token(alt.GetToken1());
  491. TSqlSelect innerSelect(Ctx, Mode);
  492. TPosition pos;
  493. auto source = innerSelect.Build(alt.GetRule_select_stmt2(), pos);
  494. if (!source) {
  495. return nullptr;
  496. }
  497. return BuildInnerSource(pos, BuildSourceNode(pos, std::move(source)), Ctx.Scoped->CurrService, Ctx.Scoped->CurrCluster);
  498. }
  499. case TRule_single_source::kAltSingleSource3: {
  500. const auto& alt = node.GetAlt_single_source3();
  501. TPosition pos;
  502. return TSqlValues(Ctx, Mode).Build(alt.GetRule_values_stmt2(), pos, derivedColumns, derivedColumnsPos);
  503. }
  504. case TRule_single_source::ALT_NOT_SET:
  505. AltNotImplemented("single_source", node);
  506. Ctx.IncrementMonCounter("sql_errors", "UnknownSingleSource");
  507. return nullptr;
  508. }
  509. }
  510. TSourcePtr TSqlSelect::NamedSingleSource(const TRule_named_single_source& node, bool unorderedSubquery) {
  511. // named_single_source: single_source match_recognize_clause? (((AS an_id) | an_id_as_compat) pure_column_list?)? (sample_clause | tablesample_clause)?;
  512. TVector<TString> derivedColumns;
  513. TPosition derivedColumnsPos;
  514. if (node.HasBlock3() && node.GetBlock3().HasBlock2()) {
  515. const auto& columns = node.GetBlock3().GetBlock2().GetRule_pure_column_list1();
  516. Token(columns.GetToken1());
  517. derivedColumnsPos = Ctx.Pos();
  518. if (node.GetRule_single_source1().Alt_case() != TRule_single_source::kAltSingleSource3) {
  519. Error() << "Derived column list is only supported for VALUES";
  520. return nullptr;
  521. }
  522. PureColumnListStr(columns, *this, derivedColumns);
  523. }
  524. auto singleSource = SingleSource(node.GetRule_single_source1(), derivedColumns, derivedColumnsPos, unorderedSubquery);
  525. if (!singleSource) {
  526. return nullptr;
  527. }
  528. if (node.HasBlock2()) {
  529. if (node.HasBlock4()) {
  530. //CAN/CSA-ISO/IEC 9075-2:18 7.6 <table reference>
  531. //4) TF shall not simply contain both a <sample clause> and a <row pattern recognition clause and name>.
  532. Ctx.Error() << "Source shall not simply contain both a sample clause and a row pattern recognition clause";
  533. return {};
  534. }
  535. auto matchRecognizeClause = TSqlMatchRecognizeClause(Ctx, Mode);
  536. auto matchRecognize = matchRecognizeClause.CreateBuilder(node.GetBlock2().GetRule_row_pattern_recognition_clause1());
  537. singleSource->SetMatchRecognize(matchRecognize);
  538. }
  539. if (node.HasBlock3()) {
  540. TString label;
  541. switch (node.GetBlock3().GetBlock1().Alt_case()) {
  542. case TRule_named_single_source_TBlock3_TBlock1::kAlt1:
  543. label = Id(node.GetBlock3().GetBlock1().GetAlt1().GetRule_an_id2(), *this);
  544. break;
  545. case TRule_named_single_source_TBlock3_TBlock1::kAlt2:
  546. label = Id(node.GetBlock3().GetBlock1().GetAlt2().GetRule_an_id_as_compat1(), *this);
  547. if (!Ctx.AnsiOptionalAs) {
  548. // AS is mandatory
  549. Ctx.Error() << "Expecting mandatory AS here. Did you miss comma? Please add PRAGMA AnsiOptionalAs; for ANSI compatibility";
  550. return {};
  551. }
  552. break;
  553. case TRule_named_single_source_TBlock3_TBlock1::ALT_NOT_SET:
  554. Y_ABORT("You should change implementation according to grammar changes");
  555. }
  556. singleSource->SetLabel(label);
  557. }
  558. if (node.HasBlock4()) {
  559. ESampleClause sampleClause;
  560. ESampleMode mode;
  561. TSqlExpression expr(Ctx, Mode);
  562. TNodePtr samplingRateNode;
  563. TNodePtr samplingSeedNode;
  564. const auto& sampleBlock = node.GetBlock4();
  565. TPosition pos;
  566. switch (sampleBlock.Alt_case()) {
  567. case TRule_named_single_source::TBlock4::kAlt1:
  568. {
  569. sampleClause = ESampleClause::Sample;
  570. mode = ESampleMode::Bernoulli;
  571. const auto& sampleExpr = sampleBlock.GetAlt1().GetRule_sample_clause1().GetRule_expr2();
  572. samplingRateNode = expr.Build(sampleExpr);
  573. if (!samplingRateNode) {
  574. return nullptr;
  575. }
  576. pos = GetPos(sampleBlock.GetAlt1().GetRule_sample_clause1().GetToken1());
  577. Ctx.IncrementMonCounter("sql_features", "SampleClause");
  578. }
  579. break;
  580. case TRule_named_single_source::TBlock4::kAlt2:
  581. {
  582. sampleClause = ESampleClause::TableSample;
  583. const auto& tableSampleClause = sampleBlock.GetAlt2().GetRule_tablesample_clause1();
  584. const auto& modeToken = tableSampleClause.GetRule_sampling_mode2().GetToken1();
  585. const TCiString& token = Token(modeToken);
  586. if (token == "system") {
  587. mode = ESampleMode::System;
  588. } else if (token == "bernoulli") {
  589. mode = ESampleMode::Bernoulli;
  590. } else {
  591. Ctx.Error(GetPos(modeToken)) << "Unsupported sampling mode: " << token;
  592. Ctx.IncrementMonCounter("sql_errors", "UnsupportedSamplingMode");
  593. return nullptr;
  594. }
  595. const auto& tableSampleExpr = tableSampleClause.GetRule_expr4();
  596. samplingRateNode = expr.Build(tableSampleExpr);
  597. if (!samplingRateNode) {
  598. return nullptr;
  599. }
  600. if (tableSampleClause.HasBlock6()) {
  601. const auto& repeatableExpr = tableSampleClause.GetBlock6().GetRule_repeatable_clause1().GetRule_expr3();
  602. samplingSeedNode = expr.Build(repeatableExpr);
  603. if (!samplingSeedNode) {
  604. return nullptr;
  605. }
  606. }
  607. pos = GetPos(sampleBlock.GetAlt2().GetRule_tablesample_clause1().GetToken1());
  608. Ctx.IncrementMonCounter("sql_features", "SampleClause");
  609. }
  610. break;
  611. case TRule_named_single_source::TBlock4::ALT_NOT_SET:
  612. Y_ABORT("SampleClause: does not corresond to grammar changes");
  613. }
  614. if (!singleSource->SetSamplingOptions(Ctx, pos, sampleClause, mode, samplingRateNode, samplingSeedNode)) {
  615. Ctx.IncrementMonCounter("sql_errors", "IncorrectSampleClause");
  616. return nullptr;
  617. }
  618. }
  619. return singleSource;
  620. }
  621. bool TSqlSelect::ColumnName(TVector<TNodePtr>& keys, const TRule_column_name& node) {
  622. const auto sourceName = OptIdPrefixAsStr(node.GetRule_opt_id_prefix1(), *this);
  623. const auto columnName = Id(node.GetRule_an_id2(), *this);
  624. if (columnName.empty()) {
  625. // TDOD: Id() should return TMaybe<TString>
  626. if (!Ctx.HasPendingErrors) {
  627. Ctx.Error() << "Empty column name is not allowed";
  628. }
  629. return false;
  630. }
  631. keys.push_back(BuildColumn(Ctx.Pos(), columnName, sourceName));
  632. return true;
  633. }
  634. bool TSqlSelect::ColumnName(TVector<TNodePtr>& keys, const TRule_without_column_name& node) {
  635. // without_column_name: (an_id DOT an_id) | an_id_without;
  636. TString sourceName;
  637. TString columnName;
  638. switch (node.Alt_case()) {
  639. case TRule_without_column_name::kAltWithoutColumnName1:
  640. sourceName = Id(node.GetAlt_without_column_name1().GetRule_an_id1(), *this);
  641. columnName = Id(node.GetAlt_without_column_name1().GetRule_an_id3(), *this);
  642. break;
  643. case TRule_without_column_name::kAltWithoutColumnName2:
  644. columnName = Id(node.GetAlt_without_column_name2().GetRule_an_id_without1(), *this);
  645. break;
  646. case TRule_without_column_name::ALT_NOT_SET:
  647. Y_ABORT("You should change implementation according to grammar changes");
  648. }
  649. if (columnName.empty()) {
  650. // TDOD: Id() should return TMaybe<TString>
  651. if (!Ctx.HasPendingErrors) {
  652. Ctx.Error() << "Empty column name is not allowed";
  653. }
  654. return false;
  655. }
  656. keys.push_back(BuildColumn(Ctx.Pos(), columnName, sourceName));
  657. return true;
  658. }
  659. template<typename TRule>
  660. bool TSqlSelect::ColumnList(TVector<TNodePtr>& keys, const TRule& node) {
  661. bool result;
  662. if constexpr (std::is_same_v<TRule, TRule_column_list>) {
  663. result = ColumnName(keys, node.GetRule_column_name1());
  664. } else {
  665. result = ColumnName(keys, node.GetRule_without_column_name1());
  666. }
  667. if (!result) {
  668. return false;
  669. }
  670. for (auto b: node.GetBlock2()) {
  671. Token(b.GetToken1());
  672. if constexpr (std::is_same_v<TRule, TRule_column_list>) {
  673. result = ColumnName(keys, b.GetRule_column_name2());
  674. } else {
  675. result = ColumnName(keys, b.GetRule_without_column_name2());
  676. }
  677. if (!result) {
  678. return false;
  679. }
  680. }
  681. return true;
  682. }
  683. bool TSqlSelect::NamedColumn(TVector<TNodePtr>& columnList, const TRule_named_column& node) {
  684. if (!ColumnName(columnList, node.GetRule_column_name1())) {
  685. return false;
  686. }
  687. if (node.HasBlock2()) {
  688. const auto label = Id(node.GetBlock2().GetRule_an_id2(), *this);
  689. columnList.back()->SetLabel(label);
  690. }
  691. return true;
  692. }
  693. TSourcePtr TSqlSelect::ProcessCore(const TRule_process_core& node, const TWriteSettings& settings, TPosition& selectPos) {
  694. // PROCESS STREAM? named_single_source (COMMA named_single_source)* (USING using_call_expr (AS an_id)?
  695. // (WITH external_call_settings)?
  696. // (WHERE expr)? (HAVING expr)? (ASSUME order_by_clause)?)?
  697. Token(node.GetToken1());
  698. TPosition startPos(Ctx.Pos());
  699. if (!selectPos) {
  700. selectPos = startPos;
  701. }
  702. const bool hasUsing = node.HasBlock5();
  703. const bool unorderedSubquery = hasUsing;
  704. TSourcePtr source(NamedSingleSource(node.GetRule_named_single_source3(), unorderedSubquery));
  705. if (!source) {
  706. return nullptr;
  707. }
  708. if (node.GetBlock4().size()) {
  709. TVector<TSourcePtr> sources(1, source);
  710. for (auto& s: node.GetBlock4()) {
  711. sources.push_back(NamedSingleSource(s.GetRule_named_single_source2(), unorderedSubquery));
  712. if (!sources.back()) {
  713. return nullptr;
  714. }
  715. }
  716. auto pos = source->GetPos();
  717. source = BuildMuxSource(pos, std::move(sources));
  718. }
  719. const bool processStream = node.HasBlock2();
  720. if (!hasUsing) {
  721. return BuildProcess(startPos, std::move(source), nullptr, false, {}, false, processStream, settings, {});
  722. }
  723. const auto& block5 = node.GetBlock5();
  724. if (block5.HasBlock5()) {
  725. TSqlExpression expr(Ctx, Mode);
  726. TColumnRefScope scope(Ctx, EColumnRefState::Allow);
  727. TNodePtr where = expr.Build(block5.GetBlock5().GetRule_expr2());
  728. if (!where || !source->AddFilter(Ctx, where)) {
  729. return nullptr;
  730. }
  731. Ctx.IncrementMonCounter("sql_features", "ProcessWhere");
  732. } else {
  733. Ctx.IncrementMonCounter("sql_features", processStream ? "ProcessStream" : "Process");
  734. }
  735. if (block5.HasBlock6()) {
  736. Ctx.Error() << "PROCESS does not allow HAVING yet! You may request it on yql@ maillist.";
  737. return nullptr;
  738. }
  739. bool listCall = false;
  740. TSqlCallExpr call(Ctx, Mode);
  741. bool initRet = call.Init(block5.GetRule_using_call_expr2());
  742. if (initRet) {
  743. call.IncCounters();
  744. }
  745. if (!initRet) {
  746. return nullptr;
  747. }
  748. auto args = call.GetArgs();
  749. for (auto& arg: args) {
  750. if (auto placeholder = dynamic_cast<TTableRows*>(arg.Get())) {
  751. if (listCall) {
  752. Ctx.Error() << "Only one TableRows() argument is allowed.";
  753. return nullptr;
  754. }
  755. listCall = true;
  756. }
  757. }
  758. if (!call.IsExternal() && block5.HasBlock4()) {
  759. Ctx.Error() << "PROCESS without USING EXTERNAL FUNCTION doesn't allow WITH block";
  760. return nullptr;
  761. }
  762. if (block5.HasBlock4()) {
  763. const auto& block54 = block5.GetBlock4();
  764. if (!call.ConfigureExternalCall(block54.GetRule_external_call_settings2())) {
  765. return nullptr;
  766. }
  767. }
  768. TSqlCallExpr finalCall(call, args);
  769. TNodePtr with(finalCall.IsExternal() ? finalCall.BuildCall() : finalCall.BuildUdf(/* forReduce = */ false));
  770. if (!with) {
  771. return {};
  772. }
  773. args = finalCall.GetArgs();
  774. if (call.IsExternal())
  775. listCall = true;
  776. if (block5.HasBlock3()) {
  777. with->SetLabel(Id(block5.GetBlock3().GetRule_an_id2(), *this));
  778. }
  779. if (call.IsExternal() && block5.HasBlock7()) {
  780. Ctx.Error() << "PROCESS with USING EXTERNAL FUNCTION doesn't allow ASSUME block";
  781. return nullptr;
  782. }
  783. TVector<TSortSpecificationPtr> assumeOrderBy;
  784. if (block5.HasBlock7()) {
  785. if (!OrderByClause(block5.GetBlock7().GetRule_order_by_clause2(), assumeOrderBy)) {
  786. return nullptr;
  787. }
  788. Ctx.IncrementMonCounter("sql_features", IsColumnsOnly(assumeOrderBy) ? "AssumeOrderBy" : "AssumeOrderByExpr");
  789. }
  790. return BuildProcess(startPos, std::move(source), with, finalCall.IsExternal(), std::move(args), listCall, processStream, settings, assumeOrderBy);
  791. }
  792. TSourcePtr TSqlSelect::ReduceCore(const TRule_reduce_core& node, const TWriteSettings& settings, TPosition& selectPos) {
  793. // REDUCE named_single_source (COMMA named_single_source)* (PRESORT sort_specification_list)?
  794. // ON column_list USING ALL? using_call_expr (AS an_id)?
  795. // (WHERE expr)? (HAVING expr)? (ASSUME order_by_clause)?
  796. Token(node.GetToken1());
  797. TPosition startPos(Ctx.Pos());
  798. if (!selectPos) {
  799. selectPos = startPos;
  800. }
  801. TSourcePtr source(NamedSingleSource(node.GetRule_named_single_source2(), true));
  802. if (!source) {
  803. return {};
  804. }
  805. if (node.GetBlock3().size()) {
  806. TVector<TSourcePtr> sources(1, source);
  807. for (auto& s: node.GetBlock3()) {
  808. sources.push_back(NamedSingleSource(s.GetRule_named_single_source2(), true));
  809. if (!sources.back()) {
  810. return nullptr;
  811. }
  812. }
  813. auto pos = source->GetPos();
  814. source = BuildMuxSource(pos, std::move(sources));
  815. }
  816. TVector<TSortSpecificationPtr> orderBy;
  817. if (node.HasBlock4()) {
  818. if (!SortSpecificationList(node.GetBlock4().GetRule_sort_specification_list2(), orderBy)) {
  819. return {};
  820. }
  821. }
  822. TVector<TNodePtr> keys;
  823. if (!ColumnList(keys, node.GetRule_column_list6())) {
  824. return nullptr;
  825. }
  826. if (node.HasBlock11()) {
  827. TColumnRefScope scope(Ctx, EColumnRefState::Allow);
  828. TSqlExpression expr(Ctx, Mode);
  829. TNodePtr where = expr.Build(node.GetBlock11().GetRule_expr2());
  830. if (!where || !source->AddFilter(Ctx, where)) {
  831. return nullptr;
  832. }
  833. Ctx.IncrementMonCounter("sql_features", "ReduceWhere");
  834. } else {
  835. Ctx.IncrementMonCounter("sql_features", "Reduce");
  836. }
  837. TNodePtr having;
  838. if (node.HasBlock12()) {
  839. TColumnRefScope scope(Ctx, EColumnRefState::Allow);
  840. TSqlExpression expr(Ctx, Mode);
  841. having = expr.Build(node.GetBlock12().GetRule_expr2());
  842. if (!having) {
  843. return nullptr;
  844. }
  845. }
  846. bool listCall = false;
  847. TSqlCallExpr call(Ctx, Mode);
  848. bool initRet = call.Init(node.GetRule_using_call_expr9());
  849. if (initRet) {
  850. call.IncCounters();
  851. }
  852. if (!initRet) {
  853. return nullptr;
  854. }
  855. auto args = call.GetArgs();
  856. for (auto& arg: args) {
  857. if (auto placeholder = dynamic_cast<TTableRows*>(arg.Get())) {
  858. if (listCall) {
  859. Ctx.Error() << "Only one TableRows() argument is allowed.";
  860. return nullptr;
  861. }
  862. listCall = true;
  863. }
  864. }
  865. TSqlCallExpr finalCall(call, args);
  866. TNodePtr udf(finalCall.BuildUdf(/* forReduce = */ true));
  867. if (!udf) {
  868. return {};
  869. }
  870. if (node.HasBlock10()) {
  871. udf->SetLabel(Id(node.GetBlock10().GetRule_an_id2(), *this));
  872. }
  873. const auto reduceMode = node.HasBlock8() ? ReduceMode::ByAll : ReduceMode::ByPartition;
  874. TVector<TSortSpecificationPtr> assumeOrderBy;
  875. if (node.HasBlock13()) {
  876. if (!OrderByClause(node.GetBlock13().GetRule_order_by_clause2(), assumeOrderBy)) {
  877. return nullptr;
  878. }
  879. Ctx.IncrementMonCounter("sql_features", IsColumnsOnly(assumeOrderBy) ? "AssumeOrderBy" : "AssumeOrderByExpr");
  880. }
  881. return BuildReduce(startPos, reduceMode, std::move(source), std::move(orderBy), std::move(keys), std::move(args), udf, having,
  882. settings, assumeOrderBy, listCall);
  883. }
  884. TSourcePtr TSqlSelect::SelectCore(const TRule_select_core& node, const TWriteSettings& settings, TPosition& selectPos,
  885. TMaybe<TSelectKindPlacement> placement, TVector<TSortSpecificationPtr>& selectOpOrderBy, bool& selectOpAssumeOrderBy)
  886. {
  887. // (FROM join_source)? SELECT STREAM? opt_set_quantifier result_column (COMMA result_column)* COMMA? (WITHOUT column_list)? (FROM join_source)? (WHERE expr)?
  888. // group_by_clause? (HAVING expr)? window_clause? ext_order_by_clause?
  889. selectOpOrderBy = {};
  890. selectOpAssumeOrderBy = false;
  891. if (node.HasBlock1()) {
  892. Token(node.GetBlock1().GetToken1());
  893. } else {
  894. Token(node.GetToken2());
  895. }
  896. TPosition startPos(Ctx.Pos());
  897. if (!selectPos) {
  898. selectPos = Ctx.Pos();
  899. }
  900. const auto hints = Ctx.PullHintForToken(selectPos);
  901. TColumnsSets uniqueSets, distinctSets;
  902. for (const auto& hint : hints) {
  903. if (const auto& name = to_lower(hint.Name); name == "unique")
  904. uniqueSets.insert_unique(NSorted::TSimpleSet<TString>(hint.Values.cbegin(), hint.Values.cend()));
  905. else if (name == "distinct") {
  906. uniqueSets.insert_unique(NSorted::TSimpleSet<TString>(hint.Values.cbegin(), hint.Values.cend()));
  907. distinctSets.insert_unique(NSorted::TSimpleSet<TString>(hint.Values.cbegin(), hint.Values.cend()));
  908. } else {
  909. Ctx.Warning(hint.Pos, TIssuesIds::YQL_UNUSED_HINT) << "Hint " << hint.Name << " will not be used";
  910. }
  911. }
  912. const bool distinct = IsDistinctOptSet(node.GetRule_opt_set_quantifier4());
  913. if (distinct) {
  914. Ctx.IncrementMonCounter("sql_features", "DistinctInSelect");
  915. }
  916. TSourcePtr source(BuildFakeSource(selectPos, /* missingFrom = */ true, Mode == NSQLTranslation::ESqlMode::SUBQUERY));
  917. if (node.HasBlock1() && node.HasBlock9()) {
  918. Token(node.GetBlock9().GetToken1());
  919. Ctx.IncrementMonCounter("sql_errors", "DoubleFrom");
  920. Ctx.Error() << "Only one FROM clause is allowed";
  921. return nullptr;
  922. }
  923. if (node.HasBlock1()) {
  924. source = JoinSource(node.GetBlock1().GetRule_join_source2());
  925. Ctx.IncrementMonCounter("sql_features", "FromInFront");
  926. } else if (node.HasBlock9()) {
  927. source = JoinSource(node.GetBlock9().GetRule_join_source2());
  928. }
  929. if (!source) {
  930. return nullptr;
  931. }
  932. const bool selectStream = node.HasBlock3();
  933. TVector<TNodePtr> without;
  934. if (node.HasBlock8()) {
  935. if (!ColumnList(without, node.GetBlock8().GetRule_without_column_list2())) {
  936. return nullptr;
  937. }
  938. }
  939. if (node.HasBlock10()) {
  940. auto block = node.GetBlock10();
  941. Token(block.GetToken1());
  942. TPosition pos(Ctx.Pos());
  943. TNodePtr where;
  944. {
  945. TColumnRefScope scope(Ctx, EColumnRefState::Allow);
  946. TSqlExpression expr(Ctx, Mode);
  947. where = expr.Build(block.GetRule_expr2());
  948. }
  949. if (!where) {
  950. Ctx.IncrementMonCounter("sql_errors", "WhereInvalid");
  951. return nullptr;
  952. }
  953. if (!source->AddFilter(Ctx, where)) {
  954. Ctx.IncrementMonCounter("sql_errors", "WhereNotSupportedBySource");
  955. return nullptr;
  956. }
  957. Ctx.IncrementMonCounter("sql_features", "Where");
  958. }
  959. /// \todo merge gtoupByExpr and groupBy in one
  960. TVector<TNodePtr> groupByExpr, groupBy;
  961. TLegacyHoppingWindowSpecPtr legacyHoppingWindowSpec;
  962. bool compactGroupBy = false;
  963. TString groupBySuffix;
  964. if (node.HasBlock11()) {
  965. TGroupByClause clause(Ctx, Mode);
  966. if (!clause.Build(node.GetBlock11().GetRule_group_by_clause1())) {
  967. return nullptr;
  968. }
  969. bool hasHopping = (bool)clause.GetLegacyHoppingWindow();
  970. for (const auto& exprAlias: clause.Aliases()) {
  971. YQL_ENSURE(exprAlias.first == exprAlias.second->GetLabel());
  972. groupByExpr.emplace_back(exprAlias.second);
  973. hasHopping |= (bool)dynamic_cast<THoppingWindow*>(exprAlias.second.Get());
  974. }
  975. groupBy = std::move(clause.Content());
  976. clause.SetFeatures("sql_features");
  977. legacyHoppingWindowSpec = clause.GetLegacyHoppingWindow();
  978. compactGroupBy = clause.IsCompactGroupBy();
  979. groupBySuffix = clause.GetSuffix();
  980. if (source->IsStream() && !hasHopping) {
  981. Ctx.Error() << "Streaming group by query must have a hopping window specification.";
  982. return nullptr;
  983. }
  984. }
  985. TNodePtr having;
  986. if (node.HasBlock12()) {
  987. TSqlExpression expr(Ctx, Mode);
  988. TColumnRefScope scope(Ctx, EColumnRefState::Allow);
  989. having = expr.Build(node.GetBlock12().GetRule_expr2());
  990. if (!having) {
  991. return nullptr;
  992. }
  993. Ctx.IncrementMonCounter("sql_features", "Having");
  994. }
  995. TWinSpecs windowSpec;
  996. if (node.HasBlock13()) {
  997. if (source->IsStream()) {
  998. Ctx.Error() << "WINDOW is not allowed in streaming queries";
  999. return nullptr;
  1000. }
  1001. if (!WindowClause(node.GetBlock13().GetRule_window_clause1(), windowSpec)) {
  1002. return nullptr;
  1003. }
  1004. Ctx.IncrementMonCounter("sql_features", "WindowClause");
  1005. }
  1006. bool assumeSorted = false;
  1007. TVector<TSortSpecificationPtr> orderBy;
  1008. if (node.HasBlock14()) {
  1009. auto& orderBlock = node.GetBlock14().GetRule_ext_order_by_clause1();
  1010. assumeSorted = orderBlock.HasBlock1();
  1011. Token(orderBlock.GetRule_order_by_clause2().GetToken1());
  1012. if (source->IsStream()) {
  1013. Ctx.Error() << "ORDER BY is not allowed in streaming queries";
  1014. return nullptr;
  1015. }
  1016. if (!ValidateLimitOrderByWithSelectOp(placement, "ORDER BY")) {
  1017. return nullptr;
  1018. }
  1019. if (!OrderByClause(orderBlock.GetRule_order_by_clause2(), orderBy)) {
  1020. return nullptr;
  1021. }
  1022. Ctx.IncrementMonCounter("sql_features", IsColumnsOnly(orderBy)
  1023. ? (assumeSorted ? "AssumeOrderBy" : "OrderBy")
  1024. : (assumeSorted ? "AssumeOrderByExpr" : "OrderByExpr")
  1025. );
  1026. if (!NeedPassLimitOrderByToUnderlyingSelect(placement)) {
  1027. selectOpOrderBy.swap(orderBy);
  1028. std::swap(selectOpAssumeOrderBy, assumeSorted);
  1029. }
  1030. }
  1031. TVector<TNodePtr> terms;
  1032. {
  1033. class TScopedWinSpecs {
  1034. public:
  1035. TScopedWinSpecs(TContext& ctx, TWinSpecs& specs)
  1036. : Ctx(ctx)
  1037. {
  1038. Ctx.WinSpecsScopes.push_back(std::ref(specs));
  1039. }
  1040. ~TScopedWinSpecs() {
  1041. Ctx.WinSpecsScopes.pop_back();
  1042. }
  1043. private:
  1044. TContext& Ctx;
  1045. };
  1046. TScopedWinSpecs scoped(Ctx, windowSpec);
  1047. if (!SelectTerm(terms, node.GetRule_result_column5())) {
  1048. return nullptr;
  1049. }
  1050. for (auto block: node.GetBlock6()) {
  1051. if (!SelectTerm(terms, block.GetRule_result_column2())) {
  1052. return nullptr;
  1053. }
  1054. }
  1055. }
  1056. if (!ValidateSelectColumns(terms)) {
  1057. return nullptr;
  1058. }
  1059. return BuildSelectCore(Ctx, startPos, std::move(source), groupByExpr, groupBy, compactGroupBy, groupBySuffix, assumeSorted, orderBy, having,
  1060. std::move(windowSpec), legacyHoppingWindowSpec, std::move(terms), distinct, std::move(without), selectStream, settings, std::move(uniqueSets), std::move(distinctSets));
  1061. }
  1062. bool TSqlSelect::WindowDefinition(const TRule_window_definition& rule, TWinSpecs& winSpecs) {
  1063. const TString windowName = Id(rule.GetRule_new_window_name1().GetRule_window_name1().GetRule_an_id_window1(), *this);
  1064. if (winSpecs.contains(windowName)) {
  1065. Ctx.Error() << "Unable to declare window with same name: " << windowName;
  1066. return false;
  1067. }
  1068. auto windowSpec = WindowSpecification(rule.GetRule_window_specification3().GetRule_window_specification_details2());
  1069. if (!windowSpec) {
  1070. return false;
  1071. }
  1072. winSpecs.emplace(windowName, std::move(windowSpec));
  1073. return true;
  1074. }
  1075. bool TSqlSelect::WindowClause(const TRule_window_clause& rule, TWinSpecs& winSpecs) {
  1076. auto windowList = rule.GetRule_window_definition_list2();
  1077. if (!WindowDefinition(windowList.GetRule_window_definition1(), winSpecs)) {
  1078. return false;
  1079. }
  1080. for (auto& block: windowList.GetBlock2()) {
  1081. if (!WindowDefinition(block.GetRule_window_definition2(), winSpecs)) {
  1082. return false;
  1083. }
  1084. }
  1085. return true;
  1086. }
  1087. bool TSqlTranslation::OrderByClause(const TRule_order_by_clause& node, TVector<TSortSpecificationPtr>& orderBy) {
  1088. return SortSpecificationList(node.GetRule_sort_specification_list3(), orderBy);
  1089. }
  1090. bool TSqlSelect::ValidateLimitOrderByWithSelectOp(TMaybe<TSelectKindPlacement> placement, TStringBuf what) {
  1091. if (!placement.Defined()) {
  1092. // not in select_op chain
  1093. return true;
  1094. }
  1095. if (!placement->IsLastInSelectOp) {
  1096. Ctx.Error() << what << " within UNION ALL is only allowed after last subquery";
  1097. return false;
  1098. }
  1099. return true;
  1100. }
  1101. bool TSqlSelect::NeedPassLimitOrderByToUnderlyingSelect(TMaybe<TSelectKindPlacement> placement) {
  1102. return !placement.Defined() || !placement->IsLastInSelectOp;
  1103. }
  1104. TSqlSelect::TSelectKindResult TSqlSelect::SelectKind(const TRule_select_kind_partial& node, TPosition& selectPos,
  1105. TMaybe<TSelectKindPlacement> placement)
  1106. {
  1107. auto res = SelectKind(node.GetRule_select_kind1(), selectPos, placement);
  1108. if (!res) {
  1109. return {};
  1110. }
  1111. TPosition startPos(Ctx.Pos());
  1112. /// LIMIT INTEGER block
  1113. TNodePtr skipTake;
  1114. if (node.HasBlock2()) {
  1115. auto block = node.GetBlock2();
  1116. Token(block.GetToken1());
  1117. TPosition pos(Ctx.Pos());
  1118. if (!ValidateLimitOrderByWithSelectOp(placement, "LIMIT")) {
  1119. return {};
  1120. }
  1121. TSqlExpression takeExpr(Ctx, Mode);
  1122. auto take = takeExpr.Build(block.GetRule_expr2());
  1123. if (!take) {
  1124. return{};
  1125. }
  1126. TNodePtr skip;
  1127. if (block.HasBlock3()) {
  1128. TSqlExpression skipExpr(Ctx, Mode);
  1129. skip = skipExpr.Build(block.GetBlock3().GetRule_expr2());
  1130. if (!skip) {
  1131. return {};
  1132. }
  1133. if (Token(block.GetBlock3().GetToken1()) == ",") {
  1134. // LIMIT skip, take
  1135. skip.Swap(take);
  1136. Ctx.IncrementMonCounter("sql_features", "LimitSkipTake");
  1137. } else {
  1138. Ctx.IncrementMonCounter("sql_features", "LimitOffset");
  1139. }
  1140. }
  1141. auto st = BuildSkipTake(pos, skip, take);
  1142. if (NeedPassLimitOrderByToUnderlyingSelect(placement)) {
  1143. skipTake = st;
  1144. } else {
  1145. res.SelectOpSkipTake = st;
  1146. }
  1147. Ctx.IncrementMonCounter("sql_features", "Limit");
  1148. }
  1149. res.Source = BuildSelect(startPos, std::move(res.Source), skipTake);
  1150. return res;
  1151. }
  1152. TSqlSelect::TSelectKindResult TSqlSelect::SelectKind(const TRule_select_kind& node, TPosition& selectPos,
  1153. TMaybe<TSelectKindPlacement> placement)
  1154. {
  1155. const bool discard = node.HasBlock1();
  1156. const bool hasLabel = node.HasBlock3();
  1157. if (hasLabel && (Mode == NSQLTranslation::ESqlMode::LIMITED_VIEW || Mode == NSQLTranslation::ESqlMode::SUBQUERY)) {
  1158. Ctx.Error() << "INTO RESULT is not allowed in current mode";
  1159. return {};
  1160. }
  1161. if (discard && hasLabel) {
  1162. Ctx.Error() << "DISCARD and INTO RESULT cannot be used at the same time";
  1163. return {};
  1164. }
  1165. if (discard && !selectPos) {
  1166. selectPos = Ctx.TokenPosition(node.GetBlock1().GetToken1());
  1167. }
  1168. TWriteSettings settings;
  1169. settings.Discard = discard;
  1170. if (hasLabel) {
  1171. settings.Label = PureColumnOrNamed(node.GetBlock3().GetRule_pure_column_or_named3(), *this);
  1172. }
  1173. TSelectKindResult res;
  1174. if (placement.Defined()) {
  1175. if (placement->IsFirstInSelectOp) {
  1176. res.Settings.Discard = settings.Discard;
  1177. } else if (settings.Discard) {
  1178. auto discardPos = Ctx.TokenPosition(node.GetBlock1().GetToken1());
  1179. Ctx.Error(discardPos) << "DISCARD within UNION ALL is only allowed before first subquery";
  1180. return {};
  1181. }
  1182. if (placement->IsLastInSelectOp) {
  1183. res.Settings.Label = settings.Label;
  1184. } else if (!settings.Label.Empty()) {
  1185. auto labelPos = Ctx.TokenPosition(node.GetBlock3().GetToken1());
  1186. Ctx.Error(labelPos) << "INTO RESULT within UNION ALL is only allowed after last subquery";
  1187. return {};
  1188. }
  1189. settings = {};
  1190. }
  1191. switch (node.GetBlock2().Alt_case()) {
  1192. case TRule_select_kind_TBlock2::kAlt1:
  1193. res.Source = ProcessCore(node.GetBlock2().GetAlt1().GetRule_process_core1(), settings, selectPos);
  1194. break;
  1195. case TRule_select_kind_TBlock2::kAlt2:
  1196. res.Source = ReduceCore(node.GetBlock2().GetAlt2().GetRule_reduce_core1(), settings, selectPos);
  1197. break;
  1198. case TRule_select_kind_TBlock2::kAlt3: {
  1199. res.Source = SelectCore(node.GetBlock2().GetAlt3().GetRule_select_core1(), settings, selectPos,
  1200. placement, res.SelectOpOrderBy, res.SelectOpAssumeOrderBy);
  1201. break;
  1202. }
  1203. case TRule_select_kind_TBlock2::ALT_NOT_SET:
  1204. Y_ABORT("You should change implementation according to grammar changes");
  1205. }
  1206. return res;
  1207. }
  1208. TSqlSelect::TSelectKindResult TSqlSelect::SelectKind(const TRule_select_kind_parenthesis& node, TPosition& selectPos,
  1209. TMaybe<TSelectKindPlacement> placement)
  1210. {
  1211. if (node.Alt_case() == TRule_select_kind_parenthesis::kAltSelectKindParenthesis1) {
  1212. return SelectKind(node.GetAlt_select_kind_parenthesis1().GetRule_select_kind_partial1(), selectPos, placement);
  1213. } else {
  1214. return SelectKind(node.GetAlt_select_kind_parenthesis2().GetRule_select_kind_partial2(), selectPos, {});
  1215. }
  1216. }
  1217. template<typename TRule>
  1218. TSourcePtr TSqlSelect::Build(const TRule& node, TPosition pos, TSelectKindResult&& first) {
  1219. if (node.GetBlock2().empty()) {
  1220. return std::move(first.Source);
  1221. }
  1222. auto blocks = node.GetBlock2();
  1223. TPosition unionPos = pos; // Position of first select
  1224. TVector<TSortSpecificationPtr> orderBy;
  1225. bool assumeOrderBy = false;
  1226. TNodePtr skipTake;
  1227. TWriteSettings outermostSettings;
  1228. outermostSettings.Discard = first.Settings.Discard;
  1229. TVector<TSourcePtr> sources{ std::move(first.Source)};
  1230. bool currentQuantifier = false;
  1231. for (int i = 0; i < blocks.size(); ++i) {
  1232. auto& b = blocks[i];
  1233. const bool second = (i == 0);
  1234. const bool last = (i + 1 == blocks.size());
  1235. TSelectKindPlacement placement;
  1236. placement.IsLastInSelectOp = last;
  1237. TSelectKindResult next = SelectKind(b.GetRule_select_kind_parenthesis2(), pos, placement);
  1238. if (!next) {
  1239. return nullptr;
  1240. }
  1241. if (last) {
  1242. orderBy = next.SelectOpOrderBy;
  1243. assumeOrderBy = next.SelectOpAssumeOrderBy;
  1244. skipTake = next.SelectOpSkipTake;
  1245. outermostSettings.Label = next.Settings.Label;
  1246. }
  1247. switch (b.GetRule_select_op1().Alt_case()) {
  1248. case TRule_select_op::kAltSelectOp1:
  1249. break;
  1250. case TRule_select_op::kAltSelectOp2:
  1251. case TRule_select_op::kAltSelectOp3:
  1252. Ctx.Error() << "INTERSECT and EXCEPT are not implemented yet";
  1253. return nullptr;
  1254. case TRule_select_op::ALT_NOT_SET:
  1255. Y_ABORT("You should change implementation according to grammar changes");
  1256. }
  1257. const bool quantifier = b.GetRule_select_op1().GetAlt_select_op1().HasBlock2();
  1258. if (!second && quantifier != currentQuantifier) {
  1259. auto source = BuildUnion(pos, std::move(sources), currentQuantifier, {});
  1260. sources.clear();
  1261. sources.emplace_back(std::move(source));
  1262. }
  1263. sources.emplace_back(std::move(next.Source));
  1264. currentQuantifier = quantifier;
  1265. }
  1266. auto result = BuildUnion(pos, std::move(sources), currentQuantifier, outermostSettings);
  1267. if (orderBy) {
  1268. TVector<TNodePtr> groupByExpr;
  1269. TVector<TNodePtr> groupBy;
  1270. bool compactGroupBy = false;
  1271. TString groupBySuffix = "";
  1272. TNodePtr having;
  1273. TWinSpecs winSpecs;
  1274. TLegacyHoppingWindowSpecPtr legacyHoppingWindowSpec;
  1275. bool distinct = false;
  1276. TVector<TNodePtr> without;
  1277. bool stream = false;
  1278. TVector<TNodePtr> terms;
  1279. terms.push_back(BuildColumn(unionPos, "*", ""));
  1280. result = BuildSelectCore(Ctx, unionPos, std::move(result), groupByExpr, groupBy, compactGroupBy, groupBySuffix,
  1281. assumeOrderBy, orderBy, having, std::move(winSpecs), legacyHoppingWindowSpec, std::move(terms),
  1282. distinct, std::move(without), stream, outermostSettings, {}, {});
  1283. result = BuildSelect(unionPos, std::move(result), skipTake);
  1284. } else if (skipTake) {
  1285. result = BuildSelect(unionPos, std::move(result), skipTake);
  1286. }
  1287. return result;
  1288. }
  1289. TSourcePtr TSqlSelect::Build(const TRule_select_stmt& node, TPosition& selectPos) {
  1290. TMaybe<TSelectKindPlacement> placement;
  1291. if (!node.GetBlock2().empty()) {
  1292. placement.ConstructInPlace();
  1293. placement->IsFirstInSelectOp = true;
  1294. }
  1295. auto res = SelectKind(node.GetRule_select_kind_parenthesis1(), selectPos, placement);
  1296. if (!res) {
  1297. return nullptr;
  1298. }
  1299. return Build(node, selectPos, std::move(res));
  1300. }
  1301. TSourcePtr TSqlSelect::Build(const TRule_select_unparenthesized_stmt& node, TPosition& selectPos) {
  1302. TMaybe<TSelectKindPlacement> placement;
  1303. if (!node.GetBlock2().empty()) {
  1304. placement.ConstructInPlace();
  1305. placement->IsFirstInSelectOp = true;
  1306. }
  1307. auto res = SelectKind(node.GetRule_select_kind_partial1(), selectPos, placement);
  1308. if (!res) {
  1309. return nullptr;
  1310. }
  1311. return Build(node, selectPos, std::move(res));
  1312. }
  1313. } // namespace NSQLTranslationV1