sql_group_by.cpp 18 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475
  1. #include "sql_group_by.h"
  2. #include "sql_expression.h"
  3. #include "source.h"
  4. #include <yql/essentials/minikql/mkql_type_ops.h>
  5. namespace NSQLTranslationV1 {
  6. using namespace NSQLv1Generated;
// Prefix that GenerateGroupByExprName() puts in front of a running counter
// when it has to invent a name for an unlabeled grouping expression.
const TString TGroupByClause::AutogenerateNamePrefix = "group";
  8. bool TGroupByClause::Build(const TRule_group_by_clause& node) {
  9. // group_by_clause: GROUP COMPACT? BY opt_set_quantifier grouping_element_list (WITH an_id)?;
  10. if (Ctx.CompactGroupBy.Defined()) {
  11. CompactGroupBy = *Ctx.CompactGroupBy;
  12. } else {
  13. CompactGroupBy = node.HasBlock2();
  14. if (!CompactGroupBy) {
  15. auto hints = Ctx.PullHintForToken(Ctx.TokenPosition(node.GetToken1()));
  16. CompactGroupBy = AnyOf(hints, [](const NSQLTranslation::TSQLHint& hint) { return to_lower(hint.Name) == "compact"; });
  17. }
  18. }
  19. TPosition distinctPos;
  20. if (IsDistinctOptSet(node.GetRule_opt_set_quantifier4(), distinctPos)) {
  21. Ctx.Error(distinctPos) << "DISTINCT is not supported in GROUP BY clause yet!";
  22. Ctx.IncrementMonCounter("sql_errors", "DistinctInGroupByNotSupported");
  23. return false;
  24. }
  25. if (!ParseList(node.GetRule_grouping_element_list5(), EGroupByFeatures::Ordinary)) {
  26. return false;
  27. }
  28. if (node.HasBlock6()) {
  29. TString mode = Id(node.GetBlock6().GetRule_an_id2(), *this);
  30. TMaybe<TIssue> normalizeError = NormalizeName(Ctx.Pos(), mode);
  31. if (!normalizeError.Empty()) {
  32. Error() << normalizeError->GetMessage();
  33. Ctx.IncrementMonCounter("sql_errors", "NormalizeGroupByModeError");
  34. return false;
  35. }
  36. if (mode == "combine") {
  37. Suffix = "Combine";
  38. } else if (mode == "combinestate") {
  39. Suffix = "CombineState";
  40. } else if (mode == "mergestate") {
  41. Suffix = "MergeState";
  42. } else if (mode == "finalize") {
  43. Suffix = "Finalize";
  44. } else if (mode == "mergefinalize") {
  45. Suffix = "MergeFinalize";
  46. } else if (mode == "mergemanyfinalize") {
  47. Suffix = "MergeManyFinalize";
  48. } else {
  49. Ctx.Error() << "Unsupported group by mode: " << mode;
  50. Ctx.IncrementMonCounter("sql_errors", "GroupByModeUnknown");
  51. return false;
  52. }
  53. }
  54. if (!ResolveGroupByAndGrouping()) {
  55. return false;
  56. }
  57. return true;
  58. }
  59. bool TGroupByClause::ParseList(const TRule_grouping_element_list& groupingListNode, EGroupByFeatures featureContext) {
  60. if (!GroupingElement(groupingListNode.GetRule_grouping_element1(), featureContext)) {
  61. return false;
  62. }
  63. for (auto b: groupingListNode.GetBlock2()) {
  64. if (!GroupingElement(b.GetRule_grouping_element2(), featureContext)) {
  65. return false;
  66. }
  67. }
  68. return true;
  69. }
  70. void TGroupByClause::SetFeatures(const TString& field) const {
  71. Ctx.IncrementMonCounter(field, "GroupBy");
  72. const auto& features = Features();
  73. if (features.Test(EGroupByFeatures::Ordinary)) {
  74. Ctx.IncrementMonCounter(field, "GroupByOrdinary");
  75. }
  76. if (features.Test(EGroupByFeatures::Expression)) {
  77. Ctx.IncrementMonCounter(field, "GroupByExpression");
  78. }
  79. if (features.Test(EGroupByFeatures::Rollup)) {
  80. Ctx.IncrementMonCounter(field, "GroupByRollup");
  81. }
  82. if (features.Test(EGroupByFeatures::Cube)) {
  83. Ctx.IncrementMonCounter(field, "GroupByCube");
  84. }
  85. if (features.Test(EGroupByFeatures::GroupingSet)) {
  86. Ctx.IncrementMonCounter(field, "GroupByGroupingSet");
  87. }
  88. if (features.Test(EGroupByFeatures::Empty)) {
  89. Ctx.IncrementMonCounter(field, "GroupByEmpty");
  90. }
  91. }
// Gives mutable access to the grouping nodes collected so far (GroupBySet).
TVector<TNodePtr>& TGroupByClause::Content() {
    return GroupBySet;
}
// Gives mutable access to the label -> node map kept in GroupSetContext.
// The same GroupSetContext is handed to the sub-clauses created in
// GroupingElement(), so they all see one alias map.
TMap<TString, TNodePtr>& TGroupByClause::Aliases() {
    return GroupSetContext->NodeAliases;
}
// Returns the hopping window specification filled in by HoppingWindow();
// it is null when no hopping window element has been parsed by this clause.
TLegacyHoppingWindowSpecPtr TGroupByClause::GetLegacyHoppingWindow() const {
    return LegacyHoppingWindowSpec;
}
// Returns the compact flag determined by Build() (context override,
// COMPACT keyword, or a "compact" hint).
bool TGroupByClause::IsCompactGroupBy() const {
    return CompactGroupBy;
}
// Returns the suffix chosen by Build() from the `WITH <mode>` part of the
// clause (e.g. "Combine", "MergeFinalize"); Build() leaves it untouched
// when no mode is given.
TString TGroupByClause::GetSuffix() const {
    return Suffix;
}
  107. TMaybe<TVector<TNodePtr>> TGroupByClause::MultiplyGroupingSets(const TVector<TNodePtr>& lhs, const TVector<TNodePtr>& rhs) const {
  108. TVector<TNodePtr> content;
  109. for (const auto& leftNode: lhs) {
  110. auto leftPtr = leftNode->ContentListPtr();
  111. if (!leftPtr) {
  112. // TODO: shouldn't happen
  113. Ctx.Error() << "Unable to multiply grouping sets";
  114. return {};
  115. }
  116. for (const auto& rightNode: rhs) {
  117. TVector<TNodePtr> mulItem(leftPtr->begin(), leftPtr->end());
  118. auto rightPtr = rightNode->ContentListPtr();
  119. if (!rightPtr) {
  120. // TODO: shouldn't happen
  121. Ctx.Error() << "Unable to multiply grouping sets";
  122. return {};
  123. }
  124. mulItem.insert(mulItem.end(), rightPtr->begin(), rightPtr->end());
  125. content.push_back(BuildListOfNamedNodes(Ctx.Pos(), std::move(mulItem)));
  126. }
  127. }
  128. return content;
  129. }
  130. bool TGroupByClause::ResolveGroupByAndGrouping() {
  131. auto listPos = std::find_if(GroupBySet.begin(), GroupBySet.end(), [](const TNodePtr& node) {
  132. return node->ContentListPtr();
  133. });
  134. if (listPos == GroupBySet.end()) {
  135. return true;
  136. }
  137. auto curContent = *(*listPos)->ContentListPtr();
  138. if (listPos != GroupBySet.begin()) {
  139. TVector<TNodePtr> emulate(GroupBySet.begin(), listPos);
  140. TVector<TNodePtr> emulateContent(1, BuildListOfNamedNodes(Ctx.Pos(), std::move(emulate)));
  141. auto mult = MultiplyGroupingSets(emulateContent, curContent);
  142. if (!mult) {
  143. return false;
  144. }
  145. curContent = *mult;
  146. }
  147. for (++listPos; listPos != GroupBySet.end(); ++listPos) {
  148. auto newElem = (*listPos)->ContentListPtr();
  149. if (newElem) {
  150. auto mult = MultiplyGroupingSets(curContent, *newElem);
  151. if (!mult) {
  152. return false;
  153. }
  154. curContent = *mult;
  155. } else {
  156. TVector<TNodePtr> emulate(1, *listPos);
  157. TVector<TNodePtr> emulateContent(1, BuildListOfNamedNodes(Ctx.Pos(), std::move(emulate)));
  158. auto mult = MultiplyGroupingSets(curContent, emulateContent);
  159. if (!mult) {
  160. return false;
  161. }
  162. curContent = *mult;
  163. }
  164. }
  165. TVector<TNodePtr> result(1, BuildListOfNamedNodes(Ctx.Pos(), std::move(curContent)));
  166. std::swap(result, GroupBySet);
  167. return true;
  168. }
  169. bool TGroupByClause::GroupingElement(const TRule_grouping_element& node, EGroupByFeatures featureContext) {
  170. TSourcePtr res;
  171. TVector<TNodePtr> emptyContent;
  172. switch (node.Alt_case()) {
  173. case TRule_grouping_element::kAltGroupingElement1:
  174. if (!OrdinaryGroupingSet(node.GetAlt_grouping_element1().GetRule_ordinary_grouping_set1(), featureContext)) {
  175. return false;
  176. }
  177. Features().Set(EGroupByFeatures::Ordinary);
  178. break;
  179. case TRule_grouping_element::kAltGroupingElement2: {
  180. TGroupByClause subClause(Ctx, Mode, GroupSetContext);
  181. if (!subClause.OrdinaryGroupingSetList(node.GetAlt_grouping_element2().GetRule_rollup_list1().GetRule_ordinary_grouping_set_list3(),
  182. EGroupByFeatures::Rollup))
  183. {
  184. return false;
  185. }
  186. auto& content = subClause.Content();
  187. TVector<TNodePtr> collection;
  188. for (auto limit = content.end(), begin = content.begin(); limit != begin; --limit) {
  189. TVector<TNodePtr> grouping(begin, limit);
  190. collection.push_back(BuildListOfNamedNodes(Ctx.Pos(), std::move(grouping)));
  191. }
  192. collection.push_back(BuildListOfNamedNodes(Ctx.Pos(), std::move(emptyContent)));
  193. GroupBySet.push_back(BuildListOfNamedNodes(Ctx.Pos(), std::move(collection)));
  194. Ctx.IncrementMonCounter("sql_features", TStringBuilder() << "GroupByRollup" << content.size());
  195. Features().Set(EGroupByFeatures::Rollup);
  196. break;
  197. }
  198. case TRule_grouping_element::kAltGroupingElement3: {
  199. TGroupByClause subClause(Ctx, Mode, GroupSetContext);
  200. if (!subClause.OrdinaryGroupingSetList(node.GetAlt_grouping_element3().GetRule_cube_list1().GetRule_ordinary_grouping_set_list3(),
  201. EGroupByFeatures::Cube))
  202. {
  203. return false;
  204. }
  205. auto& content = subClause.Content();
  206. if (content.size() > Ctx.PragmaGroupByCubeLimit) {
  207. Ctx.Error() << "GROUP BY CUBE is allowed only for " << Ctx.PragmaGroupByCubeLimit << " columns, but you use " << content.size();
  208. return false;
  209. }
  210. TVector<TNodePtr> collection;
  211. for (unsigned mask = (1 << content.size()) - 1; mask > 0; --mask) {
  212. TVector<TNodePtr> grouping;
  213. for (unsigned index = 0; index < content.size(); ++index) {
  214. if (mask & (1 << index)) {
  215. grouping.push_back(content[content.size() - index - 1]);
  216. }
  217. }
  218. collection.push_back(BuildListOfNamedNodes(Ctx.Pos(), std::move(grouping)));
  219. }
  220. collection.push_back(BuildListOfNamedNodes(Ctx.Pos(), std::move(emptyContent)));
  221. GroupBySet.push_back(BuildListOfNamedNodes(Ctx.Pos(), std::move(collection)));
  222. Ctx.IncrementMonCounter("sql_features", TStringBuilder() << "GroupByCube" << content.size());
  223. Features().Set(EGroupByFeatures::Cube);
  224. break;
  225. }
  226. case TRule_grouping_element::kAltGroupingElement4: {
  227. auto listNode = node.GetAlt_grouping_element4().GetRule_grouping_sets_specification1().GetRule_grouping_element_list4();
  228. TGroupByClause subClause(Ctx, Mode, GroupSetContext);
  229. if (!subClause.ParseList(listNode, EGroupByFeatures::GroupingSet)) {
  230. return false;
  231. }
  232. auto& content = subClause.Content();
  233. TVector<TNodePtr> collection;
  234. bool hasEmpty = false;
  235. for (auto& elem: content) {
  236. auto elemContent = elem->ContentListPtr();
  237. if (elemContent) {
  238. if (!elemContent->empty() && elemContent->front()->ContentListPtr()) {
  239. for (auto& sub: *elemContent) {
  240. FeedCollection(sub, collection, hasEmpty);
  241. }
  242. } else {
  243. FeedCollection(elem, collection, hasEmpty);
  244. }
  245. } else {
  246. TVector<TNodePtr> elemList(1, std::move(elem));
  247. collection.push_back(BuildListOfNamedNodes(Ctx.Pos(), std::move(elemList)));
  248. }
  249. }
  250. GroupBySet.push_back(BuildListOfNamedNodes(Ctx.Pos(), std::move(collection)));
  251. Features().Set(EGroupByFeatures::GroupingSet);
  252. break;
  253. }
  254. case TRule_grouping_element::kAltGroupingElement5: {
  255. if (!HoppingWindow(node.GetAlt_grouping_element5().GetRule_hopping_window_specification1())) {
  256. return false;
  257. }
  258. break;
  259. }
  260. case TRule_grouping_element::ALT_NOT_SET:
  261. Y_ABORT("You should change implementation according to grammar changes");
  262. }
  263. return true;
  264. }
  265. void TGroupByClause::FeedCollection(const TNodePtr& elem, TVector<TNodePtr>& collection, bool& hasEmpty) const {
  266. auto elemContentPtr = elem->ContentListPtr();
  267. if (elemContentPtr && elemContentPtr->empty()) {
  268. if (hasEmpty) {
  269. return;
  270. }
  271. hasEmpty = true;
  272. }
  273. collection.push_back(elem);
  274. }
  275. bool TGroupByClause::OrdinaryGroupingSet(const TRule_ordinary_grouping_set& node, EGroupByFeatures featureContext) {
  276. TNodePtr namedExprNode;
  277. {
  278. TColumnRefScope scope(Ctx, EColumnRefState::Allow);
  279. namedExprNode = NamedExpr(node.GetRule_named_expr1(), EExpr::GroupBy);
  280. }
  281. if (!namedExprNode) {
  282. return false;
  283. }
  284. auto nodeLabel = namedExprNode->GetLabel();
  285. auto contentPtr = namedExprNode->ContentListPtr();
  286. if (contentPtr) {
  287. if (nodeLabel && (contentPtr->size() != 1 || contentPtr->front()->GetLabel())) {
  288. Ctx.Error() << "Unable to use aliases for list of named expressions";
  289. Ctx.IncrementMonCounter("sql_errors", "GroupByAliasForListOfExpressions");
  290. return false;
  291. }
  292. for (auto& content: *contentPtr) {
  293. auto label = content->GetLabel();
  294. if (!label) {
  295. if (content->GetColumnName()) {
  296. namedExprNode->AssumeColumn();
  297. continue;
  298. }
  299. if (!AllowUnnamed(content->GetPos(), featureContext)) {
  300. return false;
  301. }
  302. content->SetLabel(label = GenerateGroupByExprName());
  303. }
  304. if (!AddAlias(label, content)) {
  305. return false;
  306. }
  307. content = BuildColumn(content->GetPos(), label);
  308. }
  309. } else {
  310. if (!nodeLabel && namedExprNode->GetColumnName()) {
  311. namedExprNode->AssumeColumn();
  312. }
  313. if (!nodeLabel && !namedExprNode->GetColumnName()) {
  314. if (!AllowUnnamed(namedExprNode->GetPos(), featureContext)) {
  315. return false;
  316. }
  317. namedExprNode->SetLabel(nodeLabel = GenerateGroupByExprName());
  318. }
  319. if (nodeLabel) {
  320. if (!AddAlias(nodeLabel, namedExprNode)) {
  321. return false;
  322. }
  323. namedExprNode = BuildColumn(namedExprNode->GetPos(), nodeLabel);
  324. }
  325. }
  326. GroupBySet.emplace_back(std::move(namedExprNode));
  327. return true;
  328. }
  329. bool TGroupByClause::OrdinaryGroupingSetList(const TRule_ordinary_grouping_set_list& node, EGroupByFeatures featureContext) {
  330. if (!OrdinaryGroupingSet(node.GetRule_ordinary_grouping_set1(), featureContext)) {
  331. return false;
  332. }
  333. for (auto& block: node.GetBlock2()) {
  334. if (!OrdinaryGroupingSet(block.GetRule_ordinary_grouping_set2(), featureContext)) {
  335. return false;
  336. }
  337. }
  338. return true;
  339. }
  340. bool TGroupByClause::HoppingWindow(const TRule_hopping_window_specification& node) {
  341. if (LegacyHoppingWindowSpec) {
  342. Ctx.Error() << "Duplicate hopping window specification.";
  343. return false;
  344. }
  345. LegacyHoppingWindowSpec = new TLegacyHoppingWindowSpec;
  346. {
  347. TColumnRefScope scope(Ctx, EColumnRefState::Allow);
  348. TSqlExpression expr(Ctx, Mode);
  349. LegacyHoppingWindowSpec->TimeExtractor = expr.Build(node.GetRule_expr3());
  350. if (!LegacyHoppingWindowSpec->TimeExtractor) {
  351. return false;
  352. }
  353. }
  354. auto processIntervalParam = [&] (const TRule_expr& rule) -> TNodePtr {
  355. TSqlExpression expr(Ctx, Mode);
  356. auto node = expr.Build(rule);
  357. if (!node) {
  358. return nullptr;
  359. }
  360. auto literal = node->GetLiteral("String");
  361. if (!literal) {
  362. return new TAstListNodeImpl(Ctx.Pos(), {
  363. new TAstAtomNodeImpl(Ctx.Pos(), "EvaluateExpr", TNodeFlags::Default),
  364. node
  365. });
  366. }
  367. const auto out = NKikimr::NMiniKQL::ValueFromString(NKikimr::NUdf::EDataSlot::Interval, *literal);
  368. if (!out) {
  369. Ctx.Error(node->GetPos()) << "Expected interval in ISO 8601 format";
  370. return nullptr;
  371. }
  372. if ('T' == literal->back()) {
  373. Ctx.Error(node->GetPos()) << "Time prefix 'T' at end of interval constant. The designator 'T' shall be absent if all of the time components are absent.";
  374. return nullptr;
  375. }
  376. return new TAstListNodeImpl(Ctx.Pos(), {
  377. new TAstAtomNodeImpl(Ctx.Pos(), "Interval", TNodeFlags::Default),
  378. new TAstListNodeImpl(Ctx.Pos(), {
  379. new TAstAtomNodeImpl(Ctx.Pos(), "quote", TNodeFlags::Default),
  380. new TAstAtomNodeImpl(Ctx.Pos(), ToString(out.Get<i64>()), TNodeFlags::Default)
  381. })
  382. });
  383. };
  384. LegacyHoppingWindowSpec->Hop = processIntervalParam(node.GetRule_expr5());
  385. if (!LegacyHoppingWindowSpec->Hop) {
  386. return false;
  387. }
  388. LegacyHoppingWindowSpec->Interval = processIntervalParam(node.GetRule_expr7());
  389. if (!LegacyHoppingWindowSpec->Interval) {
  390. return false;
  391. }
  392. LegacyHoppingWindowSpec->Delay = processIntervalParam(node.GetRule_expr9());
  393. if (!LegacyHoppingWindowSpec->Delay) {
  394. return false;
  395. }
  396. LegacyHoppingWindowSpec->DataWatermarks = Ctx.PragmaDataWatermarks;
  397. return true;
  398. }
  399. bool TGroupByClause::AllowUnnamed(TPosition pos, EGroupByFeatures featureContext) {
  400. TStringBuf feature;
  401. switch (featureContext) {
  402. case EGroupByFeatures::Ordinary:
  403. return true;
  404. case EGroupByFeatures::Rollup:
  405. feature = "ROLLUP";
  406. break;
  407. case EGroupByFeatures::Cube:
  408. feature = "CUBE";
  409. break;
  410. case EGroupByFeatures::GroupingSet:
  411. feature = "GROUPING SETS";
  412. break;
  413. default:
  414. YQL_ENSURE(false, "Unknown feature");
  415. }
  416. Ctx.Error(pos) << "Unnamed expressions are not supported in " << feature << ". Please use '<expr> AS <name>'.";
  417. Ctx.IncrementMonCounter("sql_errors", "GroupBySetNoAliasOrColumn");
  418. return false;
  419. }
// Gives mutable access to the feature flags kept in GroupSetContext; the
// flags are set by GroupingElement() and read by SetFeatures().
TGroupByClause::TGroupingSetFeatures& TGroupByClause::Features() {
    return GroupSetContext->GroupFeatures;
}
// Read-only overload: returns the feature flags kept in GroupSetContext.
const TGroupByClause::TGroupingSetFeatures& TGroupByClause::Features() const {
    return GroupSetContext->GroupFeatures;
}
  426. bool TGroupByClause::AddAlias(const TString& label, const TNodePtr& node) {
  427. if (Aliases().contains(label)) {
  428. Ctx.Error() << "Duplicated aliases not allowed";
  429. Ctx.IncrementMonCounter("sql_errors", "GroupByDuplicateAliases");
  430. return false;
  431. }
  432. Aliases().emplace(label, node);
  433. return true;
  434. }
  435. TString TGroupByClause::GenerateGroupByExprName() {
  436. return TStringBuilder() << AutogenerateNamePrefix << GroupSetContext->UnnamedCount++;
  437. }
  438. } // namespace NSQLTranslationV1