123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475 |
- #include "sql_group_by.h"
- #include "sql_expression.h"
- #include "source.h"
- #include <yql/essentials/minikql/mkql_type_ops.h>
- namespace NSQLTranslationV1 {
- using namespace NSQLv1Generated;
- const TString TGroupByClause::AutogenerateNamePrefix = "group";
- bool TGroupByClause::Build(const TRule_group_by_clause& node) {
- // group_by_clause: GROUP COMPACT? BY opt_set_quantifier grouping_element_list (WITH an_id)?;
- if (Ctx.CompactGroupBy.Defined()) {
- CompactGroupBy = *Ctx.CompactGroupBy;
- } else {
- CompactGroupBy = node.HasBlock2();
- if (!CompactGroupBy) {
- auto hints = Ctx.PullHintForToken(Ctx.TokenPosition(node.GetToken1()));
- CompactGroupBy = AnyOf(hints, [](const NSQLTranslation::TSQLHint& hint) { return to_lower(hint.Name) == "compact"; });
- }
- }
- TPosition distinctPos;
- if (IsDistinctOptSet(node.GetRule_opt_set_quantifier4(), distinctPos)) {
- Ctx.Error(distinctPos) << "DISTINCT is not supported in GROUP BY clause yet!";
- Ctx.IncrementMonCounter("sql_errors", "DistinctInGroupByNotSupported");
- return false;
- }
- if (!ParseList(node.GetRule_grouping_element_list5(), EGroupByFeatures::Ordinary)) {
- return false;
- }
- if (node.HasBlock6()) {
- TString mode = Id(node.GetBlock6().GetRule_an_id2(), *this);
- TMaybe<TIssue> normalizeError = NormalizeName(Ctx.Pos(), mode);
- if (!normalizeError.Empty()) {
- Error() << normalizeError->GetMessage();
- Ctx.IncrementMonCounter("sql_errors", "NormalizeGroupByModeError");
- return false;
- }
- if (mode == "combine") {
- Suffix = "Combine";
- } else if (mode == "combinestate") {
- Suffix = "CombineState";
- } else if (mode == "mergestate") {
- Suffix = "MergeState";
- } else if (mode == "finalize") {
- Suffix = "Finalize";
- } else if (mode == "mergefinalize") {
- Suffix = "MergeFinalize";
- } else if (mode == "mergemanyfinalize") {
- Suffix = "MergeManyFinalize";
- } else {
- Ctx.Error() << "Unsupported group by mode: " << mode;
- Ctx.IncrementMonCounter("sql_errors", "GroupByModeUnknown");
- return false;
- }
- }
- if (!ResolveGroupByAndGrouping()) {
- return false;
- }
- return true;
- }
- bool TGroupByClause::ParseList(const TRule_grouping_element_list& groupingListNode, EGroupByFeatures featureContext) {
- if (!GroupingElement(groupingListNode.GetRule_grouping_element1(), featureContext)) {
- return false;
- }
- for (auto b: groupingListNode.GetBlock2()) {
- if (!GroupingElement(b.GetRule_grouping_element2(), featureContext)) {
- return false;
- }
- }
- return true;
- }
- void TGroupByClause::SetFeatures(const TString& field) const {
- Ctx.IncrementMonCounter(field, "GroupBy");
- const auto& features = Features();
- if (features.Test(EGroupByFeatures::Ordinary)) {
- Ctx.IncrementMonCounter(field, "GroupByOrdinary");
- }
- if (features.Test(EGroupByFeatures::Expression)) {
- Ctx.IncrementMonCounter(field, "GroupByExpression");
- }
- if (features.Test(EGroupByFeatures::Rollup)) {
- Ctx.IncrementMonCounter(field, "GroupByRollup");
- }
- if (features.Test(EGroupByFeatures::Cube)) {
- Ctx.IncrementMonCounter(field, "GroupByCube");
- }
- if (features.Test(EGroupByFeatures::GroupingSet)) {
- Ctx.IncrementMonCounter(field, "GroupByGroupingSet");
- }
- if (features.Test(EGroupByFeatures::Empty)) {
- Ctx.IncrementMonCounter(field, "GroupByEmpty");
- }
- }
- TVector<TNodePtr>& TGroupByClause::Content() {
- return GroupBySet;
- }
- TMap<TString, TNodePtr>& TGroupByClause::Aliases() {
- return GroupSetContext->NodeAliases;
- }
- TLegacyHoppingWindowSpecPtr TGroupByClause::GetLegacyHoppingWindow() const {
- return LegacyHoppingWindowSpec;
- }
- bool TGroupByClause::IsCompactGroupBy() const {
- return CompactGroupBy;
- }
- TString TGroupByClause::GetSuffix() const {
- return Suffix;
- }
- TMaybe<TVector<TNodePtr>> TGroupByClause::MultiplyGroupingSets(const TVector<TNodePtr>& lhs, const TVector<TNodePtr>& rhs) const {
- TVector<TNodePtr> content;
- for (const auto& leftNode: lhs) {
- auto leftPtr = leftNode->ContentListPtr();
- if (!leftPtr) {
- // TODO: shouldn't happen
- Ctx.Error() << "Unable to multiply grouping sets";
- return {};
- }
- for (const auto& rightNode: rhs) {
- TVector<TNodePtr> mulItem(leftPtr->begin(), leftPtr->end());
- auto rightPtr = rightNode->ContentListPtr();
- if (!rightPtr) {
- // TODO: shouldn't happen
- Ctx.Error() << "Unable to multiply grouping sets";
- return {};
- }
- mulItem.insert(mulItem.end(), rightPtr->begin(), rightPtr->end());
- content.push_back(BuildListOfNamedNodes(Ctx.Pos(), std::move(mulItem)));
- }
- }
- return content;
- }
- bool TGroupByClause::ResolveGroupByAndGrouping() {
- auto listPos = std::find_if(GroupBySet.begin(), GroupBySet.end(), [](const TNodePtr& node) {
- return node->ContentListPtr();
- });
- if (listPos == GroupBySet.end()) {
- return true;
- }
- auto curContent = *(*listPos)->ContentListPtr();
- if (listPos != GroupBySet.begin()) {
- TVector<TNodePtr> emulate(GroupBySet.begin(), listPos);
- TVector<TNodePtr> emulateContent(1, BuildListOfNamedNodes(Ctx.Pos(), std::move(emulate)));
- auto mult = MultiplyGroupingSets(emulateContent, curContent);
- if (!mult) {
- return false;
- }
- curContent = *mult;
- }
- for (++listPos; listPos != GroupBySet.end(); ++listPos) {
- auto newElem = (*listPos)->ContentListPtr();
- if (newElem) {
- auto mult = MultiplyGroupingSets(curContent, *newElem);
- if (!mult) {
- return false;
- }
- curContent = *mult;
- } else {
- TVector<TNodePtr> emulate(1, *listPos);
- TVector<TNodePtr> emulateContent(1, BuildListOfNamedNodes(Ctx.Pos(), std::move(emulate)));
- auto mult = MultiplyGroupingSets(curContent, emulateContent);
- if (!mult) {
- return false;
- }
- curContent = *mult;
- }
- }
- TVector<TNodePtr> result(1, BuildListOfNamedNodes(Ctx.Pos(), std::move(curContent)));
- std::swap(result, GroupBySet);
- return true;
- }
- bool TGroupByClause::GroupingElement(const TRule_grouping_element& node, EGroupByFeatures featureContext) {
- TSourcePtr res;
- TVector<TNodePtr> emptyContent;
- switch (node.Alt_case()) {
- case TRule_grouping_element::kAltGroupingElement1:
- if (!OrdinaryGroupingSet(node.GetAlt_grouping_element1().GetRule_ordinary_grouping_set1(), featureContext)) {
- return false;
- }
- Features().Set(EGroupByFeatures::Ordinary);
- break;
- case TRule_grouping_element::kAltGroupingElement2: {
- TGroupByClause subClause(Ctx, Mode, GroupSetContext);
- if (!subClause.OrdinaryGroupingSetList(node.GetAlt_grouping_element2().GetRule_rollup_list1().GetRule_ordinary_grouping_set_list3(),
- EGroupByFeatures::Rollup))
- {
- return false;
- }
- auto& content = subClause.Content();
- TVector<TNodePtr> collection;
- for (auto limit = content.end(), begin = content.begin(); limit != begin; --limit) {
- TVector<TNodePtr> grouping(begin, limit);
- collection.push_back(BuildListOfNamedNodes(Ctx.Pos(), std::move(grouping)));
- }
- collection.push_back(BuildListOfNamedNodes(Ctx.Pos(), std::move(emptyContent)));
- GroupBySet.push_back(BuildListOfNamedNodes(Ctx.Pos(), std::move(collection)));
- Ctx.IncrementMonCounter("sql_features", TStringBuilder() << "GroupByRollup" << content.size());
- Features().Set(EGroupByFeatures::Rollup);
- break;
- }
- case TRule_grouping_element::kAltGroupingElement3: {
- TGroupByClause subClause(Ctx, Mode, GroupSetContext);
- if (!subClause.OrdinaryGroupingSetList(node.GetAlt_grouping_element3().GetRule_cube_list1().GetRule_ordinary_grouping_set_list3(),
- EGroupByFeatures::Cube))
- {
- return false;
- }
- auto& content = subClause.Content();
- if (content.size() > Ctx.PragmaGroupByCubeLimit) {
- Ctx.Error() << "GROUP BY CUBE is allowed only for " << Ctx.PragmaGroupByCubeLimit << " columns, but you use " << content.size();
- return false;
- }
- TVector<TNodePtr> collection;
- for (unsigned mask = (1 << content.size()) - 1; mask > 0; --mask) {
- TVector<TNodePtr> grouping;
- for (unsigned index = 0; index < content.size(); ++index) {
- if (mask & (1 << index)) {
- grouping.push_back(content[content.size() - index - 1]);
- }
- }
- collection.push_back(BuildListOfNamedNodes(Ctx.Pos(), std::move(grouping)));
- }
- collection.push_back(BuildListOfNamedNodes(Ctx.Pos(), std::move(emptyContent)));
- GroupBySet.push_back(BuildListOfNamedNodes(Ctx.Pos(), std::move(collection)));
- Ctx.IncrementMonCounter("sql_features", TStringBuilder() << "GroupByCube" << content.size());
- Features().Set(EGroupByFeatures::Cube);
- break;
- }
- case TRule_grouping_element::kAltGroupingElement4: {
- auto listNode = node.GetAlt_grouping_element4().GetRule_grouping_sets_specification1().GetRule_grouping_element_list4();
- TGroupByClause subClause(Ctx, Mode, GroupSetContext);
- if (!subClause.ParseList(listNode, EGroupByFeatures::GroupingSet)) {
- return false;
- }
- auto& content = subClause.Content();
- TVector<TNodePtr> collection;
- bool hasEmpty = false;
- for (auto& elem: content) {
- auto elemContent = elem->ContentListPtr();
- if (elemContent) {
- if (!elemContent->empty() && elemContent->front()->ContentListPtr()) {
- for (auto& sub: *elemContent) {
- FeedCollection(sub, collection, hasEmpty);
- }
- } else {
- FeedCollection(elem, collection, hasEmpty);
- }
- } else {
- TVector<TNodePtr> elemList(1, std::move(elem));
- collection.push_back(BuildListOfNamedNodes(Ctx.Pos(), std::move(elemList)));
- }
- }
- GroupBySet.push_back(BuildListOfNamedNodes(Ctx.Pos(), std::move(collection)));
- Features().Set(EGroupByFeatures::GroupingSet);
- break;
- }
- case TRule_grouping_element::kAltGroupingElement5: {
- if (!HoppingWindow(node.GetAlt_grouping_element5().GetRule_hopping_window_specification1())) {
- return false;
- }
- break;
- }
- case TRule_grouping_element::ALT_NOT_SET:
- Y_ABORT("You should change implementation according to grammar changes");
- }
- return true;
- }
- void TGroupByClause::FeedCollection(const TNodePtr& elem, TVector<TNodePtr>& collection, bool& hasEmpty) const {
- auto elemContentPtr = elem->ContentListPtr();
- if (elemContentPtr && elemContentPtr->empty()) {
- if (hasEmpty) {
- return;
- }
- hasEmpty = true;
- }
- collection.push_back(elem);
- }
- bool TGroupByClause::OrdinaryGroupingSet(const TRule_ordinary_grouping_set& node, EGroupByFeatures featureContext) {
- TNodePtr namedExprNode;
- {
- TColumnRefScope scope(Ctx, EColumnRefState::Allow);
- namedExprNode = NamedExpr(node.GetRule_named_expr1(), EExpr::GroupBy);
- }
- if (!namedExprNode) {
- return false;
- }
- auto nodeLabel = namedExprNode->GetLabel();
- auto contentPtr = namedExprNode->ContentListPtr();
- if (contentPtr) {
- if (nodeLabel && (contentPtr->size() != 1 || contentPtr->front()->GetLabel())) {
- Ctx.Error() << "Unable to use aliases for list of named expressions";
- Ctx.IncrementMonCounter("sql_errors", "GroupByAliasForListOfExpressions");
- return false;
- }
- for (auto& content: *contentPtr) {
- auto label = content->GetLabel();
- if (!label) {
- if (content->GetColumnName()) {
- namedExprNode->AssumeColumn();
- continue;
- }
- if (!AllowUnnamed(content->GetPos(), featureContext)) {
- return false;
- }
- content->SetLabel(label = GenerateGroupByExprName());
- }
- if (!AddAlias(label, content)) {
- return false;
- }
- content = BuildColumn(content->GetPos(), label);
- }
- } else {
- if (!nodeLabel && namedExprNode->GetColumnName()) {
- namedExprNode->AssumeColumn();
- }
- if (!nodeLabel && !namedExprNode->GetColumnName()) {
- if (!AllowUnnamed(namedExprNode->GetPos(), featureContext)) {
- return false;
- }
- namedExprNode->SetLabel(nodeLabel = GenerateGroupByExprName());
- }
- if (nodeLabel) {
- if (!AddAlias(nodeLabel, namedExprNode)) {
- return false;
- }
- namedExprNode = BuildColumn(namedExprNode->GetPos(), nodeLabel);
- }
- }
- GroupBySet.emplace_back(std::move(namedExprNode));
- return true;
- }
- bool TGroupByClause::OrdinaryGroupingSetList(const TRule_ordinary_grouping_set_list& node, EGroupByFeatures featureContext) {
- if (!OrdinaryGroupingSet(node.GetRule_ordinary_grouping_set1(), featureContext)) {
- return false;
- }
- for (auto& block: node.GetBlock2()) {
- if (!OrdinaryGroupingSet(block.GetRule_ordinary_grouping_set2(), featureContext)) {
- return false;
- }
- }
- return true;
- }
- bool TGroupByClause::HoppingWindow(const TRule_hopping_window_specification& node) {
- if (LegacyHoppingWindowSpec) {
- Ctx.Error() << "Duplicate hopping window specification.";
- return false;
- }
- LegacyHoppingWindowSpec = new TLegacyHoppingWindowSpec;
- {
- TColumnRefScope scope(Ctx, EColumnRefState::Allow);
- TSqlExpression expr(Ctx, Mode);
- LegacyHoppingWindowSpec->TimeExtractor = expr.Build(node.GetRule_expr3());
- if (!LegacyHoppingWindowSpec->TimeExtractor) {
- return false;
- }
- }
- auto processIntervalParam = [&] (const TRule_expr& rule) -> TNodePtr {
- TSqlExpression expr(Ctx, Mode);
- auto node = expr.Build(rule);
- if (!node) {
- return nullptr;
- }
- auto literal = node->GetLiteral("String");
- if (!literal) {
- return new TAstListNodeImpl(Ctx.Pos(), {
- new TAstAtomNodeImpl(Ctx.Pos(), "EvaluateExpr", TNodeFlags::Default),
- node
- });
- }
- const auto out = NKikimr::NMiniKQL::ValueFromString(NKikimr::NUdf::EDataSlot::Interval, *literal);
- if (!out) {
- Ctx.Error(node->GetPos()) << "Expected interval in ISO 8601 format";
- return nullptr;
- }
- if ('T' == literal->back()) {
- Ctx.Error(node->GetPos()) << "Time prefix 'T' at end of interval constant. The designator 'T' shall be absent if all of the time components are absent.";
- return nullptr;
- }
- return new TAstListNodeImpl(Ctx.Pos(), {
- new TAstAtomNodeImpl(Ctx.Pos(), "Interval", TNodeFlags::Default),
- new TAstListNodeImpl(Ctx.Pos(), {
- new TAstAtomNodeImpl(Ctx.Pos(), "quote", TNodeFlags::Default),
- new TAstAtomNodeImpl(Ctx.Pos(), ToString(out.Get<i64>()), TNodeFlags::Default)
- })
- });
- };
- LegacyHoppingWindowSpec->Hop = processIntervalParam(node.GetRule_expr5());
- if (!LegacyHoppingWindowSpec->Hop) {
- return false;
- }
- LegacyHoppingWindowSpec->Interval = processIntervalParam(node.GetRule_expr7());
- if (!LegacyHoppingWindowSpec->Interval) {
- return false;
- }
- LegacyHoppingWindowSpec->Delay = processIntervalParam(node.GetRule_expr9());
- if (!LegacyHoppingWindowSpec->Delay) {
- return false;
- }
- LegacyHoppingWindowSpec->DataWatermarks = Ctx.PragmaDataWatermarks;
- return true;
- }
- bool TGroupByClause::AllowUnnamed(TPosition pos, EGroupByFeatures featureContext) {
- TStringBuf feature;
- switch (featureContext) {
- case EGroupByFeatures::Ordinary:
- return true;
- case EGroupByFeatures::Rollup:
- feature = "ROLLUP";
- break;
- case EGroupByFeatures::Cube:
- feature = "CUBE";
- break;
- case EGroupByFeatures::GroupingSet:
- feature = "GROUPING SETS";
- break;
- default:
- YQL_ENSURE(false, "Unknown feature");
- }
- Ctx.Error(pos) << "Unnamed expressions are not supported in " << feature << ". Please use '<expr> AS <name>'.";
- Ctx.IncrementMonCounter("sql_errors", "GroupBySetNoAliasOrColumn");
- return false;
- }
- TGroupByClause::TGroupingSetFeatures& TGroupByClause::Features() {
- return GroupSetContext->GroupFeatures;
- }
- const TGroupByClause::TGroupingSetFeatures& TGroupByClause::Features() const {
- return GroupSetContext->GroupFeatures;
- }
- bool TGroupByClause::AddAlias(const TString& label, const TNodePtr& node) {
- if (Aliases().contains(label)) {
- Ctx.Error() << "Duplicated aliases not allowed";
- Ctx.IncrementMonCounter("sql_errors", "GroupByDuplicateAliases");
- return false;
- }
- Aliases().emplace(label, node);
- return true;
- }
- TString TGroupByClause::GenerateGroupByExprName() {
- return TStringBuilder() << AutogenerateNamePrefix << GroupSetContext->UnnamedCount++;
- }
- } // namespace NSQLTranslationV1
|