12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277
7277827792780278127822783278427852786278727882789279027912792279327942795279627972798279928002801280228032804280528062807280828092810281128122813281428152816281728182819282028212822282328242825282628272828282928302831283228332834283528362837283828392840284128422843284428452846284728482849285028512852285328542855285628572858285928602861286228632864286528662867286828692870287128722873287428752876287728782879288028812882288328842885288628872888288928902891289228932894289528962897289828992900290129022903290429052906290729082909291029112912291329142915291629172918291929202921292229232924292529262927292829292930293129322933293429352936293729382939294029412942294329442945294629472948294929502951295229532954295529562957295829592960296129622963296429652966296729682969297029712972297329742975297629772978297929802981298229832984 |
- #include "yql_aggregate_expander.h"
- #include <yql/essentials/core/yql_expr_optimize.h>
- #include <yql/essentials/core/yql_expr_type_annotation.h>
- #include <yql/essentials/core/yql_opt_window.h>
- #include <yql/essentials/core/yql_opt_utils.h>
- #include <yql/essentials/core/yql_type_helpers.h>
- #include <yql/essentials/utils/log/log.h>
- namespace NYql {
- TExprNode::TPtr TAggregateExpander::ExpandAggregate() {
- YQL_CLOG(DEBUG, Core) << "Expand " << Node->Content();
- auto result = ExpandAggregateWithFullOutput();
- if (result) {
- auto outputColumns = GetSetting(*Node->Child(NNodes::TCoAggregate::idx_Settings), "output_columns");
- if (outputColumns) {
- result = Ctx.NewCallable(result->Pos(), "ExtractMembers", { result, outputColumns->ChildPtr(1) });
- }
- }
- return result;
- }
- TExprNode::TPtr TAggregateExpander::ExpandAggregateWithFullOutput()
- {
- Suffix = Node->Content();
- YQL_ENSURE(Suffix.SkipPrefix("Aggregate"));
- AggList = Node->HeadPtr();
- KeyColumns = Node->ChildPtr(1);
- AggregatedColumns = Node->Child(2);
- auto settings = Node->Child(3);
- bool allTraitsCollected = CollectTraits();
- YQL_ENSURE(!HasSetting(*settings, "hopping"), "Aggregate with hopping unsupported here.");
- HaveDistinct = AnyOf(AggregatedColumns->ChildrenList(),
- [](const auto& child) { return child->ChildrenSize() == 3; });
- EffectiveCompact = (HaveDistinct && CompactForDistinct && !UseBlocks) || ForceCompact || HasSetting(*settings, "compact");
- for (const auto& trait : Traits) {
- auto mergeLambda = trait->Child(5);
- if (mergeLambda->Tail().IsCallable("Void")) {
- EffectiveCompact = true;
- break;
- }
- }
- if (Suffix == "Finalize") {
- EffectiveCompact = true;
- Suffix = "";
- } else if (Suffix != "") {
- EffectiveCompact = false;
- }
- OriginalRowType = GetSeqItemType(*Node->Head().GetTypeAnn()).Cast<TStructExprType>();
- RowItems = OriginalRowType->GetItems();
- ProcessSessionSetting(GetSetting(*settings, "session"));
- RowType = Ctx.MakeType<TStructExprType>(RowItems);
- TVector<const TTypeAnnotationNode*> keyItemTypes = GetKeyItemTypes();
- bool needPickle = IsNeedPickle(keyItemTypes);
- auto keyExtractor = GetKeyExtractor(needPickle);
- CollectColumnsSpecs();
- if (Suffix == "" && !HaveSessionSetting && !EffectiveCompact && UsePhases) {
- return GeneratePhases();
- }
- if (UseBlocks) {
- if (Suffix == "Combine") {
- auto ret = TryGenerateBlockCombine();
- if (ret) {
- return ret;
- }
- }
- if (Suffix == "MergeFinalize" || Suffix == "MergeManyFinalize") {
- auto ret = TryGenerateBlockMergeFinalize();
- if (ret) {
- return ret;
- }
- }
- }
- if (!allTraitsCollected) {
- return RebuildAggregate();
- }
- BuildNothingStates();
- if (Suffix == "MergeState" || Suffix == "MergeFinalize" || Suffix == "MergeManyFinalize") {
- return GeneratePostAggregate(AggList, keyExtractor);
- }
- TExprNode::TPtr preAgg = GeneratePartialAggregate(keyExtractor, keyItemTypes, needPickle);
- if (EffectiveCompact || !preAgg) {
- preAgg = std::move(AggList);
- }
- if (Suffix == "Combine" || Suffix == "CombineState") {
- return preAgg;
- }
- return GeneratePostAggregate(preAgg, keyExtractor);
- }
- TExprNode::TPtr TAggregateExpander::ExpandAggApply(const TExprNode::TPtr& node)
- {
- auto name = node->Head().Content();
- if (name.StartsWith("pg_")) {
- auto func = name.SubStr(3);
- auto itemType = node->Child(1)->GetTypeAnn()->Cast<TTypeExprType>()->GetType();
- TVector<ui32> argTypes;
- bool needRetype = false;
- auto status = ExtractPgTypesFromMultiLambda(node->ChildRef(2), argTypes, needRetype, Ctx);
- YQL_ENSURE(status == IGraphTransformer::TStatus::Ok);
- const NPg::TAggregateDesc* aggDescPtr;
- if (node->Content().EndsWith("State")) {
- auto stateType = node->Child(2)->GetTypeAnn()->Cast<TPgExprType>()->GetId();
- auto resultType = node->GetTypeAnn()->Cast<TPgExprType>()->GetId();
- aggDescPtr = &NPg::LookupAggregation(TString(func), stateType, resultType);
- } else {
- aggDescPtr = &NPg::LookupAggregation(TString(func), argTypes);
- }
- return ExpandPgAggregationTraits(node->Pos(), *aggDescPtr, false, node->ChildPtr(2), argTypes, itemType, Ctx);
- }
- const TString modulePath = "/lib/yql/aggregate.yqls";
- auto exportsPtr = TypesCtx.Modules->GetModule(modulePath);
- YQL_ENSURE(exportsPtr, "Failed to get module " << modulePath);
- const auto& exports = exportsPtr->Symbols();
- const auto ex = exports.find(TString(name) + "_traits_factory");
- YQL_ENSURE(exports.cend() != ex);
- TNodeOnNodeOwnedMap deepClones;
- auto lambda = Ctx.DeepCopy(*ex->second, exportsPtr->ExprCtx(), deepClones, true, false);
- auto listTypeNode = Ctx.NewCallable(node->Pos(), "ListType", { node->ChildPtr(node->ChildrenSize() == 4 && !node->Child(3)->IsCallable("Void") ? 3 : 1) });
- auto extractor = node->ChildPtr(2);
- auto traits = Ctx.ReplaceNodes(lambda->TailPtr(), {
- {lambda->Head().Child(0), listTypeNode},
- {lambda->Head().Child(1), extractor}
- });
- Ctx.Step.Repeat(TExprStep::ExpandApplyForLambdas);
- auto status = ExpandApplyNoRepeat(traits, traits, Ctx);
- YQL_ENSURE(status != IGraphTransformer::TStatus::Error);
- return traits;
- }
- bool TAggregateExpander::CollectTraits() {
- bool allTraitsCollected = true;
- for (ui32 index = 0; index < AggregatedColumns->ChildrenSize(); ++index) {
- auto trait = AggregatedColumns->Child(index)->ChildPtr(1);
- if (trait->IsCallable({ "AggApply", "AggApplyState", "AggApplyManyState" })) {
- trait = ExpandAggApply(trait);
- allTraitsCollected = false;
- }
- Traits.push_back(trait);
- }
- return allTraitsCollected;
- }
- TExprNode::TPtr TAggregateExpander::RebuildAggregate()
- {
- TExprNode::TListType newAggregatedColumnsItems = AggregatedColumns->ChildrenList();
- for (ui32 index = 0; index < AggregatedColumns->ChildrenSize(); ++index) {
- auto trait = AggregatedColumns->Child(index)->ChildPtr(1);
- if (trait->IsCallable("AggApply")) {
- newAggregatedColumnsItems[index] = Ctx.ChangeChild(*(newAggregatedColumnsItems[index]), 1, std::move(Traits[index]));
- } else if (trait->IsCallable("AggApplyState") || trait->IsCallable("AggApplyManyState")) {
- auto newTrait = Ctx.Builder(Node->Pos())
- .Callable("AggregationTraits")
- .Add(0, trait->ChildPtr(1))
- .Add(1, trait->ChildPtr(2)) // extractor for state, not initial value itself
- .Lambda(2)
- .Param("item")
- .Param("state")
- .Callable("Void")
- .Seal()
- .Seal()
- .Add(3, Traits[index]->ChildPtr(3))
- .Add(4, Traits[index]->ChildPtr(4))
- .Add(5, Traits[index]->ChildPtr(5))
- .Add(6, Traits[index]->ChildPtr(6))
- .Add(7, Traits[index]->ChildPtr(7))
- .Seal()
- .Build();
- newAggregatedColumnsItems[index] = Ctx.ChangeChild(*(newAggregatedColumnsItems[index]), 1, std::move(newTrait));
- }
- }
- return Ctx.ChangeChild(*Node, 2, Ctx.NewList(Node->Pos(), std::move(newAggregatedColumnsItems)));
- }
- TExprNode::TPtr TAggregateExpander::GetContextLambda()
- {
- return HasContextFuncs(*AggregatedColumns) ?
- Ctx.Builder(Node->Pos())
- .Lambda()
- .Param("stream")
- .Callable("WithContext")
- .Arg(0, "stream")
- .Atom(1, "Agg")
- .Seal()
- .Seal()
- .Build() :
- Ctx.Builder(Node->Pos())
- .Lambda()
- .Param("stream")
- .Arg("stream")
- .Seal()
- .Build();
- }
- void TAggregateExpander::ProcessSessionSetting(TExprNode::TPtr sessionSetting)
- {
- if (!sessionSetting) {
- return;
- }
- HaveSessionSetting = true;
- YQL_ENSURE(sessionSetting->Child(1)->Child(0)->IsAtom());
- SessionOutputColumn = sessionSetting->Child(1)->Child(0)->Content();
- // remove session column from other keys
- TExprNodeList keyColumnsList = KeyColumns->ChildrenList();
- EraseIf(keyColumnsList, [&](const auto& key) { return SessionOutputColumn == key->Content(); });
- KeyColumns = Ctx.NewList(KeyColumns->Pos(), std::move(keyColumnsList));
- SessionWindowParams.Traits = sessionSetting->Child(1)->ChildPtr(1);
- ExtractSessionWindowParams(Node->Pos(), SessionWindowParams, Ctx);
- ExtractSortKeyAndOrder(Node->Pos(), SessionWindowParams.SortTraits, SortParams, Ctx);
- if (HaveDistinct) {
- auto keySelector = BuildKeySelector(Node->Pos(), *OriginalRowType, KeyColumns, Ctx);
- const auto sessionStartMemberLambda = AddSessionParamsMemberLambda(Node->Pos(), SessionStartMemberName, keySelector,
- SessionWindowParams, Ctx);
- AggList = Ctx.Builder(Node->Pos())
- .Callable("PartitionsByKeys")
- .Add(0, AggList)
- .Add(1, keySelector)
- .Add(2, SortParams.Order)
- .Add(3, SortParams.Key)
- .Lambda(4)
- .Param("partitionedStream")
- .Apply(sessionStartMemberLambda)
- .With(0, "partitionedStream")
- .Seal()
- .Seal()
- .Seal()
- .Build();
- auto keyColumnsList = KeyColumns->ChildrenList();
- keyColumnsList.push_back(Ctx.NewAtom(Node->Pos(), SessionStartMemberName));
- KeyColumns = Ctx.NewList(Node->Pos(), std::move(keyColumnsList));
- RowItems.push_back(Ctx.MakeType<TItemExprType>(SessionStartMemberName, SessionWindowParams.KeyType));
- SessionWindowParams.Reset();
- SortParams.Key = SortParams.Order = VoidNode;
- } else {
- EffectiveCompact = true;
- }
- }
- TVector<const TTypeAnnotationNode*> TAggregateExpander::GetKeyItemTypes()
- {
- TVector<const TTypeAnnotationNode*> keyItemTypes;
- for (auto keyColumn : KeyColumns->Children()) {
- auto index = RowType->FindItem(keyColumn->Content());
- YQL_ENSURE(index, "Unknown column: " << keyColumn->Content());
- auto type = RowType->GetItems()[*index]->GetItemType();
- keyItemTypes.push_back(type);
- }
- return keyItemTypes;
- }
- bool TAggregateExpander::IsNeedPickle(const TVector<const TTypeAnnotationNode*>& keyItemTypes)
- {
- bool needPickle = false;
- for (auto type : keyItemTypes) {
- needPickle |= !IsDataOrOptionalOfData(type);
- }
- return needPickle;
- }
- TExprNode::TPtr TAggregateExpander::GetKeyExtractor(bool needPickle)
- {
- TExprNode::TPtr keyExtractor = Ctx.Builder(Node->Pos())
- .Lambda()
- .Param("item")
- .Do([&](TExprNodeBuilder& parent) -> TExprNodeBuilder& {
- if (KeyColumns->ChildrenSize() == 0) {
- return parent.Callable("Uint32").Atom(0, "0", TNodeFlags::Default).Seal();
- }
- else if (KeyColumns->ChildrenSize() == 1) {
- return parent.Callable("Member").Arg(0, "item").Add(1, KeyColumns->HeadPtr()).Seal();
- }
- else {
- auto listBuilder = parent.List();
- ui32 pos = 0;
- for (ui32 i = 0; i < KeyColumns->ChildrenSize(); ++i) {
- listBuilder
- .Callable(pos++, "Member")
- .Arg(0, "item")
- .Add(1, KeyColumns->ChildPtr(i))
- .Seal();
- }
- return listBuilder.Seal();
- }
- })
- .Seal()
- .Build();
- if (needPickle) {
- keyExtractor = Ctx.Builder(Node->Pos())
- .Lambda()
- .Param("item")
- .Callable("StablePickle")
- .Apply(0, *keyExtractor)
- .With(0, "item")
- .Seal()
- .Seal()
- .Seal()
- .Build();
- }
- return keyExtractor;
- }
- void TAggregateExpander::CollectColumnsSpecs()
- {
- for (ui32 index = 0; index < AggregatedColumns->ChildrenSize(); ++index) {
- auto child = AggregatedColumns->Child(index);
- if (const auto distinctField = (child->ChildrenSize() == 3) ? child->Child(2) : nullptr) {
- const auto ins = Distinct2Columns.emplace(distinctField->Content(), TIdxSet());
- if (ins.second) {
- DistinctFields.push_back(distinctField);
- }
- ins.first->second.insert(InitialColumnNames.size());
- } else {
- NonDistinctColumns.insert(InitialColumnNames.size());
- }
- if (child->Head().IsAtom()) {
- FinalColumnNames.push_back(child->HeadPtr());
- } else {
- FinalColumnNames.push_back(child->Head().HeadPtr());
- }
- InitialColumnNames.push_back(Ctx.NewAtom(FinalColumnNames.back()->Pos(), "_yql_agg_" + ToString(InitialColumnNames.size()), TNodeFlags::Default));
- }
- }
- void TAggregateExpander::BuildNothingStates()
- {
- for (ui32 index = 0; index < AggregatedColumns->ChildrenSize(); ++index) {
- auto trait = Traits[index];
- auto saveLambda = trait->Child(3);
- auto saveLambdaType = saveLambda->GetTypeAnn();
- auto typeNode = ExpandType(Node->Pos(), *saveLambdaType, Ctx);
- NothingStates.push_back(Ctx.Builder(Node->Pos())
- .Callable("Nothing")
- .Callable(0, "OptionalType")
- .Add(0, std::move(typeNode))
- .Seal()
- .Seal()
- .Build()
- );
- }
- }
- TExprNode::TPtr TAggregateExpander::GeneratePartialAggregate(const TExprNode::TPtr keyExtractor,
- const TVector<const TTypeAnnotationNode*>& keyItemTypes, bool needPickle)
- {
- TExprNode::TPtr pickleTypeNode = nullptr;
- if (needPickle) {
- const TTypeAnnotationNode* pickleType = nullptr;
- pickleType = KeyColumns->ChildrenSize() > 1 ? Ctx.MakeType<TTupleExprType>(keyItemTypes) : keyItemTypes[0];
- pickleTypeNode = ExpandType(Node->Pos(), *pickleType, Ctx);
- }
- TExprNode::TPtr partialAgg = nullptr;
- if (!NonDistinctColumns.empty()) {
- partialAgg = GeneratePartialAggregateForNonDistinct(keyExtractor, pickleTypeNode);
- }
- for (ui32 index = 0; index < DistinctFields.size(); ++index) {
- auto distinctField = DistinctFields[index];
- bool needDistinctPickle = EffectiveCompact ? false : needPickle;
- auto distinctGrouper = GenerateDistinctGrouper(distinctField, keyItemTypes, needDistinctPickle);
- if (!partialAgg) {
- partialAgg = std::move(distinctGrouper);
- } else {
- partialAgg = Ctx.Builder(Node->Pos())
- .Callable("Extend")
- .Add(0, std::move(partialAgg))
- .Add(1, std::move(distinctGrouper))
- .Seal()
- .Build();
- }
- }
- // If no aggregation functions then add additional combiner
- if (AggregatedColumns->ChildrenSize() == 0 && KeyColumns->ChildrenSize() > 0 && !SessionWindowParams.Update) {
- if (!partialAgg) {
- partialAgg = AggList;
- }
- auto uniqCombineInit = ReturnKeyAsIsForCombineInit(pickleTypeNode);
- auto uniqCombineUpdate = Ctx.Builder(Node->Pos())
- .Lambda()
- .Param("key")
- .Param("item")
- .Param("state")
- .Arg("state")
- .Seal()
- .Build();
- // Return state as-is
- auto uniqCombineSave = Ctx.Builder(Node->Pos())
- .Lambda()
- .Param("key")
- .Param("state")
- .Callable("Just")
- .Arg(0, "state")
- .Seal()
- .Seal()
- .Build();
- partialAgg = Ctx.Builder(Node->Pos())
- .Callable("CombineByKey")
- .Add(0, std::move(partialAgg))
- .Add(1, PreMap)
- .Add(2, keyExtractor)
- .Add(3, std::move(uniqCombineInit))
- .Add(4, std::move(uniqCombineUpdate))
- .Add(5, std::move(uniqCombineSave))
- .Seal()
- .Build();
- }
- return partialAgg;
- }
- std::function<TExprNodeBuilder& (TExprNodeBuilder&)> TAggregateExpander::GetPartialAggArgExtractor(ui32 i, bool deserialize) {
- return [&, i, deserialize](TExprNodeBuilder& parent) -> TExprNodeBuilder& {
- auto trait = Traits[i];
- auto extractorLambda = trait->Child(1);
- auto loadLambda = trait->Child(4);
- if (Suffix == "CombineState") {
- if (deserialize) {
- parent.Apply(*loadLambda)
- .With(0)
- .Apply(*extractorLambda)
- .With(0)
- .Callable("CastStruct")
- .Arg(0, "item")
- .Add(1, ExpandType(Node->Pos(), *extractorLambda->Head().Head().GetTypeAnn(), Ctx))
- .Seal()
- .Done()
- .Seal()
- .Done()
- .Seal();
- } else {
- parent.Apply(*extractorLambda)
- .With(0)
- .Callable("CastStruct")
- .Arg(0, "item")
- .Add(1, ExpandType(Node->Pos(), *extractorLambda->Head().Head().GetTypeAnn(), Ctx))
- .Seal()
- .Done()
- .Seal();
- }
- } else {
- parent.Callable("CastStruct")
- .Arg(0, "item")
- .Add(1, ExpandType(Node->Pos(), *extractorLambda->Head().Head().GetTypeAnn(), Ctx))
- .Seal();
- }
- return parent;
- };
- }
- TExprNode::TPtr TAggregateExpander::GetFinalAggStateExtractor(ui32 i) {
- auto trait = Traits[i];
- if (Suffix.StartsWith("Merge")) {
- auto lambda = trait->ChildPtr(1);
- if (!Suffix.StartsWith("MergeMany")) {
- return lambda;
- }
- if (lambda->Tail().IsCallable("Unwrap")) {
- return Ctx.Builder(Node->Pos())
- .Lambda()
- .Param("item")
- .ApplyPartial(lambda->HeadPtr(), lambda->Tail().HeadPtr())
- .With(0, "item")
- .Seal()
- .Seal()
- .Build();
- } else {
- return Ctx.Builder(Node->Pos())
- .Lambda()
- .Param("item")
- .Callable("Just")
- .Apply(0, *lambda)
- .With(0, "item")
- .Seal()
- .Seal()
- .Seal()
- .Build();
- }
- }
- bool aggregateOnly = (Suffix != "");
- const auto& columnNames = aggregateOnly ? FinalColumnNames : InitialColumnNames;
- return Ctx.Builder(Node->Pos())
- .Lambda()
- .Param("item")
- .Callable("Member")
- .Arg(0, "item")
- .Add(1, columnNames[i])
- .Seal()
- .Seal()
- .Build();
- }
- TExprNode::TPtr TAggregateExpander::MakeInputBlocks(const TExprNode::TPtr& stream, TExprNode::TListType& keyIdxs,
- TVector<TString>& outputColumns, TExprNode::TListType& aggs, bool overState, bool many, ui32* streamIdxColumn) {
- TVector<TString> inputColumns;
- auto flow = Ctx.NewCallable(Node->Pos(), "ToFlow", { stream });
- for (ui32 i = 0; i < RowType->GetSize(); ++i) {
- inputColumns.push_back(TString(RowType->GetItems()[i]->GetName()));
- }
- auto wideFlow = MakeExpandMap(Node->Pos(), inputColumns, flow, Ctx);
- TExprNode::TListType extractorArgs;
- TExprNode::TListType newRowItems;
- for (ui32 i = 0; i < RowType->GetSize(); ++i) {
- extractorArgs.push_back(Ctx.NewArgument(Node->Pos(), "field" + ToString(i)));
- newRowItems.push_back(Ctx.NewList(Node->Pos(), { Ctx.NewAtom(Node->Pos(), RowType->GetItems()[i]->GetName()), extractorArgs.back() }));
- }
- const TExprNode::TPtr newRow = Ctx.NewCallable(Node->Pos(), "AsStruct", std::move(newRowItems));
- TExprNode::TListType extractorRoots;
- TVector<const TTypeAnnotationNode*> allKeyTypes;
- for (ui32 index = 0; index < KeyColumns->ChildrenSize(); ++index) {
- auto keyName = KeyColumns->Child(index)->Content();
- auto rowIndex = RowType->FindItem(keyName);
- YQL_ENSURE(rowIndex, "Unknown column: " << keyName);
- auto type = RowType->GetItems()[*rowIndex]->GetItemType();
- extractorRoots.push_back(extractorArgs[*rowIndex]);
- allKeyTypes.push_back(type);
- keyIdxs.push_back(Ctx.NewAtom(Node->Pos(), ToString(index)));
- outputColumns.push_back(TString(keyName));
- }
- if (many) {
- auto rowIndex = RowType->FindItem("_yql_group_stream_index");
- if (!rowIndex) {
- return nullptr;
- }
- if (streamIdxColumn) {
- *streamIdxColumn = extractorRoots.size();
- }
- extractorRoots.push_back(extractorArgs[*rowIndex]);
- }
- auto outputStructType = GetSeqItemType(*Node->GetTypeAnn()).Cast<TStructExprType>();
- auto resolveStatus = TypesCtx.ArrowResolver->AreTypesSupported(Ctx.GetPosition(Node->Pos()), allKeyTypes, Ctx);
- YQL_ENSURE(resolveStatus != IArrowResolver::ERROR);
- if (resolveStatus != IArrowResolver::OK) {
- return nullptr;
- }
- for (ui32 index = 0; index < AggregatedColumns->ChildrenSize(); ++index) {
- auto trait = AggregatedColumns->Child(index)->ChildPtr(1);
- TVector<const TTypeAnnotationNode*> allTypes;
- const TTypeAnnotationNode* originalType = nullptr;
- if (overState && !trait->Child(3)->IsCallable("Void")) {
- auto originalExtractorType = trait->Child(3)->GetTypeAnn()->Cast<TTypeExprType>()->GetType();
- originalType = GetOriginalResultType(trait->Pos(), many, originalExtractorType, Ctx);
- YQL_ENSURE(originalType);
- }
- ui32 argsCount = trait->Child(2)->ChildrenSize() - 1;
- if (!overState && trait->Child(0)->Content() == "count_all") {
- argsCount = 0;
- }
- auto rowArg = &trait->Child(2)->Head().Head();
- const TNodeOnNodeOwnedMap remaps{ { rowArg, newRow } };
- TVector<TExprNode::TPtr> roots;
- for (ui32 i = 1; i < argsCount + 1; ++i) {
- auto root = trait->Child(2)->ChildPtr(i);
- allTypes.push_back(root->GetTypeAnn());
- auto status = RemapExpr(root, root, remaps, Ctx, TOptimizeExprSettings(&TypesCtx));
- YQL_ENSURE(status.Level != IGraphTransformer::TStatus::Error);
- roots.push_back(root);
- }
- aggs.push_back(Ctx.Builder(Node->Pos())
- .List()
- .Callable(0, TString("AggBlockApply") + (overState ? "State" : ""))
- .Atom(0, trait->Child(0)->Content())
- .Do([&](TExprNodeBuilder& parent) -> TExprNodeBuilder& {
- if (overState) {
- if (originalType) {
- parent.Add(1, ExpandType(Node->Pos(), *originalType, Ctx));
- } else {
- parent
- .Callable(1, "NullType")
- .Seal();
- }
- }
- return parent;
- })
- .Do([&](TExprNodeBuilder& parent) -> TExprNodeBuilder& {
- for (ui32 i = 1; i < argsCount + 1; ++i) {
- parent.Add(i + (overState ? 1 : 0), ExpandType(Node->Pos(), *trait->Child(2)->Child(i)->GetTypeAnn(), Ctx));
- }
- return parent;
- })
- .Seal()
- .Do([&](TExprNodeBuilder& parent) -> TExprNodeBuilder& {
- for (ui32 i = 1; i < argsCount + 1; ++i) {
- parent.Atom(i, ToString(extractorRoots.size() + i - 1));
- }
- return parent;
- })
- .Seal()
- .Build());
- for (auto root : roots) {
- if (many) {
- if (root->IsCallable("Unwrap")) {
- root = root->HeadPtr();
- } else {
- root = Ctx.Builder(Node->Pos())
- .Callable("Just")
- .Add(0, root)
- .Seal()
- .Build();
- }
- }
- extractorRoots.push_back(root);
- }
- auto outPos = outputStructType->FindItem(FinalColumnNames[index]->Content());
- YQL_ENSURE(outPos);
- allTypes.push_back(outputStructType->GetItems()[*outPos]->GetItemType());
- auto resolveStatus = TypesCtx.ArrowResolver->AreTypesSupported(Ctx.GetPosition(Node->Pos()), allTypes, Ctx);
- YQL_ENSURE(resolveStatus != IArrowResolver::ERROR);
- if (resolveStatus != IArrowResolver::OK) {
- return nullptr;
- }
- outputColumns.push_back(TString(FinalColumnNames[index]->Content()));
- }
- auto extractorLambda = Ctx.NewLambda(Node->Pos(), Ctx.NewArguments(Node->Pos(), std::move(extractorArgs)), std::move(extractorRoots));
- auto mappedWideFlow = Ctx.NewCallable(Node->Pos(), "WideMap", { wideFlow, extractorLambda });
- return Ctx.Builder(Node->Pos())
- .Callable("ToFlow")
- .Callable(0, "WideToBlocks")
- .Callable(0, "FromFlow")
- .Add(0, mappedWideFlow)
- .Seal()
- .Seal()
- .Seal()
- .Build();
- }
- TExprNode::TPtr TAggregateExpander::TryGenerateBlockCombineAllOrHashed() {
- if (!TypesCtx.ArrowResolver) {
- return nullptr;
- }
- const bool hashed = (KeyColumns->ChildrenSize() > 0);
- const bool isInputList = (AggList->GetTypeAnn()->GetKind() == ETypeAnnotationKind::List);
- TExprNode::TListType keyIdxs;
- TVector<TString> outputColumns;
- TExprNode::TListType aggs;
- TExprNode::TPtr stream = nullptr;
- if (isInputList) {
- stream = Ctx.NewArgument(Node->Pos(), "stream");
- } else {
- stream = AggList;
- }
- TExprNode::TPtr blocks = MakeInputBlocks(stream, keyIdxs, outputColumns, aggs, false, false);
- if (!blocks) {
- return nullptr;
- }
- TExprNode::TPtr aggWideFlow;
- if (hashed) {
- aggWideFlow = Ctx.Builder(Node->Pos())
- .Callable("ToFlow")
- .Callable(0, "WideFromBlocks")
- .Callable(0, "BlockCombineHashed")
- .Callable(0, "FromFlow")
- .Add(0, blocks)
- .Seal()
- .Callable(1, "Void")
- .Seal()
- .Add(2, Ctx.NewList(Node->Pos(), std::move(keyIdxs)))
- .Add(3, Ctx.NewList(Node->Pos(), std::move(aggs)))
- .Seal()
- .Seal()
- .Seal()
- .Build();
- } else {
- aggWideFlow = Ctx.Builder(Node->Pos())
- .Callable("ToFlow")
- .Callable(0, "BlockCombineAll")
- .Callable(0, "FromFlow")
- .Add(0, blocks)
- .Seal()
- .Callable(1, "Void")
- .Seal()
- .Add(2, Ctx.NewList(Node->Pos(), std::move(aggs)))
- .Seal()
- .Seal()
- .Build();
- }
- auto finalFlow = MakeNarrowMap(Node->Pos(), outputColumns, aggWideFlow, Ctx);
- if (isInputList) {
- auto root = Ctx.NewCallable(Node->Pos(), "FromFlow", { finalFlow });
- auto lambdaStream = Ctx.NewLambda(Node->Pos(), Ctx.NewArguments(Node->Pos(), { stream }), std::move(root));
- return Ctx.Builder(Node->Pos())
- .Callable("LMap")
- .Add(0, AggList)
- .Lambda(1)
- .Param("stream")
- .Apply(GetContextLambda())
- .With(0)
- .Apply(lambdaStream)
- .With(0, "stream")
- .Seal()
- .Done()
- .Seal()
- .Seal()
- .Seal()
- .Build();
- } else {
- return finalFlow;
- }
- }
- TExprNode::TPtr TAggregateExpander::GeneratePartialAggregateForNonDistinct(const TExprNode::TPtr& keyExtractor, const TExprNode::TPtr& pickleTypeNode)
- {
- bool combineOnly = Suffix == "Combine" || Suffix == "CombineState";
- const auto& columnNames = combineOnly ? FinalColumnNames : InitialColumnNames;
- auto initLambdaIndex = (Suffix == "CombineState") ? 4 : 1;
- auto updateLambdaIndex = (Suffix == "CombineState") ? 5 : 2;
- auto combineInit = Ctx.Builder(Node->Pos())
- .Lambda()
- .Param("key")
- .Param("item")
- .Callable("AsStruct")
- .Do([&](TExprNodeBuilder& parent) -> TExprNodeBuilder& {
- ui32 ndx = 0;
- for (ui32 i: NonDistinctColumns) {
- auto trait = Traits[i];
- auto initLambda = trait->Child(initLambdaIndex);
- if (initLambda->Head().ChildrenSize() == 1) {
- parent.List(ndx++)
- .Add(0, columnNames[i])
- .Apply(1, *initLambda)
- .With(0)
- .Do(GetPartialAggArgExtractor(i, false))
- .Done()
- .Seal()
- .Seal();
- } else {
- parent.List(ndx++)
- .Add(0, columnNames[i])
- .Apply(1, *initLambda)
- .With(0)
- .Do(GetPartialAggArgExtractor(i, false))
- .Done()
- .With(1)
- .Callable("Uint32")
- .Atom(0, ToString(i), TNodeFlags::Default)
- .Seal()
- .Done()
- .Seal()
- .Seal();
- }
- }
- return parent;
- })
- .Seal()
- .Seal()
- .Build();
- auto combineUpdate = Ctx.Builder(Node->Pos())
- .Lambda()
- .Param("key")
- .Param("item")
- .Param("state")
- .Callable("AsStruct")
- .Do([&](TExprNodeBuilder& parent) -> TExprNodeBuilder& {
- ui32 ndx = 0;
- for (ui32 i: NonDistinctColumns) {
- auto trait = Traits[i];
- auto updateLambda = trait->Child(updateLambdaIndex);
- if (updateLambda->Head().ChildrenSize() == 2) {
- parent.List(ndx++)
- .Add(0, columnNames[i])
- .Apply(1, *updateLambda)
- .With(0)
- .Do(GetPartialAggArgExtractor(i, true))
- .Done()
- .With(1)
- .Callable("Member")
- .Arg(0, "state")
- .Add(1, columnNames[i])
- .Seal()
- .Done()
- .Seal()
- .Seal();
- } else {
- parent.List(ndx++)
- .Add(0, columnNames[i])
- .Apply(1, *updateLambda)
- .With(0)
- .Do(GetPartialAggArgExtractor(i, true))
- .Done()
- .With(1)
- .Callable("Member")
- .Arg(0, "state")
- .Add(1, columnNames[i])
- .Seal()
- .Done()
- .With(2)
- .Callable("Uint32")
- .Atom(0, ToString(i), TNodeFlags::Default)
- .Seal()
- .Done()
- .Seal()
- .Seal();
- }
- }
- return parent;
- })
- .Seal()
- .Seal()
- .Build();
- auto combineSave = Ctx.Builder(Node->Pos())
- .Lambda()
- .Param("key")
- .Param("state")
- .Callable("Just")
- .Callable(0, "AsStruct")
- .Do([&](TExprNodeBuilder& parent) -> TExprNodeBuilder& {
- for (ui32 i = 0; i < columnNames.size(); ++i) {
- if (NonDistinctColumns.find(i) == NonDistinctColumns.end()) {
- parent.List(i)
- .Add(0, columnNames[i])
- .Add(1, NothingStates[i])
- .Seal();
- } else {
- auto trait = Traits[i];
- auto saveLambda = trait->Child(3);
- if (!DistinctFields.empty()) {
- parent.List(i)
- .Add(0, columnNames[i])
- .Callable(1, "Just")
- .Apply(0, *saveLambda)
- .With(0)
- .Callable("Member")
- .Arg(0, "state")
- .Add(1, columnNames[i])
- .Seal()
- .Done()
- .Seal()
- .Seal()
- .Seal();
- } else {
- parent.List(i)
- .Add(0, columnNames[i])
- .Apply(1, *saveLambda)
- .With(0)
- .Callable("Member")
- .Arg(0, "state")
- .Add(1, columnNames[i])
- .Seal()
- .Done()
- .Seal()
- .Seal();
- }
- }
- }
- return parent;
- })
- .Do([&](TExprNodeBuilder& parent) -> TExprNodeBuilder& {
- ui32 pos = 0;
- for (ui32 i = 0; i < KeyColumns->ChildrenSize(); ++i) {
- auto listBuilder = parent.List(columnNames.size() + i);
- listBuilder.Add(0, KeyColumns->ChildPtr(i));
- if (KeyColumns->ChildrenSize() > 1) {
- if (pickleTypeNode) {
- listBuilder
- .Callable(1, "Nth")
- .Callable(0, "Unpickle")
- .Add(0, pickleTypeNode)
- .Arg(1, "key")
- .Seal()
- .Atom(1, ToString(pos), TNodeFlags::Default)
- .Seal();
- } else {
- listBuilder
- .Callable(1, "Nth")
- .Arg(0, "key")
- .Atom(1, ToString(pos), TNodeFlags::Default)
- .Seal();
- }
- ++pos;
- } else {
- if (pickleTypeNode) {
- listBuilder.Callable(1, "Unpickle")
- .Add(0, pickleTypeNode)
- .Arg(1, "key")
- .Seal();
- } else {
- listBuilder.Arg(1, "key");
- }
- }
- listBuilder.Seal();
- }
- return parent;
- })
- .Seal()
- .Seal()
- .Seal()
- .Build();
- return Ctx.Builder(Node->Pos())
- .Callable("CombineByKey")
- .Add(0, AggList)
- .Add(1, PreMap)
- .Add(2, keyExtractor)
- .Add(3, std::move(combineInit))
- .Add(4, std::move(combineUpdate))
- .Add(5, std::move(combineSave))
- .Seal()
- .Build();
- }
- void TAggregateExpander::GenerateInitForDistinct(TExprNodeBuilder& parent, ui32& ndx, const TIdxSet& indicies, const TExprNode::TPtr& distinctField) {
- for (ui32 i: indicies) {
- auto trait = Traits[i];
- auto initLambda = trait->Child(1);
- if (initLambda->Head().ChildrenSize() == 1) {
- parent.List(ndx++)
- .Add(0, InitialColumnNames[i])
- .Apply(1, *initLambda)
- .With(0)
- .Callable("Member")
- .Arg(0, "item")
- .Add(1, distinctField)
- .Seal()
- .Done()
- .Seal()
- .Seal();
- } else {
- parent.List(ndx++)
- .Add(0, InitialColumnNames[i])
- .Apply(1, *initLambda)
- .With(0)
- .Callable("Member")
- .Arg(0, "item")
- .Add(1, distinctField)
- .Seal()
- .Done()
- .With(1)
- .Callable("Uint32")
- .Atom(0, ToString(i), TNodeFlags::Default)
- .Seal()
- .Done()
- .Seal()
- .Seal();
- }
- }
- }
- TExprNode::TPtr TAggregateExpander::GenerateDistinctGrouper(const TExprNode::TPtr distinctField,
- const TVector<const TTypeAnnotationNode*>& keyItemTypes, bool needDistinctPickle)
- {
- auto& indicies = Distinct2Columns[distinctField->Content()];
- auto distinctIndex = RowType->FindItem(distinctField->Content());
- YQL_ENSURE(distinctIndex, "Unknown field: " << distinctField->Content());
- auto distinctType = RowType->GetItems()[*distinctIndex]->GetItemType();
- TVector<const TTypeAnnotationNode*> distinctKeyItemTypes = keyItemTypes;
- distinctKeyItemTypes.push_back(distinctType);
- auto valueType = distinctType;
- if (distinctType->GetKind() == ETypeAnnotationKind::Optional) {
- distinctType = distinctType->Cast<TOptionalExprType>()->GetItemType();
- }
- if (distinctType->GetKind() != ETypeAnnotationKind::Data) {
- needDistinctPickle = true;
- valueType = Ctx.MakeType<TDataExprType>(EDataSlot::String);
- }
- const auto expandedValueType = needDistinctPickle ?
- Ctx.Builder(Node->Pos())
- .Callable("DataType")
- .Atom(0, "String", TNodeFlags::Default)
- .Seal()
- .Build()
- : ExpandType(Node->Pos(), *valueType, Ctx);
- DistinctFieldNeedsPickle[distinctField->Content()] = needDistinctPickle;
- auto udfSetCreateValue = Ctx.Builder(Node->Pos())
- .Callable("Udf")
- .Atom(0, "Set.Create")
- .Callable(1, "Void").Seal()
- .Callable(2, "TupleType")
- .Callable(0, "TupleType")
- .Add(0, expandedValueType)
- .Callable(1, "DataType")
- .Atom(0, "Uint32", TNodeFlags::Default)
- .Seal()
- .Seal()
- .Callable(1, "StructType").Seal()
- .Add(2, expandedValueType)
- .Seal()
- .Seal()
- .Build();
- UdfSetCreate[distinctField->Content()] = udfSetCreateValue;
- auto resourceType = Ctx.Builder(Node->Pos())
- .Callable("TypeOf")
- .Callable(0, "Apply")
- .Add(0, udfSetCreateValue)
- .Callable(1, "InstanceOf")
- .Add(0, expandedValueType)
- .Seal()
- .Callable(2, "Uint32")
- .Atom(0, "0", TNodeFlags::Default)
- .Seal()
- .Seal()
- .Seal()
- .Build();
- UdfAddValue[distinctField->Content()] = Ctx.Builder(Node->Pos())
- .Callable("Udf")
- .Atom(0, "Set.AddValue")
- .Callable(1, "Void").Seal()
- .Callable(2, "TupleType")
- .Callable(0, "TupleType")
- .Add(0, resourceType)
- .Add(1, expandedValueType)
- .Seal()
- .Callable(1, "StructType").Seal()
- .Add(2, expandedValueType)
- .Seal()
- .Seal()
- .Build();
- UdfWasChanged[distinctField->Content()] = Ctx.Builder(Node->Pos())
- .Callable("Udf")
- .Atom(0, "Set.WasChanged")
- .Callable(1, "Void").Seal()
- .Callable(2, "TupleType")
- .Callable(0, "TupleType")
- .Add(0, resourceType)
- .Seal()
- .Callable(1, "StructType").Seal()
- .Add(2, expandedValueType)
- .Seal()
- .Seal()
- .Build();
- auto distinctKeyExtractor = Ctx.Builder(Node->Pos())
- .Lambda()
- .Param("item")
- .Do([&](TExprNodeBuilder& parent) -> TExprNodeBuilder& {
- if (KeyColumns->ChildrenSize() != 0) {
- auto listBuilder = parent.List();
- ui32 pos = 0;
- for (ui32 i = 0; i < KeyColumns->ChildrenSize(); ++i) {
- listBuilder
- .Callable(pos++, "Member")
- .Arg(0, "item")
- .Add(1, KeyColumns->ChildPtr(i))
- .Seal();
- }
- listBuilder
- .Callable(pos, "Member")
- .Arg(0, "item")
- .Add(1, distinctField)
- .Seal();
- return listBuilder.Seal();
- } else {
- return parent
- .Callable("Member")
- .Arg(0, "item")
- .Add(1, distinctField)
- .Seal();
- }
- })
- .Seal()
- .Build();
- const TTypeAnnotationNode* distinctPickleType = nullptr;
- TExprNode::TPtr distinctPickleTypeNode;
- if (needDistinctPickle) {
- distinctPickleType = KeyColumns->ChildrenSize() > 0 ? Ctx.MakeType<TTupleExprType>(distinctKeyItemTypes) : distinctKeyItemTypes.front();
- distinctPickleTypeNode = ExpandType(Node->Pos(), *distinctPickleType, Ctx);
- }
- if (needDistinctPickle) {
- distinctKeyExtractor = Ctx.Builder(Node->Pos())
- .Lambda()
- .Param("item")
- .Callable("StablePickle")
- .Apply(0, *distinctKeyExtractor).With(0, "item").Seal()
- .Seal()
- .Seal()
- .Build();
- }
- auto distinctCombineInit = Ctx.Builder(Node->Pos())
- .Lambda()
- .Param("key")
- .Param("item")
- .Callable("AsStruct")
- .Do([&](TExprNodeBuilder& parent) -> TExprNodeBuilder& {
- ui32 ndx = 0;
- GenerateInitForDistinct(parent, ndx, indicies, distinctField);
- return parent;
- })
- .Seal()
- .Seal()
- .Build();
- auto distinctCombineUpdate = Ctx.Builder(Node->Pos())
- .Lambda()
- .Param("key")
- .Param("item")
- .Param("state")
- .Arg("state")
- .Seal()
- .Build();
- ui32 ndx = 0;
- auto distinctCombineSave = Ctx.Builder(Node->Pos())
- .Lambda()
- .Param("key")
- .Param("state")
- .Callable("Just")
- .Callable(0, "AsStruct")
- .Do([&](TExprNodeBuilder& parent) -> TExprNodeBuilder& {
- for (ui32 i: indicies) {
- auto trait = Traits[i];
- auto saveLambda = trait->Child(3);
- parent.List(ndx++)
- .Add(0, InitialColumnNames[i])
- .Apply(1, *saveLambda)
- .With(0)
- .Callable("Member")
- .Arg(0, "state")
- .Add(1, InitialColumnNames[i])
- .Seal()
- .Done()
- .Seal()
- .Seal();
- }
- return parent;
- })
- .Do([&](TExprNodeBuilder& parent) -> TExprNodeBuilder& {
- if (KeyColumns->ChildrenSize() > 0) {
- if (needDistinctPickle) {
- ui32 pos = 0;
- for (ui32 i = 0; i < KeyColumns->ChildrenSize(); ++i) {
- parent.List(ndx++)
- .Add(0, KeyColumns->ChildPtr(i))
- .Callable(1, "Nth")
- .Callable(0, "Unpickle")
- .Add(0, distinctPickleTypeNode)
- .Arg(1, "key")
- .Seal()
- .Atom(1, ToString(pos++), TNodeFlags::Default)
- .Seal()
- .Seal();
- }
- parent.List(ndx++)
- .Add(0, distinctField)
- .Callable(1, "Nth")
- .Callable(0, "Unpickle")
- .Add(0, distinctPickleTypeNode)
- .Arg(1, "key")
- .Seal()
- .Atom(1, ToString(pos++), TNodeFlags::Default)
- .Seal()
- .Seal();
- } else {
- ui32 pos = 0;
- for (ui32 i = 0; i < KeyColumns->ChildrenSize(); ++i) {
- parent.List(ndx++)
- .Add(0, KeyColumns->ChildPtr(i))
- .Callable(1, "Nth")
- .Arg(0, "key")
- .Atom(1, ToString(pos++), TNodeFlags::Default)
- .Seal()
- .Seal();
- }
- parent.List(ndx++)
- .Add(0, distinctField)
- .Callable(1, "Nth")
- .Arg(0, "key")
- .Atom(1, ToString(pos++), TNodeFlags::Default)
- .Seal()
- .Seal();
- }
- } else {
- if (needDistinctPickle) {
- parent.List(ndx++)
- .Add(0, distinctField)
- .Callable(1, "Unpickle")
- .Add(0, distinctPickleTypeNode)
- .Arg(1, "key")
- .Seal()
- .Seal();
- } else {
- parent.List(ndx++)
- .Add(0, distinctField)
- .Arg(1, "key")
- .Seal();
- }
- }
- return parent;
- })
- .Seal()
- .Seal()
- .Seal()
- .Build();
- auto distinctCombiner = Ctx.Builder(Node->Pos())
- .Callable("CombineByKey")
- .Add(0, AggList)
- .Add(1, PreMap)
- .Add(2, distinctKeyExtractor)
- .Add(3, std::move(distinctCombineInit))
- .Add(4, std::move(distinctCombineUpdate))
- .Add(5, std::move(distinctCombineSave))
- .Seal()
- .Build();
- auto distinctGrouper = Ctx.Builder(Node->Pos())
- .Callable("PartitionsByKeys")
- .Add(0, std::move(distinctCombiner))
- .Add(1, distinctKeyExtractor)
- .Callable(2, "Void").Seal()
- .Callable(3, "Void").Seal()
- .Lambda(4)
- .Param("groups")
- .Callable("Map")
- .Callable(0, "Condense1")
- .Arg(0, "groups")
- .Lambda(1)
- .Param("item")
- .Arg("item")
- .Seal()
- .Lambda(2)
- .Param("item")
- .Param("state")
- .Callable("IsKeySwitch")
- .Arg(0, "item")
- .Arg(1, "state")
- .Add(2, distinctKeyExtractor)
- .Add(3, distinctKeyExtractor)
- .Seal()
- .Seal()
- .Lambda(3)
- .Param("item")
- .Param("state")
- .Arg("item")
- .Seal()
- .Seal()
- .Lambda(1)
- .Param("state")
- .Callable("AsStruct")
- .Do([&](TExprNodeBuilder& parent) -> TExprNodeBuilder& {
- for (ui32 i = 0; i < InitialColumnNames.size(); ++i) {
- if (indicies.find(i) != indicies.end()) {
- parent.List(i)
- .Add(0, InitialColumnNames[i])
- .Callable(1, "Just")
- .Callable(0, "Member")
- .Arg(0, "state")
- .Add(1, InitialColumnNames[i])
- .Seal()
- .Seal()
- .Seal();
- } else {
- parent.List(i)
- .Add(0, InitialColumnNames[i])
- .Add(1, NothingStates[i])
- .Seal();
- }
- }
- return parent;
- })
- .Do([&](TExprNodeBuilder& parent) -> TExprNodeBuilder& {
- if (KeyColumns->ChildrenSize() > 0) {
- for (ui32 i = 0; i < KeyColumns->ChildrenSize(); ++i) {
- parent.List(InitialColumnNames.size() + i)
- .Add(0, KeyColumns->ChildPtr(i))
- .Callable(1, "Member")
- .Arg(0, "state")
- .Add(1, KeyColumns->ChildPtr(i))
- .Seal().Seal();
- }
- }
- return parent;
- })
- .Seal()
- .Seal()
- .Seal()
- .Seal()
- .Seal()
- .Build();
- return distinctGrouper;
- }
- TExprNode::TPtr TAggregateExpander::ReturnKeyAsIsForCombineInit(const TExprNode::TPtr& pickleTypeNode)
- {
- return Ctx.Builder(Node->Pos())
- .Lambda()
- .Param("key")
- .Param("item")
- .Callable("AsStruct")
- .Do([&](TExprNodeBuilder& parent) -> TExprNodeBuilder& {
- ui32 pos = 0;
- for (ui32 i = 0; i < KeyColumns->ChildrenSize(); ++i) {
- auto listBuilder = parent.List(i);
- listBuilder.Add(0, KeyColumns->Child(i));
- if (KeyColumns->ChildrenSize() > 1) {
- if (pickleTypeNode) {
- listBuilder
- .Callable(1, "Nth")
- .Callable(0, "Unpickle")
- .Add(0, pickleTypeNode)
- .Arg(1, "key")
- .Seal()
- .Atom(1, ToString(pos++), TNodeFlags::Default)
- .Seal();
- } else {
- listBuilder
- .Callable(1, "Nth")
- .Arg(0, "key")
- .Atom(1, ToString(pos++), TNodeFlags::Default)
- .Seal();
- }
- } else {
- if (pickleTypeNode) {
- listBuilder.Callable(1, "Unpickle")
- .Add(0, pickleTypeNode)
- .Arg(1, "key")
- .Seal();
- } else {
- listBuilder.Arg(1, "key");
- }
- }
- listBuilder.Seal();
- }
- return parent;
- })
- .Seal()
- .Seal()
- .Build();
- }
- TExprNode::TPtr TAggregateExpander::BuildFinalizeByKeyLambda(const TExprNode::TPtr& preprocessLambda, const TExprNode::TPtr& keyExtractor) {
- return Ctx.Builder(Node->Pos())
- .Lambda()
- .Param("stream")
- .Callable("FinalizeByKey")
- .Arg(0, "stream")
- .Lambda(1)
- .Param("item")
- .Callable("Just")
- .Apply(0, preprocessLambda)
- .With(0, "item")
- .Seal()
- .Seal()
- .Seal()
- .Add(2, keyExtractor)
- .Lambda(3)
- .Param("key")
- .Param("item")
- .Apply(GeneratePostAggregateInitPhase())
- .With(0, "item")
- .Seal()
- .Seal()
- .Lambda(4)
- .Param("key")
- .Param("item")
- .Param("state")
- .Apply(GeneratePostAggregateMergePhase())
- .With(0, "item")
- .With(1, "state")
- .Seal()
- .Seal()
- .Lambda(5)
- .Param("key")
- .Param("state")
- .Apply(GeneratePostAggregateSavePhase())
- .With(0, "state")
- .Seal()
- .Seal()
- .Seal()
- .Seal().Build();
- }
- TExprNode::TPtr TAggregateExpander::CountAggregateRewrite(const NNodes::TCoAggregate& node, TExprContext& ctx, bool useBlocks) {
- auto keyColumns = node.Keys();
- auto aggregatedColumns = node.Handlers();
- if (keyColumns.Size() > 0 || aggregatedColumns.Size() != 1) {
- return node.Ptr();
- }
- auto settings = node.Settings();
- auto hoppingSetting = GetSetting(settings.Ref(), "hopping");
- if (hoppingSetting) {
- return node.Ptr();
- }
- if (GetSetting(settings.Ref(), "session")) {
- // TODO: support
- return node.Ptr();
- }
- auto aggregatedColumn = aggregatedColumns.Item(0);
- const bool isDistinct = (aggregatedColumn.Ref().ChildrenSize() == 3);
- auto traits = aggregatedColumn.Ref().Child(1);
- auto outputColumn = aggregatedColumn.Ref().HeadPtr();
- // validation of traits
- const TTypeAnnotationNode* inputItemType;
- bool onlyColumn = true;
- bool onlyZero = true;
- TExprNode::TPtr initVal;
- if (traits->IsCallable("AggregationTraits")) {
- inputItemType = traits->Head().GetTypeAnn()->Cast<TTypeExprType>()->GetType();
- auto init = NNodes::TCoLambda(traits->Child(1));
- TExprNode::TPtr updateVal;
- if (init.Body().Ref().IsCallable("Uint64") &&
- init.Body().Ref().Head().Content() == "1") {
- onlyZero = false;
- } else if (init.Body().Ref().IsCallable("Uint64") &&
- init.Body().Ref().Head().Content() == "0") {
- onlyColumn = false;
- } else if (init.Body().Ref().IsCallable("AggrCountInit")) {
- initVal = init.Body().Ref().HeadPtr();
- onlyColumn = onlyColumn && init.Body().Ref().Child(0) == init.Args().Arg(0).Raw();
- onlyZero = false;
- } else {
- return node.Ptr();
- }
- auto update = NNodes::TCoLambda(traits->Child(2));
- auto inc = update.Body().Ptr();
- if (inc->IsCallable("Inc") && inc->Child(0) == update.Args().Arg(1).Raw()) {
- onlyZero = false;
- } else if (inc->IsCallable("AggrCountUpdate") && inc->Child(1) == update.Args().Arg(1).Raw()) {
- updateVal = inc->HeadPtr();
- onlyColumn = onlyColumn && inc->Child(0) == update.Args().Arg(0).Raw();
- onlyZero = false;
- } else if (inc == update.Args().Arg(1).Raw()) {
- onlyColumn = false;
- } else {
- return node.Ptr();
- }
- auto save = NNodes::TCoLambda(traits->Child(3));
- if (save.Body().Raw() != save.Args().Arg(0).Raw()) {
- return node.Ptr();
- }
- auto load = NNodes::TCoLambda(traits->Child(4));
- if (load.Body().Raw() != load.Args().Arg(0).Raw()) {
- return node.Ptr();
- }
- auto merge = NNodes::TCoLambda(traits->Child(5));
- {
- auto& plus = merge.Body().Ref();
- if (!plus.IsCallable({ "+", "AggrAdd" }) ) {
- return node.Ptr();
- }
- if (!(plus.Child(0) == merge.Args().Arg(0).Raw() &&
- plus.Child(1) == merge.Args().Arg(1).Raw())) {
- return node.Ptr();
- }
- }
- auto finish = NNodes::TCoLambda(traits->Child(6));
- if (finish.Body().Raw() != finish.Args().Arg(0).Raw()) {
- return node.Ptr();
- }
- auto defVal = traits->Child(7);
- if (!defVal->IsCallable("Uint64") || defVal->Head().Content() != "0") {
- return node.Ptr();
- }
- if (!isDistinct) {
- if (!onlyZero && !onlyColumn) {
- if (!initVal || !updateVal || initVal != updateVal) {
- return node.Ptr();
- }
- }
- }
- } else if (traits->IsCallable("AggApply")) {
- if (traits->Head().Content() != "count_all" && traits->Head().Content() != "count") {
- return node.Ptr();
- }
- inputItemType = traits->Child(1)->GetTypeAnn()->Cast<TTypeExprType>()->GetType();
- onlyZero = false;
- onlyColumn = false;
- if (&traits->Child(2)->Head().Head() == &traits->Child(2)->Tail()) {
- onlyColumn = true;
- }
- if (!isDistinct) {
- if (traits->Head().Content() == "count") {
- initVal = traits->Child(2)->TailPtr();
- if (initVal->GetTypeAnn()->IsOptionalOrNull()) {
- if (IsDepended(traits->Child(2)->Tail(), traits->Child(2)->Head().Head())) {
- return node.Ptr();
- }
- } else {
- initVal = nullptr;
- }
- }
- }
- } else {
- return node.Ptr();
- }
- const bool isOptionalColumn = inputItemType->GetKind() == ETypeAnnotationKind::Optional;
- if (!isDistinct) {
- auto length = ctx.Builder(node.Pos())
- .Callable("Length")
- .Add(0, node.Input().Ptr())
- .Seal()
- .Build();
- if (onlyZero) {
- length = ctx.Builder(node.Pos())
- .Callable("Uint64")
- .Atom(0, "0", TNodeFlags::Default)
- .Seal()
- .Build();
- } else if (!onlyColumn && initVal) {
- length = ctx.Builder(node.Pos())
- .Callable("If")
- .Callable(0, "Exists")
- .Add(0, initVal)
- .Seal()
- .Add(1, std::move(length))
- .Callable(2, "Uint64")
- .Atom(0, "0", TNodeFlags::Default)
- .Seal()
- .Seal()
- .Build();
- }
- auto ret = ctx.Builder(node.Pos())
- .Callable("AsList")
- .Callable(0, "AsStruct")
- .List(0)
- .Add(0, std::move(outputColumn))
- .Add(1, std::move(length))
- .Seal()
- .Seal()
- .Seal()
- .Build();
- return ret;
- }
- if (useBlocks || !onlyColumn) {
- return node.Ptr();
- }
- auto removedOptionalType = inputItemType;
- if (isOptionalColumn) {
- removedOptionalType = removedOptionalType->Cast<TOptionalExprType>()->GetItemType();
- }
- const bool needPickle = removedOptionalType->GetKind() != ETypeAnnotationKind::Data;
- auto pickleTypeNode = ExpandType(node.Pos(), *inputItemType, ctx);
- auto distictColumn = aggregatedColumn.Ref().ChildPtr(2);
- auto combine = ctx.Builder(node.Pos())
- .Callable("CombineByKey")
- .Callable(0, "ExtractMembers")
- .Add(0, node.Input().Ptr())
- .List(1)
- .Add(0, distictColumn)
- .Seal()
- .Seal()
- .Lambda(1)
- .Param("row")
- .Do([&](TExprNodeBuilder& parent) -> TExprNodeBuilder& {
- if (isOptionalColumn) {
- parent.Callable("Map")
- .Callable(0, "Member")
- .Arg(0, "row")
- .Add(1, distictColumn)
- .Seal()
- .Lambda(1)
- .Param("unpacked")
- .Arg("unpacked")
- .Seal()
- .Seal();
- } else {
- parent.Callable("Just")
- .Callable(0, "Member")
- .Arg(0, "row")
- .Add(1, distictColumn)
- .Seal()
- .Seal();
- }
- return parent;
- })
- .Seal()
- .Lambda(2)
- .Param("item")
- .Do([&](TExprNodeBuilder& parent) -> TExprNodeBuilder& {
- if (needPickle) {
- parent.Callable("StablePickle")
- .Arg(0, "item")
- .Seal();
- } else {
- parent.Arg("item");
- }
- return parent;
- })
- .Seal()
- .Lambda(3)
- .Param("key")
- .Param("item")
- .Callable("Void")
- .Seal()
- .Seal()
- .Lambda(4)
- .Param("key")
- .Param("item")
- .Param("state")
- .Arg("state")
- .Seal()
- .Lambda(5)
- .Param("key")
- .Param("state")
- .Callable("Just")
- .Callable(0, "AsStruct")
- .List(0)
- .Atom(0, "value")
- .Arg(1, "key")
- .Seal()
- .Seal()
- .Seal()
- .Seal()
- .Seal()
- .Build();
- auto groupByKey = ctx.Builder(node.Pos())
- .Callable("PartitionByKey")
- .Add(0, combine)
- .Lambda(1)
- .Param("combineRow")
- .Callable("Member")
- .Arg(0, "combineRow")
- .Atom(1, "value")
- .Seal()
- .Seal()
- .Callable(2, "Void")
- .Seal()
- .Callable(3, "Void")
- .Seal()
- .Lambda(4)
- .Param("groups")
- .Callable("Map")
- .Arg(0, "groups")
- .Lambda(1)
- .Param("group")
- .Callable("AsStruct")
- .Seal()
- .Seal()
- .Seal()
- .Seal()
- .Seal()
- .Build();
- auto ret = ctx.Builder(node.Pos())
- .Callable("AsList")
- .Callable(0, "AsStruct")
- .List(0)
- .Add(0, outputColumn)
- .Callable(1, "Length")
- .Add(0, std::move(groupByKey))
- .Seal()
- .Seal()
- .Seal()
- .Seal()
- .Build();
- return ret;
- }
- TExprNode::TPtr TAggregateExpander::GeneratePostAggregate(const TExprNode::TPtr& preAgg, const TExprNode::TPtr& keyExtractor)
- {
- auto preprocessLambda = GeneratePreprocessLambda(keyExtractor);
- TExprNode::TPtr postAgg;
- if (!UsePartitionsByKeys && UseFinalizeByKeys && !HaveSessionSetting) {
- postAgg = Ctx.Builder(Node->Pos())
- .Callable("ShuffleByKeys")
- .Add(0, std::move(preAgg))
- .Add(1, keyExtractor)
- .Lambda(2)
- .Param("stream")
- .Apply(GetContextLambda())
- .With(0)
- .Apply(BuildFinalizeByKeyLambda(preprocessLambda, keyExtractor))
- .With(0, "stream")
- .Seal()
- .Done()
- .Seal()
- .Seal()
- .Seal().Build();
- } else {
- auto condenseSwitch = GenerateCondenseSwitch(keyExtractor);
- postAgg = Ctx.Builder(Node->Pos())
- .Callable("PartitionsByKeys")
- .Add(0, std::move(preAgg))
- .Add(1, keyExtractor)
- .Add(2, SortParams.Order)
- .Add(3, SortParams.Key)
- .Lambda(4)
- .Param("stream")
- .Apply(GetContextLambda())
- .With(0)
- .Callable("Map")
- .Callable(0, "Condense1")
- .Apply(0, preprocessLambda)
- .With(0, "stream")
- .Seal()
- .Add(1, GeneratePostAggregateInitPhase())
- .Add(2, condenseSwitch)
- .Add(3, GeneratePostAggregateMergePhase())
- .Seal()
- .Add(1, GeneratePostAggregateSavePhase())
- .Seal()
- .Done()
- .Seal()
- .Seal()
- .Seal().Build();
- }
- if (KeyColumns->ChildrenSize() == 0 && !HaveSessionSetting && (Suffix == "" || Suffix.EndsWith("Finalize"))) {
- return MakeSingleGroupRow(*Node, postAgg, Ctx);
- }
- return postAgg;
- }
- TExprNode::TPtr TAggregateExpander::GeneratePreprocessLambda(const TExprNode::TPtr& keyExtractor)
- {
- TExprNode::TPtr preprocessLambda;
- if (SessionWindowParams.Update) {
- YQL_ENSURE(EffectiveCompact);
- YQL_ENSURE(SessionWindowParams.Key);
- YQL_ENSURE(SessionWindowParams.KeyType);
- YQL_ENSURE(SessionWindowParams.Init);
- preprocessLambda = AddSessionParamsMemberLambda(Node->Pos(), SessionStartMemberName, "", keyExtractor,
- SessionWindowParams.Key, SessionWindowParams.Init, SessionWindowParams.Update, Ctx);
- } else {
- YQL_ENSURE(!SessionWindowParams.Key);
- preprocessLambda = MakeIdentityLambda(Node->Pos(), Ctx);
- }
- return preprocessLambda;
- }
- TExprNode::TPtr TAggregateExpander::GenerateCondenseSwitch(const TExprNode::TPtr& keyExtractor)
- {
- TExprNode::TPtr condenseSwitch;
- if (SessionWindowParams.Update) {
- YQL_ENSURE(EffectiveCompact);
- YQL_ENSURE(SessionWindowParams.Key);
- YQL_ENSURE(SessionWindowParams.KeyType);
- YQL_ENSURE(SessionWindowParams.Init);
- condenseSwitch = Ctx.Builder(Node->Pos())
- .Lambda()
- .Param("item")
- .Param("state")
- .Callable("Or")
- .Callable(0, "AggrNotEquals")
- .Apply(0, keyExtractor)
- .With(0, "item")
- .Seal()
- .Apply(1, keyExtractor)
- .With(0, "state")
- .Seal()
- .Seal()
- .Callable(1, "AggrNotEquals")
- .Callable(0, "Member")
- .Arg(0, "item")
- .Atom(1, SessionStartMemberName)
- .Seal()
- .Callable(1, "Member")
- .Arg(0, "state")
- .Atom(1, SessionStartMemberName)
- .Seal()
- .Seal()
- .Seal()
- .Seal()
- .Build();
- } else {
- YQL_ENSURE(!SessionWindowParams.Key);
- condenseSwitch = Ctx.Builder(Node->Pos())
- .Lambda()
- .Param("item")
- .Param("state")
- .Callable("IsKeySwitch")
- .Arg(0, "item")
- .Arg(1, "state")
- .Add(2, keyExtractor)
- .Add(3, keyExtractor)
- .Seal()
- .Seal()
- .Build();
- }
- return condenseSwitch;
- }
- TExprNode::TPtr TAggregateExpander::GeneratePostAggregateInitPhase()
- {
- bool aggregateOnly = (Suffix != "");
- const auto& columnNames = aggregateOnly ? FinalColumnNames : InitialColumnNames;
- ui32 index = 0U;
- return Ctx.Builder(Node->Pos())
- .Lambda()
- .Param("item")
- .Callable("AsStruct")
- .Do([&](TExprNodeBuilder& parent) -> TExprNodeBuilder& {
- for (ui32 i = 0; i < KeyColumns->ChildrenSize(); ++i) {
- parent
- .List(index++)
- .Add(0, KeyColumns->ChildPtr(i))
- .Callable(1, "Member")
- .Arg(0, "item")
- .Add(1, KeyColumns->ChildPtr(i))
- .Seal()
- .Seal();
- }
- if (SessionWindowParams.Update) {
- parent
- .List(index++)
- .Atom(0, SessionStartMemberName)
- .Callable(1, "Member")
- .Arg(0, "item")
- .Atom(1, SessionStartMemberName)
- .Seal()
- .Seal();
- }
- return parent;
- })
- .Do([&](TExprNodeBuilder& parent) -> TExprNodeBuilder& {
- for (ui32 i = 0; i < columnNames.size(); ++i) {
- auto child = AggregatedColumns->Child(i);
- auto trait = Traits[i];
- if (!EffectiveCompact) {
- auto loadLambda = trait->Child(4);
- auto extractorLambda = GetFinalAggStateExtractor(i);
- if (!DistinctFields.empty() || Suffix == "MergeManyFinalize") {
- parent.List(index++)
- .Add(0, columnNames[i])
- .Callable(1, "Map")
- .Apply(0, *extractorLambda)
- .With(0, "item")
- .Seal()
- .Add(1, loadLambda)
- .Seal()
- .Seal();
- } else {
- parent.List(index++)
- .Add(0, columnNames[i])
- .Apply(1, *loadLambda)
- .With(0)
- .Apply(*extractorLambda)
- .With(0, "item")
- .Seal()
- .Done()
- .Seal();
- }
- } else {
- auto initLambda = trait->Child(1);
- auto distinctField = (child->ChildrenSize() == 3) ? child->Child(2) : nullptr;
- auto initApply = [&](TExprNodeBuilder& parent) -> TExprNodeBuilder& {
- parent.Apply(1, *initLambda)
- .With(0)
- .Do([&](TExprNodeBuilder& parent) -> TExprNodeBuilder& {
- if (distinctField) {
- parent
- .Callable("Member")
- .Arg(0, "item")
- .Add(1, distinctField)
- .Seal();
- } else {
- parent
- .Callable("CastStruct")
- .Arg(0, "item")
- .Add(1, ExpandType(Node->Pos(), *initLambda->Head().Head().GetTypeAnn(), Ctx))
- .Seal();
- }
- return parent;
- })
- .Done()
- .Do([&](TExprNodeReplaceBuilder& parent) -> TExprNodeReplaceBuilder& {
- if (initLambda->Head().ChildrenSize() == 2) {
- parent.With(1)
- .Callable("Uint32")
- .Atom(0, ToString(i), TNodeFlags::Default)
- .Seal()
- .Done();
- }
- return parent;
- })
- .Seal();
- return parent;
- };
- if (distinctField) {
- const bool isFirst = *Distinct2Columns[distinctField->Content()].begin() == i;
- if (isFirst) {
- parent.List(index++)
- .Add(0, columnNames[i])
- .List(1)
- .Callable(0, "NamedApply")
- .Add(0, UdfSetCreate[distinctField->Content()])
- .List(1)
- .Do([&](TExprNodeBuilder& parent) -> TExprNodeBuilder& {
- if (!DistinctFieldNeedsPickle[distinctField->Content()]) {
- parent.Callable(0, "Member")
- .Arg(0, "item")
- .Add(1, distinctField)
- .Seal();
- } else {
- parent.Callable(0, "StablePickle")
- .Callable(0, "Member")
- .Arg(0, "item")
- .Add(1, distinctField)
- .Seal()
- .Seal();
- }
- return parent;
- })
- .Callable(1, "Uint32")
- .Atom(0, "0", TNodeFlags::Default)
- .Seal()
- .Seal()
- .Callable(2, "AsStruct").Seal()
- .Callable(3, "DependsOn")
- .Callable(0, "String")
- .Add(0, distinctField)
- .Seal()
- .Seal()
- .Seal()
- .Do(initApply)
- .Seal()
- .Seal();
- } else {
- parent.List(index++)
- .Add(0, columnNames[i])
- .Do(initApply)
- .Seal();
- }
- } else {
- parent.List(index++)
- .Add(0, columnNames[i])
- .Do(initApply)
- .Seal();
- }
- }
- }
- return parent;
- })
- .Seal()
- .Seal()
- .Build();
- }
// Builds the lambda that turns an aggregation state row into an output row.
// The lambda takes one argument ("state") and returns an AsStruct containing:
//   * every key column copied from the state, except a key whose name equals
//     SessionStartMemberName;
//   * when SessionOutputColumn is set, a member named *SessionOutputColumn whose
//     value is the state's SessionStartMemberName member;
//   * for every aggregated column, the column's state passed through the trait
//     lambda at child 3 (when Suffix == "MergeState") or child 6 (otherwise).
// State members are looked up by FinalColumnNames when Suffix is non-empty and
// by InitialColumnNames otherwise.
// NOTE(review): trait children 3 and 6 look like the "save" and "finish"
// lambdas of the traits callable — confirm against the traits layout.
- TExprNode::TPtr TAggregateExpander::GeneratePostAggregateSavePhase()
- {
// A non-empty Suffix selects the final column names for state lookups.
- bool aggregateOnly = (Suffix != "");
- const auto& columnNames = aggregateOnly ? FinalColumnNames : InitialColumnNames;
// Position of the next AsStruct member; shared by both Do() blocks below.
- ui32 index = 0U;
- return Ctx.Builder(Node->Pos())
- .Lambda()
- .Param("state")
- .Callable("AsStruct")
// First block: key columns and the optional session output column.
- .Do([&](TExprNodeBuilder& parent) -> TExprNodeBuilder& {
- for (ui32 i = 0; i < KeyColumns->ChildrenSize(); ++i) {
// The session start key is not emitted under its own name; see the
// SessionOutputColumn member added after the loop.
- if (KeyColumns->Child(i)->Content() == SessionStartMemberName) {
- continue;
- }
- parent
- .List(index++)
- .Add(0, KeyColumns->ChildPtr(i))
- .Callable(1, "Member")
- .Arg(0, "state")
- .Add(1, KeyColumns->ChildPtr(i))
- .Seal()
- .Seal();
- }
- if (SessionOutputColumn) {
- parent
- .List(index++)
- .Atom(0, *SessionOutputColumn)
- .Callable(1, "Member")
- .Arg(0, "state")
- .Atom(1, SessionStartMemberName)
- .Seal()
- .Seal();
- }
- return parent;
- })
// Second block: one or more members per aggregated column.
- .Do([&](TExprNodeBuilder& parent) -> TExprNodeBuilder& {
- for (ui32 i = 0; i < columnNames.size(); ++i) {
- auto child = AggregatedColumns->Child(i);
- auto trait = Traits[i];
- auto finishLambda = (Suffix == "MergeState") ? trait->Child(3) : trait->Child(6);
// Non-compact mode with distinct fields or MergeManyFinalize: the lambda is
// applied through Map over the state member and the result is Unwrap-ed.
// NOTE(review): presumably the stored state is optional in this mode — confirm.
- if (!EffectiveCompact && (!DistinctFields.empty() || Suffix == "MergeManyFinalize")) {
// An atom as the first child means a single output column.
- if (child->Head().IsAtom()) {
- parent.List(index++)
- .Add(0, FinalColumnNames[i])
- .Callable(1, "Unwrap")
- .Callable(0, "Map")
- .Callable(0, "Member")
- .Arg(0, "state")
- .Add(1, columnNames[i])
- .Seal()
- .Add(1, finishLambda)
- .Seal()
- .Seal()
- .Seal();
- } else {
// Otherwise the first child lists several output names; output `field`
// receives the Nth element of the lambda result.
- const auto& multiFields = child->Child(0);
- for (ui32 field = 0; field < multiFields->ChildrenSize(); ++field) {
- parent.List(index++)
- .Atom(0, multiFields->Child(field)->Content())
- .Callable(1, "Nth")
- .Callable(0, "Unwrap")
- .Callable(0, "Map")
- .Callable(0, "Member")
- .Arg(0, "state")
- .Add(1, columnNames[i])
- .Seal()
- .Add(1, finishLambda)
- .Seal()
- .Seal()
- .Atom(1, ToString(field), TNodeFlags::Default)
- .Seal()
- .Seal();
- }
- }
- } else {
// A third child on the aggregated column names its distinct field.
- auto distinctField = (child->ChildrenSize() == 3) ? child->Child(2) : nullptr;
// Produces the argument for the lambda. For the column whose index is the
// first element of Distinct2Columns[field], the state member is read through
// Nth(..., 1); every other column reads the member directly.
// NOTE(review): presumably element 0 of that tuple is the distinct-values
// set — confirm against the init/update phases.
- auto stateExtractor = [&](TExprNodeBuilder& parent) -> TExprNodeBuilder& {
- const bool isFirst = distinctField ? (*Distinct2Columns[distinctField->Content()].begin() == i) : false;
- if (distinctField && isFirst) {
- parent.Callable("Nth")
- .Callable(0, "Member")
- .Arg(0, "state")
- .Add(1, columnNames[i])
- .Seal()
- .Atom(1, "1", TNodeFlags::Default)
- .Seal();
- } else {
- parent.Callable("Member")
- .Arg(0, "state")
- .Add(1, columnNames[i])
- .Seal();
- }
- return parent;
- };
// Single output column: apply the lambda directly to the extracted state.
- if (child->Head().IsAtom()) {
- parent.List(index++)
- .Add(0, FinalColumnNames[i])
- .Apply(1, *finishLambda)
- .With(0)
- .Do(stateExtractor)
- .Done()
- .Seal()
- .Seal();
- } else {
// Several output columns: each takes the Nth element of the lambda result.
- const auto& multiFields = child->Head();
- for (ui32 field = 0; field < multiFields.ChildrenSize(); ++field) {
- parent.List(index++)
- .Atom(0, multiFields.Child(field)->Content())
- .Callable(1, "Nth")
- .Apply(0, *finishLambda)
- .With(0)
- .Do(stateExtractor)
- .Done()
- .Seal()
- .Atom(1, ToString(field), TNodeFlags::Default)
- .Seal()
- .Seal();
- }
- }
- }
- }
- return parent;
- })
- .Seal()
- .Seal()
- .Build();
- }
// Builds the lambda that folds one more input row into an existing state row.
// The lambda takes two arguments ("item", "state") and returns an AsStruct with:
//   * every key column copied from the state;
//   * when SessionWindowParams.Update is set, the state's
//     SessionStartMemberName member;
//   * for every aggregated column, the new state for that column.
// When EffectiveCompact is false the new state is built from trait children 4
// and 5 applied to the value produced by GetFinalAggStateExtractor(i); when it
// is true the new state is built from trait child 2 applied to the item, with
// extra handling for columns that have a distinct field.
// NOTE(review): trait children 2/4/5 look like the update/load/merge lambdas of
// the traits callable — confirm against the traits layout.
- TExprNode::TPtr TAggregateExpander::GeneratePostAggregateMergePhase()
- {
// A non-empty Suffix selects the final column names for state lookups.
- bool aggregateOnly = (Suffix != "");
- const auto& columnNames = aggregateOnly ? FinalColumnNames : InitialColumnNames;
// Position of the next AsStruct member; shared by both Do() blocks below.
- ui32 index = 0U;
- return Ctx.Builder(Node->Pos())
- .Lambda()
- .Param("item")
- .Param("state")
- .Callable("AsStruct")
// First block: key columns and the optional session start member.
- .Do([&](TExprNodeBuilder& parent) -> TExprNodeBuilder& {
- for (ui32 i = 0; i < KeyColumns->ChildrenSize(); ++i) {
- parent
- .List(index++)
- .Add(0, KeyColumns->ChildPtr(i))
- .Callable(1, "Member")
- .Arg(0, "state")
- .Add(1, KeyColumns->ChildPtr(i))
- .Seal()
- .Seal();
- }
- if (SessionWindowParams.Update) {
- parent
- .List(index++)
- .Atom(0, SessionStartMemberName)
- .Callable(1, "Member")
- .Arg(0, "state")
- .Atom(1, SessionStartMemberName)
- .Seal()
- .Seal();
- }
- return parent;
- })
// Second block: the new state of every aggregated column.
- .Do([&](TExprNodeBuilder& parent) -> TExprNodeBuilder& {
- for (ui32 i = 0; i < columnNames.size(); ++i) {
- auto child = AggregatedColumns->Child(i);
- auto trait = Traits[i];
- if (!EffectiveCompact) {
- auto loadLambda = trait->Child(4);
- auto mergeLambda = trait->Child(5);
- auto extractorLambda = GetFinalAggStateExtractor(i);
// With distinct fields or MergeManyFinalize the result is
// OptionalReduce(Map(extractor(item), load), Member(state, column), merge).
// NOTE(review): presumably both sides are optional in this mode — confirm.
- if (!DistinctFields.empty() || Suffix == "MergeManyFinalize") {
- parent.List(index++)
- .Add(0, columnNames[i])
- .Callable(1, "OptionalReduce")
- .Callable(0, "Map")
- .Apply(0, extractorLambda)
- .With(0, "item")
- .Seal()
- .Add(1, loadLambda)
- .Seal()
- .Callable(1, "Member")
- .Arg(0, "state")
- .Add(1, columnNames[i])
- .Seal()
- .Add(2, mergeLambda)
- .Seal()
- .Seal();
- } else {
// Otherwise the result is merge(load(extractor(item)), Member(state, column)).
- parent.List(index++)
- .Add(0, columnNames[i])
- .Apply(1, *mergeLambda)
- .With(0)
- .Apply(*loadLambda)
- .With(0)
- .Apply(extractorLambda)
- .With(0, "item")
- .Seal()
- .Done()
- .Seal()
- .Done()
- .With(1)
- .Callable("Member")
- .Arg(0, "state")
- .Add(1, columnNames[i])
- .Seal()
- .Done()
- .Seal()
- .Seal();
- }
- } else {
- auto updateLambda = trait->Child(2);
// A third child on the aggregated column names its distinct field.
- auto distinctField = (child->ChildrenSize() == 3) ? child->Child(2) : nullptr;
// True when this column's index is the first element of Distinct2Columns[field].
- const bool isFirst = distinctField ? (*Distinct2Columns[distinctField->Content()].begin() == i) : false;
// Emits, at position 1 of the current parent, the update lambda applied to
// (value, old state[, Uint32(i)]).
- auto updateApply = [&](TExprNodeBuilder& parent) -> TExprNodeBuilder& {
- parent.Apply(1, *updateLambda)
- .With(0)
- .Do([&](TExprNodeBuilder& parent) -> TExprNodeBuilder& {
// Argument 0: the distinct field of the item, or the whole item cast to the
// type of the update lambda's first argument.
- if (distinctField) {
- parent
- .Callable("Member")
- .Arg(0, "item")
- .Add(1, distinctField)
- .Seal();
- } else {
- parent
- .Callable("CastStruct")
- .Arg(0, "item")
- .Add(1, ExpandType(Node->Pos(), *updateLambda->Head().Head().GetTypeAnn(), Ctx))
- .Seal();
- }
- return parent;
- })
- .Done()
- .With(1)
- .Do([&](TExprNodeBuilder& parent) -> TExprNodeBuilder& {
// Argument 1: the old state; for the first column of a distinct field it is
// element 1 of the stored state member.
- if (distinctField && isFirst) {
- parent.Callable("Nth")
- .Callable(0, "Member")
- .Arg(0, "state")
- .Add(1, columnNames[i])
- .Seal()
- .Atom(1, "1", TNodeFlags::Default)
- .Seal();
- } else {
- parent.Callable("Member")
- .Arg(0, "state")
- .Add(1, columnNames[i])
- .Seal();
- }
- return parent;
- })
- .Done()
- .Do([&](TExprNodeReplaceBuilder& parent) -> TExprNodeReplaceBuilder& {
// Argument 2 is supplied only when the update lambda declares three arguments.
- if (updateLambda->Head().ChildrenSize() == 3) {
- parent
- .With(2)
- .Callable("Uint32")
- .Atom(0, ToString(i), TNodeFlags::Default)
- .Seal()
- .Done();
- }
- return parent;
- })
- .Seal();
- return parent;
- };
- if (distinctField) {
// Index of the column whose state member stores the set for this field.
- auto distinctIndex = *Distinct2Columns[distinctField->Content()].begin();
- ui32 newValueIndex = 0;
// Emits NamedApply(UdfAddValue, (set, value), AsStruct()) at position
// newValueIndex: element 0 of the owning column's state with the item's
// distinct value added (passed through StablePickle when the field needs it).
- auto newValue = [&](TExprNodeBuilder& parent) -> TExprNodeBuilder& {
- parent.Callable(newValueIndex, "NamedApply")
- .Add(0, UdfAddValue[distinctField->Content()])
- .List(1)
- .Callable(0, "Nth")
- .Callable(0, "Member")
- .Arg(0, "state")
- .Add(1, columnNames[distinctIndex])
- .Seal()
- .Atom(1, "0", TNodeFlags::Default)
- .Seal()
- .Do([&](TExprNodeBuilder& parent) -> TExprNodeBuilder& {
- if (!DistinctFieldNeedsPickle[distinctField->Content()]) {
- parent.Callable(1, "Member")
- .Arg(0, "item")
- .Add(1, distinctField)
- .Seal();
- } else {
- parent.Callable(1, "StablePickle")
- .Callable(0, "Member")
- .Arg(0, "item")
- .Add(1, distinctField)
- .Seal()
- .Seal();
- }
- return parent;
- })
- .Seal()
- .Callable(2, "AsStruct").Seal()
- .Seal();
- return parent;
- };
// The member is If(WasChanged(AddValue(set, value)), <updated>, <old state>).
// The condition repeats the AddValue expression built by newValue.
- parent.List(index++)
- .Add(0, columnNames[i])
- .Callable(1, "If")
- .Callable(0, "NamedApply")
- .Add(0, UdfWasChanged[distinctField->Content()])
- .List(1)
- .Callable(0, "NamedApply")
- .Add(0, UdfAddValue[distinctField->Content()])
- .List(1)
- .Callable(0, "Nth")
- .Callable(0, "Member")
- .Arg(0, "state")
- .Add(1, columnNames[distinctIndex])
- .Seal()
- .Atom(1, "0", TNodeFlags::Default)
- .Seal()
- .Do([&](TExprNodeBuilder& parent) -> TExprNodeBuilder& {
- if (!DistinctFieldNeedsPickle[distinctField->Content()]) {
- parent.Callable(1, "Member")
- .Arg(0, "item")
- .Add(1, distinctField)
- .Seal();
- } else {
- parent.Callable(1, "StablePickle")
- .Callable(0, "Member")
- .Arg(0, "item")
- .Add(1, distinctField)
- .Seal()
- .Seal();
- }
- return parent;
- })
- .Seal()
- .Callable(2, "AsStruct").Seal()
- .Seal()
- .Seal()
- .Callable(2, "AsStruct").Seal()
- .Seal()
- .Do([&](TExprNodeBuilder& parent) -> TExprNodeBuilder& {
// "Then" branch: the owning column stores the pair (new set, updated state);
// other columns of the same field store only the updated state.
- if (distinctIndex == i) {
- parent.List(1)
- .Do(newValue)
- .Do(updateApply)
- .Seal();
- } else {
- parent.Do(updateApply);
- }
- return parent;
- })
// "Else" branch: the column's state is kept unchanged.
- .Callable(2, "Member")
- .Arg(0, "state")
- .Add(1, columnNames[i])
- .Seal()
- .Seal()
- .Seal();
- } else {
// No distinct field: the member is simply the update lambda result.
- parent.List(index++)
- .Add(0, columnNames[i])
- .Do(updateApply)
- .Seal();
- }
- }
- }
- return parent;
- })
- .Seal()
- .Seal()
- .Build();
- }
// Wraps `input` in a Map whose lambda rebuilds each row as a struct holding
// every key column unchanged, followed by Just(<state>) for each column index
// in `indicies` (member names taken from InitialColumnNames).
- TExprNode::TPtr TAggregateExpander::GenerateJustOverStates(const TExprNode::TPtr& input, const TIdxSet& indicies) {
-     // Fills the AsStruct callable: key members first, then the wrapped states.
-     auto fillMembers = [&](TExprNodeBuilder& structBuilder) -> TExprNodeBuilder& {
-         ui32 slot = 0;
-         const ui32 keyCount = KeyColumns->ChildrenSize();
-         for (ui32 keyIdx = 0; keyIdx != keyCount; ++keyIdx) {
-             structBuilder
-                 .List(slot)
-                     .Add(0, KeyColumns->ChildPtr(keyIdx))
-                     .Callable(1, "Member")
-                         .Arg(0, "row")
-                         .Add(1, KeyColumns->ChildPtr(keyIdx))
-                     .Seal()
-                 .Seal();
-             ++slot;
-         }
-         for (const ui32 columnIdx : indicies) {
-             const auto& stateName = InitialColumnNames[columnIdx];
-             structBuilder
-                 .List(slot)
-                     .Add(0, stateName)
-                     .Callable(1, "Just")
-                         .Callable(0, "Member")
-                             .Arg(0, "row")
-                             .Add(1, stateName)
-                         .Seal()
-                     .Seal()
-                 .Seal();
-             ++slot;
-         }
-         return structBuilder;
-     };
-
-     return Ctx.Builder(Node->Pos())
-         .Callable("Map")
-             .Add(0, input)
-             .Lambda(1)
-                 .Param("row")
-                 .Callable("AsStruct")
-                     .Do(fillMembers)
-                 .Seal()
-             .Seal()
-         .Seal()
-         .Build();
- }
// Serializes an index set as a list node with one atom per index, in the
// set's iteration order; each atom's content is ToString(index).
- TExprNode::TPtr TAggregateExpander::SerializeIdxSet(const TIdxSet& indicies) {
-     auto emitAtoms = [&](TExprNodeBuilder& listBuilder) -> TExprNodeBuilder& {
-         ui32 slot = 0;
-         for (const ui32 columnIdx : indicies) {
-             listBuilder.Atom(slot, ToString(columnIdx));
-             ++slot;
-         }
-         return listBuilder;
-     };
-
-     return Ctx.Builder(Node->Pos())
-         .List()
-             .Do(emitAtoms)
-         .Seal()
-         .Build();
- }
// Rewrites the aggregate into a multi-phase plan.
// Without distinct aggregations (HaveDistinct is false) the result is
//   AggregateMergeFinalize(AggregateCombine(AggList, ...), ...).
// With distinct aggregations the result is
//   AggregateMergeManyFinalize(UnionAll(<one input per stream>), ...),
// where each stream is an AggregateCombine whose state columns are wrapped in
// Just (see GenerateJustOverStates).
// All generated callables receive the node's settings (child 3) with
// "output_columns" removed; when UseBlocks is set the final callable also gets a
// "many_streams" setting listing the column indexes served by each stream.
- TExprNode::TPtr TAggregateExpander::GeneratePhases() {
- const TExprNode::TPtr cleanOutputSettings = RemoveSetting(*Node->Child(3), "output_columns", Ctx);
- const bool many = HaveDistinct;
- YQL_CLOG(DEBUG, Core) << "Aggregate: generate " << (many ? "phases with distinct" : "simple phases");
// Step 1: for every aggregated column build the trait used by the final
// (merge) phase. It reads the serialized state from the combine output row.
- TExprNode::TListType mergeTraits;
- for (ui32 index = 0; index < AggregatedColumns->ChildrenSize(); ++index) {
- auto originalTrait = AggregatedColumns->Child(index)->ChildPtr(1);
// Extractor: row -> Member(row, <initial column name>).
- auto extractor = Ctx.Builder(Node->Pos())
- .Lambda()
- .Param("row")
- .Callable("Member")
- .Arg(0, "row")
- .Add(1, InitialColumnNames[index])
- .Seal()
- .Seal()
- .Build();
// In the distinct case the states were wrapped in Just, so unwrap them here.
- if (many) {
- extractor = Ctx.Builder(Node->Pos())
- .Lambda()
- .Param("row")
- .Callable("Unwrap")
- .Apply(0, extractor)
- .With(0, "row")
- .Seal()
- .Seal()
- .Seal()
- .Build();
- }
- bool isAggApply = originalTrait->IsCallable("AggApply");
- auto serializedStateType = isAggApply ? AggApplySerializedStateType(originalTrait, Ctx) : originalTrait->Child(3)->GetTypeAnn();
- if (many) {
- serializedStateType = Ctx.MakeType<TOptionalExprType>(serializedStateType);
- }
// Row type seen by the extractor: a struct with the single state column.
- auto extractorTypeNode = Ctx.Builder(Node->Pos())
- .Callable("StructType")
- .List(0)
- .Add(0, InitialColumnNames[index])
- .Add(1, ExpandType(Node->Pos(), *serializedStateType, Ctx))
- .Seal()
- .Seal()
- .Build();
- if (isAggApply) {
- auto initialType = originalTrait->GetTypeAnn();
- if (many) {
- initialType = Ctx.MakeType<TOptionalExprType>(initialType);
- }
- auto originalExtractorTypeNode = Ctx.Builder(Node->Pos())
- .Callable("StructType")
- .List(0)
- .Add(0, InitialColumnNames[index])
- .Add(1, ExpandType(Node->Pos(), *initialType, Ctx))
- .Seal()
- .Seal()
- .Build();
- auto name = TString(originalTrait->ChildPtr(0)->Content());
// Names starting with "pg_" are replaced by "pg_<Name>#<AggId>" taken from the
// aggregation descriptor looked up by function name and argument types.
- if (name.StartsWith("pg_")) {
- auto func = name.substr(3);
- TVector<ui32> argTypes;
- bool needRetype = false;
- auto status = ExtractPgTypesFromMultiLambda(originalTrait->ChildRef(2), argTypes, needRetype, Ctx);
- YQL_ENSURE(status == IGraphTransformer::TStatus::Ok);
- const NPg::TAggregateDesc& aggDesc = NPg::LookupAggregation(TString(func), argTypes);
- name = "pg_" + aggDesc.Name + "#" + ToString(aggDesc.AggId);
- }
- mergeTraits.push_back(Ctx.Builder(Node->Pos())
- .Callable(many ? "AggApplyManyState" : "AggApplyState")
- .Atom(0, name)
- .Add(1, extractorTypeNode)
- .Add(2, extractor)
- .Add(3, originalExtractorTypeNode)
- .Seal()
- .Build());
- } else {
- YQL_ENSURE(originalTrait->IsCallable("AggregationTraits"));
// Children 3..7 of the original traits are reused; child 1 becomes the state
// extractor and child 2 is replaced by a lambda returning Void.
- mergeTraits.push_back(Ctx.Builder(Node->Pos())
- .Callable("AggregationTraits")
- .Add(0, extractorTypeNode)
- .Add(1, extractor)
- .Lambda(2)
- .Param("item")
- .Param("state")
- .Callable("Void")
- .Seal()
- .Seal()
- .Add(3, originalTrait->ChildPtr(3))
- .Add(4, originalTrait->ChildPtr(4))
- .Add(5, originalTrait->ChildPtr(5))
- .Add(6, originalTrait->ChildPtr(6))
- .Add(7, originalTrait->ChildPtr(7))
- .Seal()
- .Build());
- }
- }
// Step 2: pair every original output name (child 0) with its merge trait.
- TExprNode::TListType finalizeColumns;
- for (ui32 index = 0; index < AggregatedColumns->ChildrenSize(); ++index) {
- finalizeColumns.push_back(Ctx.Builder(Node->Pos())
- .List()
- .Add(0, AggregatedColumns->Child(index)->ChildPtr(0))
- .Add(1, mergeTraits[index])
- .Seal()
- .Build());
- }
- if (!many) {
- // simple Combine + MergeFinalize
- TExprNode::TListType combineColumns;
- for (ui32 index = 0; index < AggregatedColumns->ChildrenSize(); ++index) {
- combineColumns.push_back(Ctx.Builder(Node->Pos())
- .List()
- .Add(0, InitialColumnNames[index])
- .Add(1, AggregatedColumns->Child(index)->ChildPtr(1))
- .Seal()
- .Build());
- }
- auto combine = Ctx.Builder(Node->Pos())
- .Callable("AggregateCombine")
- .Add(0, AggList)
- .Add(1, KeyColumns)
- .Add(2, Ctx.NewList(Node->Pos(), std::move(combineColumns)))
- .Add(3, cleanOutputSettings)
- .Seal()
- .Build();
- auto mergeFinalize = Ctx.Builder(Node->Pos())
- .Callable("AggregateMergeFinalize")
- .Add(0, combine)
- .Add(1, KeyColumns)
- .Add(2, Ctx.NewList(Node->Pos(), std::move(finalizeColumns)))
- .Add(3, cleanOutputSettings)
- .Seal()
- .Build();
- return mergeFinalize;
- }
- // process with distincts
- // Combine + Map with Just over states
- // for each distinct field:
- // Aggregate by keys + field w/o aggs
- // Combine by keys + field with aggs
- // Map with Just over states
- // UnionAll
- // MergeManyFinalize
// unionAllInputs[k] and streams[k] describe the same stream: the input node and
// the serialized set of column indexes it provides.
- TExprNode::TListType unionAllInputs;
- TExprNode::TListType streams;
// Stream for the columns without a distinct field (skipped when there are none).
- if (!NonDistinctColumns.empty()) {
- TExprNode::TListType combineColumns;
- for (ui32 i : NonDistinctColumns) {
- combineColumns.push_back(Ctx.Builder(Node->Pos())
- .List()
- .Add(0, InitialColumnNames[i])
- .Add(1, AggregatedColumns->Child(i)->ChildPtr(1))
- .Seal()
- .Build());
- }
- auto combine = Ctx.Builder(Node->Pos())
- .Callable("AggregateCombine")
- .Add(0, AggList)
- .Add(1, KeyColumns)
- .Add(2, Ctx.NewList(Node->Pos(), std::move(combineColumns)))
- .Add(3, cleanOutputSettings)
- .Seal()
- .Build();
- unionAllInputs.push_back(GenerateJustOverStates(combine, NonDistinctColumns));
- streams.push_back(SerializeIdxSet(NonDistinctColumns));
- }
// One stream per distinct field.
- for (ui32 index = 0; index < DistinctFields.size(); ++index) {
- auto distinctField = DistinctFields[index];
- auto& indicies = Distinct2Columns[distinctField->Content()];
// Aggregate by (keys + distinct field) with an empty aggregation list.
- TExprNode::TListType allKeyColumns = KeyColumns->ChildrenList();
- allKeyColumns.push_back(distinctField);
- auto distinct = Ctx.Builder(Node->Pos())
- .Callable("Aggregate")
- .Add(0, AggList)
- .Add(1, Ctx.NewList(Node->Pos(), std::move(allKeyColumns)))
- .List(2)
- .Seal()
- .Add(3, cleanOutputSettings)
- .Seal()
- .Build();
// Rewrite each trait of this field so that it takes a row and feeds
// Member(row, distinctField) to the original lambda.
- TExprNode::TListType combineColumns;
- for (ui32 i : indicies) {
- auto trait = AggregatedColumns->Child(i)->ChildPtr(1);
- bool isAggApply = trait->IsCallable("AggApply");
- if (isAggApply) {
// AggApply: the item type becomes a struct with the single distinct field
// member, and the extractor (child 2) is wrapped.
- trait = Ctx.Builder(Node->Pos())
- .Callable("AggApply")
- .Add(0, trait->ChildPtr(0))
- .Callable(1, "StructType")
- .List(0)
- .Add(0, distinctField)
- .Add(1, trait->ChildPtr(1))
- .Seal()
- .Seal()
- .Lambda(2)
- .Param("row")
- .Apply(trait->ChildPtr(2))
- .With(0)
- .Callable("Member")
- .Arg(0, "row")
- .Add(1, distinctField)
- .Seal()
- .Done()
- .Seal()
- .Seal()
- .Seal()
- .Build();
- } else {
// Other traits: wrap child 1 and child 2, keeping their arity (an extra
// "parent" argument is forwarded when the original lambda declares it).
- TExprNode::TPtr newInit;
- if (trait->ChildPtr(1)->Head().ChildrenSize() == 1) {
- newInit = Ctx.Builder(Node->Pos())
- .Lambda()
- .Param("row")
- .Apply(trait->ChildPtr(1))
- .With(0)
- .Callable("Member")
- .Arg(0, "row")
- .Add(1, distinctField)
- .Seal()
- .Done()
- .Seal()
- .Seal()
- .Build();
- } else {
- newInit = Ctx.Builder(Node->Pos())
- .Lambda()
- .Param("row")
- .Param("parent")
- .Apply(trait->ChildPtr(1))
- .With(0)
- .Callable("Member")
- .Arg(0, "row")
- .Add(1, distinctField)
- .Seal()
- .Done()
- .With(1, "parent")
- .Seal()
- .Seal()
- .Build();
- }
- TExprNode::TPtr newUpdate;
- if (trait->ChildPtr(2)->Head().ChildrenSize() == 2) {
- newUpdate = Ctx.Builder(Node->Pos())
- .Lambda()
- .Param("row")
- .Param("state")
- .Apply(trait->ChildPtr(2))
- .With(0)
- .Callable("Member")
- .Arg(0, "row")
- .Add(1, distinctField)
- .Seal()
- .Done()
- .With(1, "state")
- .Seal()
- .Seal()
- .Build();
- } else {
- newUpdate = Ctx.Builder(Node->Pos())
- .Lambda()
- .Param("row")
- .Param("state")
- .Param("parent")
- .Apply(trait->ChildPtr(2))
- .With(0)
- .Callable("Member")
- .Arg(0, "row")
- .Add(1, distinctField)
- .Seal()
- .Done()
- .With(1, "state")
- .With(2, "parent")
- .Seal()
- .Seal()
- .Build();
- }
- trait = Ctx.Builder(Node->Pos())
- .Callable("AggregationTraits")
- .Callable(0, "StructType")
- .List(0)
- .Add(0, distinctField)
- .Add(1, trait->ChildPtr(0))
- .Seal()
- .Seal()
- .Add(1, newInit)
- .Add(2, newUpdate)
- .Add(3, trait->ChildPtr(3))
- .Add(4, trait->ChildPtr(4))
- .Add(5, trait->ChildPtr(5))
- .Add(6, trait->ChildPtr(6))
- .Add(7, trait->ChildPtr(7))
- .Seal()
- .Build();
- }
- combineColumns.push_back(Ctx.Builder(Node->Pos())
- .List()
- .Add(0, InitialColumnNames[i])
- .Add(1, trait)
- .Seal()
- .Build());
- }
// Combine over the de-duplicated rows, grouped by the original keys only.
- auto combine = Ctx.Builder(Node->Pos())
- .Callable("AggregateCombine")
- .Add(0, distinct)
- .Add(1, KeyColumns)
- .Add(2, Ctx.NewList(Node->Pos(), std::move(combineColumns)))
- .Add(3, cleanOutputSettings)
- .Seal()
- .Build();
- unionAllInputs.push_back(GenerateJustOverStates(combine, indicies));
- streams.push_back(SerializeIdxSet(indicies));
- }
// Block mode: tag every row with the ordinal of the stream it came from.
- if (UseBlocks) {
- for (ui32 i = 0; i < unionAllInputs.size(); ++i) {
- unionAllInputs[i] = Ctx.Builder(Node->Pos())
- .Callable("Map")
- .Add(0, unionAllInputs[i])
- .Lambda(1)
- .Param("row")
- .Callable("AddMember")
- .Arg(0, "row")
- .Atom(1, "_yql_group_stream_index")
- .Callable(2, "Uint32")
- .Atom(0, ToString(i))
- .Seal()
- .Seal()
- .Seal()
- .Seal()
- .Build();
- }
- }
// The per-stream index sets are passed on only in block mode.
- auto settings = cleanOutputSettings;
- if (UseBlocks) {
- settings = AddSetting(*settings, Node->Pos(), "many_streams", Ctx.NewList(Node->Pos(), std::move(streams)), Ctx);
- }
- auto unionAll = Ctx.NewCallable(Node->Pos(), "UnionAll", std::move(unionAllInputs));
- auto mergeManyFinalize = Ctx.Builder(Node->Pos())
- .Callable("AggregateMergeManyFinalize")
- .Add(0, unionAll)
- .Add(1, KeyColumns)
- .Add(2, Ctx.NewList(Node->Pos(), std::move(finalizeColumns)))
- .Add(3, settings)
- .Seal()
- .Build();
- return mergeManyFinalize;
- }
// Attempts the block-based combine. Returns nullptr when a session setting or
// a distinct aggregation is present, or when any aggregated column's trait
// (child 1) is not an AggApply callable; otherwise returns the result of
// TryGenerateBlockCombineAllOrHashed().
- TExprNode::TPtr TAggregateExpander::TryGenerateBlockCombine() {
-     const bool unsupportedShape = HaveSessionSetting || HaveDistinct;
-     if (unsupportedShape) {
-         return nullptr;
-     }
-
-     bool onlyAggApply = true;
-     for (const auto& column : AggregatedColumns->Children()) {
-         if (!column->ChildPtr(1)->IsCallable("AggApply")) {
-             onlyAggApply = false;
-             break;
-         }
-     }
-     if (!onlyAggApply) {
-         return nullptr;
-     }
-
-     return TryGenerateBlockCombineAllOrHashed();
- }
// Attempts the block-based merge/finalize. Returns nullptr when blocks are
// disabled, when partitioning by keys is used, when a session setting or a
// distinct aggregation is present, or when any aggregated column's trait
// (child 1) is neither AggApplyState nor AggApplyManyState; otherwise returns
// the result of TryGenerateBlockMergeFinalizeHashed().
- TExprNode::TPtr TAggregateExpander::TryGenerateBlockMergeFinalize() {
-     const bool blocksApplicable = UseBlocks && !UsePartitionsByKeys;
-     if (!blocksApplicable || HaveSessionSetting || HaveDistinct) {
-         return nullptr;
-     }
-
-     bool onlyStateTraits = true;
-     for (const auto& column : AggregatedColumns->Children()) {
-         if (!column->ChildPtr(1)->IsCallable({ "AggApplyState", "AggApplyManyState" })) {
-             onlyStateTraits = false;
-             break;
-         }
-     }
-     if (!onlyStateTraits) {
-         return nullptr;
-     }
-
-     return TryGenerateBlockMergeFinalizeHashed();
- }
// Builds the block-based hashed merge/finalize plan:
//   ShuffleByKeys(AggList, <key selector>, stream -> context(lambdaStream(stream)))
// where lambdaStream converts the stream to blocks (MakeInputBlocks), runs
// BlockMergeFinalizeHashed (or BlockMergeManyFinalizeHashed when
// Suffix == "MergeManyFinalize"), converts the result back with WideFromBlocks
// and narrows it to the output columns.
// Returns nullptr when there is no ArrowResolver, when there are no key
// columns, or when MakeInputBlocks cannot produce the input blocks.
// The "many" variant requires the "many_streams" setting on the node
// (enforced with YQL_ENSURE).
- TExprNode::TPtr TAggregateExpander::TryGenerateBlockMergeFinalizeHashed() {
-     if (!TypesCtx.ArrowResolver) {
-         return nullptr;
-     }
-     if (KeyColumns->ChildrenSize() == 0) {
-         return nullptr;
-     }
-
-     const bool isMany = Suffix == "MergeManyFinalize";
-     auto streamArg = Ctx.NewArgument(Node->Pos(), "stream");
-     TExprNode::TListType keyIdxs;
-     TVector<TString> outputColumns;
-     TExprNode::TListType aggs;
-     // Value-initialized so the read in the isMany branch is always defined,
-     // even if MakeInputBlocks leaves the out-parameter untouched.
-     // NOTE(review): presumably MakeInputBlocks fills it when isMany is true — confirm.
-     ui32 streamIdxColumn = 0;
-     auto blocks = MakeInputBlocks(streamArg, keyIdxs, outputColumns, aggs, true, isMany, &streamIdxColumn);
-     if (!blocks) {
-         return nullptr;
-     }
-
-     TExprNode::TPtr aggBlocks;
-     if (!isMany) {
-         aggBlocks = Ctx.Builder(Node->Pos())
-             .Callable("ToFlow")
-                 .Callable(0, "BlockMergeFinalizeHashed")
-                     .Callable(0, "FromFlow")
-                         .Add(0, blocks)
-                     .Seal()
-                     .Add(1, Ctx.NewList(Node->Pos(), std::move(keyIdxs)))
-                     .Add(2, Ctx.NewList(Node->Pos(), std::move(aggs)))
-                 .Seal()
-             .Seal()
-             .Build();
-     } else {
-         auto manyStreamsSetting = GetSetting(*Node->Child(3), "many_streams");
-         YQL_ENSURE(manyStreamsSetting, "Missing many_streams setting");
-         // The many-streams variant also receives the stream index column and
-         // the value of the many_streams setting.
-         aggBlocks = Ctx.Builder(Node->Pos())
-             .Callable("ToFlow")
-                 .Callable(0, "BlockMergeManyFinalizeHashed")
-                     .Callable(0, "FromFlow")
-                         .Add(0, blocks)
-                     .Seal()
-                     .Add(1, Ctx.NewList(Node->Pos(), std::move(keyIdxs)))
-                     .Add(2, Ctx.NewList(Node->Pos(), std::move(aggs)))
-                     .Atom(3, ToString(streamIdxColumn))
-                     .Add(4, manyStreamsSetting->TailPtr())
-                 .Seal()
-             .Seal()
-             .Build();
-     }
-
-     auto aggWideFlow = Ctx.Builder(Node->Pos())
-         .Callable("ToFlow")
-             .Callable(0, "WideFromBlocks")
-                 .Callable(0, "FromFlow")
-                     .Add(0, aggBlocks)
-                 .Seal()
-             .Seal()
-         .Seal()
-         .Build();
-     auto finalFlow = MakeNarrowMap(Node->Pos(), outputColumns, aggWideFlow, Ctx);
-     auto root = Ctx.NewCallable(Node->Pos(), "FromFlow", { finalFlow });
-     auto lambdaStream = Ctx.NewLambda(Node->Pos(), Ctx.NewArguments(Node->Pos(), { streamArg }), std::move(root));
-     auto keySelector = BuildKeySelector(Node->Pos(), *OriginalRowType, KeyColumns, Ctx);
-     return Ctx.Builder(Node->Pos())
-         .Callable("ShuffleByKeys")
-             .Add(0, AggList)
-             .Add(1, keySelector)
-             .Lambda(2)
-                 .Param("stream")
-                 .Apply(GetContextLambda())
-                     .With(0)
-                         .Apply(lambdaStream)
-                             .With(0, "stream")
-                         .Seal()
-                     .Done()
-                 .Seal()
-             .Seal()
-         .Seal()
-         .Build();
- }
// Peephole entry point. For a node matching TCoAggregate it first tries
// TAggregateExpander::CountAggregateRewrite and returns the rewritten node if
// it differs from the input. In every other case it delegates to
// ExpandAggregatePeepholeImpl, passing the block-engine flag from typesCtx.
- TExprNode::TPtr ExpandAggregatePeephole(const TExprNode::TPtr& node, TExprContext& ctx, TTypeAnnotationContext& typesCtx) {
-     auto expandGeneric = [&]() -> TExprNode::TPtr {
-         return ExpandAggregatePeepholeImpl(node, ctx, typesCtx, false, typesCtx.IsBlockEngineEnabled(), false);
-     };
-
-     if (!NNodes::TCoAggregate::Match(node.Get())) {
-         return expandGeneric();
-     }
-
-     NNodes::TCoAggregate aggregate(node);
-     auto rewritten = TAggregateExpander::CountAggregateRewrite(aggregate, ctx, typesCtx.IsBlockEngineEnabled());
-     if (rewritten != node) {
-         YQL_CLOG(DEBUG, Core) << "CountAggregateRewrite on peephole";
-         return rewritten;
-     }
-
-     return expandGeneric();
- }
- } // namespace NYql
|