12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277
727782779278027812782278327842785278627872788278927902791279227932794279527962797279827992800280128022803280428052806280728082809281028112812281328142815281628172818281928202821282228232824282528262827282828292830283128322833283428352836283728382839284028412842284328442845284628472848284928502851285228532854285528562857285828592860286128622863286428652866286728682869287028712872287328742875287628772878287928802881288228832884288528862887288828892890289128922893289428952896289728982899290029012902290329042905290629072908290929102911291229132914291529162917291829192920292129222923292429252926292729282929293029312932293329342935293629372938293929402941294229432944294529462947294829492950295129522953295429552956295729582959296029612962296329642965296629672968296929702971297229732974297529762977 |
- #include "yql_aggregate_expander.h"
- #include <yql/essentials/core/yql_expr_optimize.h>
- #include <yql/essentials/core/yql_expr_type_annotation.h>
- #include <yql/essentials/core/yql_opt_window.h>
- #include <yql/essentials/core/yql_opt_utils.h>
- #include <yql/essentials/core/yql_type_helpers.h>
- #include <yql/essentials/utils/log/log.h>
- namespace NYql {
- TExprNode::TPtr TAggregateExpander::ExpandAggregate() {
- YQL_CLOG(DEBUG, Core) << "Expand " << Node->Content();
- auto result = ExpandAggregateWithFullOutput();
- if (result) {
- auto outputColumns = GetSetting(*Node->Child(NNodes::TCoAggregate::idx_Settings), "output_columns");
- if (outputColumns) {
- result = Ctx.NewCallable(result->Pos(), "ExtractMembers", { result, outputColumns->ChildPtr(1) });
- }
- }
- return result;
- }
- TExprNode::TPtr TAggregateExpander::ExpandAggregateWithFullOutput()
- {
- Suffix = Node->Content();
- YQL_ENSURE(Suffix.SkipPrefix("Aggregate"));
- AggList = Node->HeadPtr();
- KeyColumns = Node->ChildPtr(1);
- AggregatedColumns = Node->Child(2);
- auto settings = Node->Child(3);
- bool allTraitsCollected = CollectTraits();
- YQL_ENSURE(!HasSetting(*settings, "hopping"), "Aggregate with hopping unsupported here.");
- HaveDistinct = AnyOf(AggregatedColumns->ChildrenList(),
- [](const auto& child) { return child->ChildrenSize() == 3; });
- EffectiveCompact = (HaveDistinct && CompactForDistinct && !UseBlocks) || ForceCompact || HasSetting(*settings, "compact");
- for (const auto& trait : Traits) {
- auto mergeLambda = trait->Child(5);
- if (mergeLambda->Tail().IsCallable("Void")) {
- EffectiveCompact = true;
- break;
- }
- }
- if (Suffix == "Finalize") {
- EffectiveCompact = true;
- Suffix = "";
- } else if (Suffix != "") {
- EffectiveCompact = false;
- }
- OriginalRowType = GetSeqItemType(*Node->Head().GetTypeAnn()).Cast<TStructExprType>();
- RowItems = OriginalRowType->GetItems();
- ProcessSessionSetting(GetSetting(*settings, "session"));
- RowType = Ctx.MakeType<TStructExprType>(RowItems);
- TVector<const TTypeAnnotationNode*> keyItemTypes = GetKeyItemTypes();
- bool needPickle = IsNeedPickle(keyItemTypes);
- auto keyExtractor = GetKeyExtractor(needPickle);
- CollectColumnsSpecs();
- if (Suffix == "" && !HaveSessionSetting && !EffectiveCompact && UsePhases) {
- return GeneratePhases();
- }
- if (UseBlocks) {
- if (Suffix == "Combine") {
- auto ret = TryGenerateBlockCombine();
- if (ret) {
- return ret;
- }
- }
- if (Suffix == "MergeFinalize" || Suffix == "MergeManyFinalize") {
- auto ret = TryGenerateBlockMergeFinalize();
- if (ret) {
- return ret;
- }
- }
- }
- if (!allTraitsCollected) {
- return RebuildAggregate();
- }
- BuildNothingStates();
- if (Suffix == "MergeState" || Suffix == "MergeFinalize" || Suffix == "MergeManyFinalize") {
- return GeneratePostAggregate(AggList, keyExtractor);
- }
- TExprNode::TPtr preAgg = GeneratePartialAggregate(keyExtractor, keyItemTypes, needPickle);
- if (EffectiveCompact || !preAgg) {
- preAgg = std::move(AggList);
- }
- if (Suffix == "Combine" || Suffix == "CombineState") {
- return preAgg;
- }
- return GeneratePostAggregate(preAgg, keyExtractor);
- }
- TExprNode::TPtr TAggregateExpander::ExpandAggApply(const TExprNode::TPtr& node)
- {
- auto name = node->Head().Content();
- if (name.StartsWith("pg_")) {
- auto func = name.SubStr(3);
- auto itemType = node->Child(1)->GetTypeAnn()->Cast<TTypeExprType>()->GetType();
- TVector<ui32> argTypes;
- bool needRetype = false;
- auto status = ExtractPgTypesFromMultiLambda(node->ChildRef(2), argTypes, needRetype, Ctx);
- YQL_ENSURE(status == IGraphTransformer::TStatus::Ok);
- const NPg::TAggregateDesc* aggDescPtr;
- if (node->Content().EndsWith("State")) {
- auto stateType = node->Child(2)->GetTypeAnn()->Cast<TPgExprType>()->GetId();
- auto resultType = node->GetTypeAnn()->Cast<TPgExprType>()->GetId();
- aggDescPtr = &NPg::LookupAggregation(TString(func), stateType, resultType);
- } else {
- aggDescPtr = &NPg::LookupAggregation(TString(func), argTypes);
- }
- return ExpandPgAggregationTraits(node->Pos(), *aggDescPtr, false, node->ChildPtr(2), argTypes, itemType, Ctx);
- }
- const TString modulePath = "/lib/yql/aggregate.yqls";
- auto exportsPtr = TypesCtx.Modules->GetModule(modulePath);
- YQL_ENSURE(exportsPtr, "Failed to get module " << modulePath);
- const auto& exports = exportsPtr->Symbols();
- const auto ex = exports.find(TString(name) + "_traits_factory");
- YQL_ENSURE(exports.cend() != ex);
- TNodeOnNodeOwnedMap deepClones;
- auto lambda = Ctx.DeepCopy(*ex->second, exportsPtr->ExprCtx(), deepClones, true, false);
- auto listTypeNode = Ctx.NewCallable(node->Pos(), "ListType", { node->ChildPtr(node->ChildrenSize() == 4 && !node->Child(3)->IsCallable("Void") ? 3 : 1) });
- auto extractor = node->ChildPtr(2);
- auto traits = Ctx.ReplaceNodes(lambda->TailPtr(), {
- {lambda->Head().Child(0), listTypeNode},
- {lambda->Head().Child(1), extractor}
- });
- Ctx.Step.Repeat(TExprStep::ExpandApplyForLambdas);
- auto status = ExpandApplyNoRepeat(traits, traits, Ctx);
- YQL_ENSURE(status != IGraphTransformer::TStatus::Error);
- return traits;
- }
- bool TAggregateExpander::CollectTraits() {
- bool allTraitsCollected = true;
- for (ui32 index = 0; index < AggregatedColumns->ChildrenSize(); ++index) {
- auto trait = AggregatedColumns->Child(index)->ChildPtr(1);
- if (trait->IsCallable({ "AggApply", "AggApplyState", "AggApplyManyState" })) {
- trait = ExpandAggApply(trait);
- allTraitsCollected = false;
- }
- Traits.push_back(trait);
- }
- return allTraitsCollected;
- }
- TExprNode::TPtr TAggregateExpander::RebuildAggregate()
- {
- TExprNode::TListType newAggregatedColumnsItems = AggregatedColumns->ChildrenList();
- for (ui32 index = 0; index < AggregatedColumns->ChildrenSize(); ++index) {
- auto trait = AggregatedColumns->Child(index)->ChildPtr(1);
- if (trait->IsCallable("AggApply")) {
- newAggregatedColumnsItems[index] = Ctx.ChangeChild(*(newAggregatedColumnsItems[index]), 1, std::move(Traits[index]));
- } else if (trait->IsCallable("AggApplyState") || trait->IsCallable("AggApplyManyState")) {
- auto newTrait = Ctx.Builder(Node->Pos())
- .Callable("AggregationTraits")
- .Add(0, trait->ChildPtr(1))
- .Add(1, trait->ChildPtr(2)) // extractor for state, not initial value itself
- .Lambda(2)
- .Param("item")
- .Param("state")
- .Callable("Void")
- .Seal()
- .Seal()
- .Add(3, Traits[index]->ChildPtr(3))
- .Add(4, Traits[index]->ChildPtr(4))
- .Add(5, Traits[index]->ChildPtr(5))
- .Add(6, Traits[index]->ChildPtr(6))
- .Add(7, Traits[index]->ChildPtr(7))
- .Seal()
- .Build();
- newAggregatedColumnsItems[index] = Ctx.ChangeChild(*(newAggregatedColumnsItems[index]), 1, std::move(newTrait));
- }
- }
- return Ctx.ChangeChild(*Node, 2, Ctx.NewList(Node->Pos(), std::move(newAggregatedColumnsItems)));
- }
- TExprNode::TPtr TAggregateExpander::GetContextLambda()
- {
- return HasContextFuncs(*AggregatedColumns) ?
- Ctx.Builder(Node->Pos())
- .Lambda()
- .Param("stream")
- .Callable("WithContext")
- .Arg(0, "stream")
- .Atom(1, "Agg")
- .Seal()
- .Seal()
- .Build() :
- Ctx.Builder(Node->Pos())
- .Lambda()
- .Param("stream")
- .Arg("stream")
- .Seal()
- .Build();
- }
- void TAggregateExpander::ProcessSessionSetting(TExprNode::TPtr sessionSetting)
- {
- if (!sessionSetting) {
- return;
- }
- HaveSessionSetting = true;
- YQL_ENSURE(sessionSetting->Child(1)->Child(0)->IsAtom());
- SessionOutputColumn = sessionSetting->Child(1)->Child(0)->Content();
- // remove session column from other keys
- TExprNodeList keyColumnsList = KeyColumns->ChildrenList();
- EraseIf(keyColumnsList, [&](const auto& key) { return SessionOutputColumn == key->Content(); });
- KeyColumns = Ctx.NewList(KeyColumns->Pos(), std::move(keyColumnsList));
- SessionWindowParams.Traits = sessionSetting->Child(1)->ChildPtr(1);
- ExtractSessionWindowParams(Node->Pos(), SessionWindowParams, Ctx);
- ExtractSortKeyAndOrder(Node->Pos(), SessionWindowParams.SortTraits, SortParams, Ctx);
- if (HaveDistinct) {
- auto keySelector = BuildKeySelector(Node->Pos(), *OriginalRowType, KeyColumns, Ctx);
- const auto sessionStartMemberLambda = AddSessionParamsMemberLambda(Node->Pos(), SessionStartMemberName, keySelector,
- SessionWindowParams, Ctx);
- AggList = Ctx.Builder(Node->Pos())
- .Callable("PartitionsByKeys")
- .Add(0, AggList)
- .Add(1, keySelector)
- .Add(2, SortParams.Order)
- .Add(3, SortParams.Key)
- .Lambda(4)
- .Param("partitionedStream")
- .Apply(sessionStartMemberLambda)
- .With(0, "partitionedStream")
- .Seal()
- .Seal()
- .Seal()
- .Build();
- auto keyColumnsList = KeyColumns->ChildrenList();
- keyColumnsList.push_back(Ctx.NewAtom(Node->Pos(), SessionStartMemberName));
- KeyColumns = Ctx.NewList(Node->Pos(), std::move(keyColumnsList));
- RowItems.push_back(Ctx.MakeType<TItemExprType>(SessionStartMemberName, SessionWindowParams.KeyType));
- SessionWindowParams.Reset();
- SortParams.Key = SortParams.Order = VoidNode;
- } else {
- EffectiveCompact = true;
- }
- }
- TVector<const TTypeAnnotationNode*> TAggregateExpander::GetKeyItemTypes()
- {
- TVector<const TTypeAnnotationNode*> keyItemTypes;
- for (auto keyColumn : KeyColumns->Children()) {
- auto index = RowType->FindItem(keyColumn->Content());
- YQL_ENSURE(index, "Unknown column: " << keyColumn->Content());
- auto type = RowType->GetItems()[*index]->GetItemType();
- keyItemTypes.push_back(type);
- }
- return keyItemTypes;
- }
- bool TAggregateExpander::IsNeedPickle(const TVector<const TTypeAnnotationNode*>& keyItemTypes)
- {
- bool needPickle = false;
- for (auto type : keyItemTypes) {
- needPickle |= !IsDataOrOptionalOfData(type);
- }
- return needPickle;
- }
- TExprNode::TPtr TAggregateExpander::GetKeyExtractor(bool needPickle)
- {
- TExprNode::TPtr keyExtractor = Ctx.Builder(Node->Pos())
- .Lambda()
- .Param("item")
- .Do([&](TExprNodeBuilder& parent) -> TExprNodeBuilder& {
- if (KeyColumns->ChildrenSize() == 0) {
- return parent.Callable("Uint32").Atom(0, "0", TNodeFlags::Default).Seal();
- }
- else if (KeyColumns->ChildrenSize() == 1) {
- return parent.Callable("Member").Arg(0, "item").Add(1, KeyColumns->HeadPtr()).Seal();
- }
- else {
- auto listBuilder = parent.List();
- ui32 pos = 0;
- for (ui32 i = 0; i < KeyColumns->ChildrenSize(); ++i) {
- listBuilder
- .Callable(pos++, "Member")
- .Arg(0, "item")
- .Add(1, KeyColumns->ChildPtr(i))
- .Seal();
- }
- return listBuilder.Seal();
- }
- })
- .Seal()
- .Build();
- if (needPickle) {
- keyExtractor = Ctx.Builder(Node->Pos())
- .Lambda()
- .Param("item")
- .Callable("StablePickle")
- .Apply(0, *keyExtractor)
- .With(0, "item")
- .Seal()
- .Seal()
- .Seal()
- .Build();
- }
- return keyExtractor;
- }
- void TAggregateExpander::CollectColumnsSpecs()
- {
- for (ui32 index = 0; index < AggregatedColumns->ChildrenSize(); ++index) {
- auto child = AggregatedColumns->Child(index);
- if (const auto distinctField = (child->ChildrenSize() == 3) ? child->Child(2) : nullptr) {
- const auto ins = Distinct2Columns.emplace(distinctField->Content(), TIdxSet());
- if (ins.second) {
- DistinctFields.push_back(distinctField);
- }
- ins.first->second.insert(InitialColumnNames.size());
- } else {
- NonDistinctColumns.insert(InitialColumnNames.size());
- }
- if (child->Head().IsAtom()) {
- FinalColumnNames.push_back(child->HeadPtr());
- } else {
- FinalColumnNames.push_back(child->Head().HeadPtr());
- }
- InitialColumnNames.push_back(Ctx.NewAtom(FinalColumnNames.back()->Pos(), "_yql_agg_" + ToString(InitialColumnNames.size()), TNodeFlags::Default));
- }
- }
- void TAggregateExpander::BuildNothingStates()
- {
- for (ui32 index = 0; index < AggregatedColumns->ChildrenSize(); ++index) {
- auto trait = Traits[index];
- auto saveLambda = trait->Child(3);
- auto saveLambdaType = saveLambda->GetTypeAnn();
- auto typeNode = ExpandType(Node->Pos(), *saveLambdaType, Ctx);
- NothingStates.push_back(Ctx.Builder(Node->Pos())
- .Callable("Nothing")
- .Callable(0, "OptionalType")
- .Add(0, std::move(typeNode))
- .Seal()
- .Seal()
- .Build()
- );
- }
- }
- TExprNode::TPtr TAggregateExpander::GeneratePartialAggregate(const TExprNode::TPtr keyExtractor,
- const TVector<const TTypeAnnotationNode*>& keyItemTypes, bool needPickle)
- {
- TExprNode::TPtr pickleTypeNode = nullptr;
- if (needPickle) {
- const TTypeAnnotationNode* pickleType = nullptr;
- pickleType = KeyColumns->ChildrenSize() > 1 ? Ctx.MakeType<TTupleExprType>(keyItemTypes) : keyItemTypes[0];
- pickleTypeNode = ExpandType(Node->Pos(), *pickleType, Ctx);
- }
- TExprNode::TPtr partialAgg = nullptr;
- if (!NonDistinctColumns.empty()) {
- partialAgg = GeneratePartialAggregateForNonDistinct(keyExtractor, pickleTypeNode);
- }
- for (ui32 index = 0; index < DistinctFields.size(); ++index) {
- auto distinctField = DistinctFields[index];
- bool needDistinctPickle = EffectiveCompact ? false : needPickle;
- auto distinctGrouper = GenerateDistinctGrouper(distinctField, keyItemTypes, needDistinctPickle);
- if (!partialAgg) {
- partialAgg = std::move(distinctGrouper);
- } else {
- partialAgg = Ctx.Builder(Node->Pos())
- .Callable("Extend")
- .Add(0, std::move(partialAgg))
- .Add(1, std::move(distinctGrouper))
- .Seal()
- .Build();
- }
- }
- // If no aggregation functions then add additional combiner
- if (AggregatedColumns->ChildrenSize() == 0 && KeyColumns->ChildrenSize() > 0 && !SessionWindowParams.Update) {
- if (!partialAgg) {
- partialAgg = AggList;
- }
- auto uniqCombineInit = ReturnKeyAsIsForCombineInit(pickleTypeNode);
- auto uniqCombineUpdate = Ctx.Builder(Node->Pos())
- .Lambda()
- .Param("key")
- .Param("item")
- .Param("state")
- .Arg("state")
- .Seal()
- .Build();
- // Return state as-is
- auto uniqCombineSave = Ctx.Builder(Node->Pos())
- .Lambda()
- .Param("key")
- .Param("state")
- .Callable("Just")
- .Arg(0, "state")
- .Seal()
- .Seal()
- .Build();
- partialAgg = Ctx.Builder(Node->Pos())
- .Callable("CombineByKey")
- .Add(0, std::move(partialAgg))
- .Add(1, PreMap)
- .Add(2, keyExtractor)
- .Add(3, std::move(uniqCombineInit))
- .Add(4, std::move(uniqCombineUpdate))
- .Add(5, std::move(uniqCombineSave))
- .Seal()
- .Build();
- }
- return partialAgg;
- }
- std::function<TExprNodeBuilder& (TExprNodeBuilder&)> TAggregateExpander::GetPartialAggArgExtractor(ui32 i, bool deserialize) {
- return [&, i, deserialize](TExprNodeBuilder& parent) -> TExprNodeBuilder& {
- auto trait = Traits[i];
- auto extractorLambda = trait->Child(1);
- auto loadLambda = trait->Child(4);
- if (Suffix == "CombineState") {
- if (deserialize) {
- parent.Apply(*loadLambda)
- .With(0)
- .Apply(*extractorLambda)
- .With(0)
- .Callable("CastStruct")
- .Arg(0, "item")
- .Add(1, ExpandType(Node->Pos(), *extractorLambda->Head().Head().GetTypeAnn(), Ctx))
- .Seal()
- .Done()
- .Seal()
- .Done()
- .Seal();
- } else {
- parent.Apply(*extractorLambda)
- .With(0)
- .Callable("CastStruct")
- .Arg(0, "item")
- .Add(1, ExpandType(Node->Pos(), *extractorLambda->Head().Head().GetTypeAnn(), Ctx))
- .Seal()
- .Done()
- .Seal();
- }
- } else {
- parent.Callable("CastStruct")
- .Arg(0, "item")
- .Add(1, ExpandType(Node->Pos(), *extractorLambda->Head().Head().GetTypeAnn(), Ctx))
- .Seal();
- }
- return parent;
- };
- }
- TExprNode::TPtr TAggregateExpander::GetFinalAggStateExtractor(ui32 i) {
- auto trait = Traits[i];
- if (Suffix.StartsWith("Merge")) {
- auto lambda = trait->ChildPtr(1);
- if (!Suffix.StartsWith("MergeMany")) {
- return lambda;
- }
- if (lambda->Tail().IsCallable("Unwrap")) {
- return Ctx.Builder(Node->Pos())
- .Lambda()
- .Param("item")
- .ApplyPartial(lambda->HeadPtr(), lambda->Tail().HeadPtr())
- .With(0, "item")
- .Seal()
- .Seal()
- .Build();
- } else {
- return Ctx.Builder(Node->Pos())
- .Lambda()
- .Param("item")
- .Callable("Just")
- .Apply(0, *lambda)
- .With(0, "item")
- .Seal()
- .Seal()
- .Seal()
- .Build();
- }
- }
- bool aggregateOnly = (Suffix != "");
- const auto& columnNames = aggregateOnly ? FinalColumnNames : InitialColumnNames;
- return Ctx.Builder(Node->Pos())
- .Lambda()
- .Param("item")
- .Callable("Member")
- .Arg(0, "item")
- .Add(1, columnNames[i])
- .Seal()
- .Seal()
- .Build();
- }
- TExprNode::TPtr TAggregateExpander::MakeInputBlocks(const TExprNode::TPtr& stream, TExprNode::TListType& keyIdxs,
- TVector<TString>& outputColumns, TExprNode::TListType& aggs, bool overState, bool many, ui32* streamIdxColumn) {
- TVector<TString> inputColumns;
- auto flow = Ctx.NewCallable(Node->Pos(), "ToFlow", { stream });
- for (ui32 i = 0; i < RowType->GetSize(); ++i) {
- inputColumns.push_back(TString(RowType->GetItems()[i]->GetName()));
- }
- auto wideFlow = MakeExpandMap(Node->Pos(), inputColumns, flow, Ctx);
- TExprNode::TListType extractorArgs;
- TExprNode::TListType newRowItems;
- for (ui32 i = 0; i < RowType->GetSize(); ++i) {
- extractorArgs.push_back(Ctx.NewArgument(Node->Pos(), "field" + ToString(i)));
- newRowItems.push_back(Ctx.NewList(Node->Pos(), { Ctx.NewAtom(Node->Pos(), RowType->GetItems()[i]->GetName()), extractorArgs.back() }));
- }
- const TExprNode::TPtr newRow = Ctx.NewCallable(Node->Pos(), "AsStruct", std::move(newRowItems));
- TExprNode::TListType extractorRoots;
- TVector<const TTypeAnnotationNode*> allKeyTypes;
- for (ui32 index = 0; index < KeyColumns->ChildrenSize(); ++index) {
- auto keyName = KeyColumns->Child(index)->Content();
- auto rowIndex = RowType->FindItem(keyName);
- YQL_ENSURE(rowIndex, "Unknown column: " << keyName);
- auto type = RowType->GetItems()[*rowIndex]->GetItemType();
- extractorRoots.push_back(extractorArgs[*rowIndex]);
- allKeyTypes.push_back(type);
- keyIdxs.push_back(Ctx.NewAtom(Node->Pos(), ToString(index)));
- outputColumns.push_back(TString(keyName));
- }
- if (many) {
- auto rowIndex = RowType->FindItem("_yql_group_stream_index");
- if (!rowIndex) {
- return nullptr;
- }
- if (streamIdxColumn) {
- *streamIdxColumn = extractorRoots.size();
- }
- extractorRoots.push_back(extractorArgs[*rowIndex]);
- }
- auto outputStructType = GetSeqItemType(*Node->GetTypeAnn()).Cast<TStructExprType>();
- auto resolveStatus = TypesCtx.ArrowResolver->AreTypesSupported(Ctx.GetPosition(Node->Pos()), allKeyTypes, Ctx);
- YQL_ENSURE(resolveStatus != IArrowResolver::ERROR);
- if (resolveStatus != IArrowResolver::OK) {
- return nullptr;
- }
- for (ui32 index = 0; index < AggregatedColumns->ChildrenSize(); ++index) {
- auto trait = AggregatedColumns->Child(index)->ChildPtr(1);
- TVector<const TTypeAnnotationNode*> allTypes;
- const TTypeAnnotationNode* originalType = nullptr;
- if (overState && !trait->Child(3)->IsCallable("Void")) {
- auto originalExtractorType = trait->Child(3)->GetTypeAnn()->Cast<TTypeExprType>()->GetType();
- originalType = GetOriginalResultType(trait->Pos(), many, originalExtractorType, Ctx);
- YQL_ENSURE(originalType);
- }
- ui32 argsCount = trait->Child(2)->ChildrenSize() - 1;
- if (!overState && trait->Child(0)->Content() == "count_all") {
- argsCount = 0;
- }
- auto rowArg = &trait->Child(2)->Head().Head();
- const TNodeOnNodeOwnedMap remaps{ { rowArg, newRow } };
- TVector<TExprNode::TPtr> roots;
- for (ui32 i = 1; i < argsCount + 1; ++i) {
- auto root = trait->Child(2)->ChildPtr(i);
- allTypes.push_back(root->GetTypeAnn());
- auto status = RemapExpr(root, root, remaps, Ctx, TOptimizeExprSettings(&TypesCtx));
- YQL_ENSURE(status.Level != IGraphTransformer::TStatus::Error);
- roots.push_back(root);
- }
- aggs.push_back(Ctx.Builder(Node->Pos())
- .List()
- .Callable(0, TString("AggBlockApply") + (overState ? "State" : ""))
- .Atom(0, trait->Child(0)->Content())
- .Do([&](TExprNodeBuilder& parent) -> TExprNodeBuilder& {
- if (overState) {
- if (originalType) {
- parent.Add(1, ExpandType(Node->Pos(), *originalType, Ctx));
- } else {
- parent
- .Callable(1, "NullType")
- .Seal();
- }
- }
- return parent;
- })
- .Do([&](TExprNodeBuilder& parent) -> TExprNodeBuilder& {
- for (ui32 i = 1; i < argsCount + 1; ++i) {
- parent.Add(i + (overState ? 1 : 0), ExpandType(Node->Pos(), *trait->Child(2)->Child(i)->GetTypeAnn(), Ctx));
- }
- return parent;
- })
- .Seal()
- .Do([&](TExprNodeBuilder& parent) -> TExprNodeBuilder& {
- for (ui32 i = 1; i < argsCount + 1; ++i) {
- parent.Atom(i, ToString(extractorRoots.size() + i - 1));
- }
- return parent;
- })
- .Seal()
- .Build());
- for (auto root : roots) {
- if (many) {
- if (root->IsCallable("Unwrap")) {
- root = root->HeadPtr();
- } else {
- root = Ctx.Builder(Node->Pos())
- .Callable("Just")
- .Add(0, root)
- .Seal()
- .Build();
- }
- }
- extractorRoots.push_back(root);
- }
- auto outPos = outputStructType->FindItem(FinalColumnNames[index]->Content());
- YQL_ENSURE(outPos);
- allTypes.push_back(outputStructType->GetItems()[*outPos]->GetItemType());
- auto resolveStatus = TypesCtx.ArrowResolver->AreTypesSupported(Ctx.GetPosition(Node->Pos()), allTypes, Ctx);
- YQL_ENSURE(resolveStatus != IArrowResolver::ERROR);
- if (resolveStatus != IArrowResolver::OK) {
- return nullptr;
- }
- outputColumns.push_back(TString(FinalColumnNames[index]->Content()));
- }
- auto extractorLambda = Ctx.NewLambda(Node->Pos(), Ctx.NewArguments(Node->Pos(), std::move(extractorArgs)), std::move(extractorRoots));
- auto mappedWideFlow = Ctx.NewCallable(Node->Pos(), "WideMap", { wideFlow, extractorLambda });
- auto blocks = Ctx.NewCallable(Node->Pos(), "WideToBlocks", { mappedWideFlow });
- return blocks;
- }
- TExprNode::TPtr TAggregateExpander::TryGenerateBlockCombineAllOrHashed() {
- if (!TypesCtx.ArrowResolver) {
- return nullptr;
- }
- const bool hashed = (KeyColumns->ChildrenSize() > 0);
- const bool isInputList = (AggList->GetTypeAnn()->GetKind() == ETypeAnnotationKind::List);
- TExprNode::TListType keyIdxs;
- TVector<TString> outputColumns;
- TExprNode::TListType aggs;
- TExprNode::TPtr stream = nullptr;
- if (isInputList) {
- stream = Ctx.NewArgument(Node->Pos(), "stream");
- } else {
- stream = AggList;
- }
- TExprNode::TPtr blocks = MakeInputBlocks(stream, keyIdxs, outputColumns, aggs, false, false);
- if (!blocks) {
- return nullptr;
- }
- TExprNode::TPtr aggWideFlow;
- if (hashed) {
- aggWideFlow = Ctx.Builder(Node->Pos())
- .Callable("ToFlow")
- .Callable(0, "WideFromBlocks")
- .Callable(0, "BlockCombineHashed")
- .Callable(0, "FromFlow")
- .Add(0, blocks)
- .Seal()
- .Callable(1, "Void")
- .Seal()
- .Add(2, Ctx.NewList(Node->Pos(), std::move(keyIdxs)))
- .Add(3, Ctx.NewList(Node->Pos(), std::move(aggs)))
- .Seal()
- .Seal()
- .Seal()
- .Build();
- } else {
- aggWideFlow = Ctx.Builder(Node->Pos())
- .Callable("ToFlow")
- .Callable(0, "BlockCombineAll")
- .Callable(0, "FromFlow")
- .Add(0, blocks)
- .Seal()
- .Callable(1, "Void")
- .Seal()
- .Add(2, Ctx.NewList(Node->Pos(), std::move(aggs)))
- .Seal()
- .Seal()
- .Build();
- }
- auto finalFlow = MakeNarrowMap(Node->Pos(), outputColumns, aggWideFlow, Ctx);
- if (isInputList) {
- auto root = Ctx.NewCallable(Node->Pos(), "FromFlow", { finalFlow });
- auto lambdaStream = Ctx.NewLambda(Node->Pos(), Ctx.NewArguments(Node->Pos(), { stream }), std::move(root));
- return Ctx.Builder(Node->Pos())
- .Callable("LMap")
- .Add(0, AggList)
- .Lambda(1)
- .Param("stream")
- .Apply(GetContextLambda())
- .With(0)
- .Apply(lambdaStream)
- .With(0, "stream")
- .Seal()
- .Done()
- .Seal()
- .Seal()
- .Seal()
- .Build();
- } else {
- return finalFlow;
- }
- }
// Builds the "CombineByKey" callable that handles the columns listed in
// NonDistinctColumns.
//
// keyExtractor   - passed through as child 2 of CombineByKey.
// pickleTypeNode - when non-null, the "key" argument is wrapped into
//                  "Unpickle" with this type node before key columns are read
//                  from it in the save handler.
//
// Returns CombineByKey(AggList, PreMap, keyExtractor, init, update, save).
- TExprNode::TPtr TAggregateExpander::GeneratePartialAggregateForNonDistinct(const TExprNode::TPtr& keyExtractor, const TExprNode::TPtr& pickleTypeNode)
- {
// The "Combine"/"CombineState" suffixes use the final column names; every
// other suffix uses the initial ones.
- bool combineOnly = Suffix == "Combine" || Suffix == "CombineState";
- const auto& columnNames = combineOnly ? FinalColumnNames : InitialColumnNames;
// For "CombineState" the init/update lambdas are taken from trait children
// 4/5 instead of 1/2.
- auto initLambdaIndex = (Suffix == "CombineState") ? 4 : 1;
- auto updateLambdaIndex = (Suffix == "CombineState") ? 5 : 2;
// Init handler (key, item): AsStruct with one member per non-distinct column.
- auto combineInit = Ctx.Builder(Node->Pos())
- .Lambda()
- .Param("key")
- .Param("item")
- .Callable("AsStruct")
- .Do([&](TExprNodeBuilder& parent) -> TExprNodeBuilder& {
- ui32 ndx = 0;
- for (ui32 i: NonDistinctColumns) {
- auto trait = Traits[i];
- auto initLambda = trait->Child(initLambdaIndex);
// One-argument init lambda: only the extracted argument is supplied.
- if (initLambda->Head().ChildrenSize() == 1) {
- parent.List(ndx++)
- .Add(0, columnNames[i])
- .Apply(1, *initLambda)
- .With(0)
- .Do(GetPartialAggArgExtractor(i, false))
- .Done()
- .Seal()
- .Seal();
- } else {
// Otherwise the column index is supplied as a Uint32 second argument.
- parent.List(ndx++)
- .Add(0, columnNames[i])
- .Apply(1, *initLambda)
- .With(0)
- .Do(GetPartialAggArgExtractor(i, false))
- .Done()
- .With(1)
- .Callable("Uint32")
- .Atom(0, ToString(i), TNodeFlags::Default)
- .Seal()
- .Done()
- .Seal()
- .Seal();
- }
- }
- return parent;
- })
- .Seal()
- .Seal()
- .Build();
// Update handler (key, item, state): AsStruct with one member per
// non-distinct column, each produced by the trait's update lambda from the
// extracted argument and the matching member of "state".
- auto combineUpdate = Ctx.Builder(Node->Pos())
- .Lambda()
- .Param("key")
- .Param("item")
- .Param("state")
- .Callable("AsStruct")
- .Do([&](TExprNodeBuilder& parent) -> TExprNodeBuilder& {
- ui32 ndx = 0;
- for (ui32 i: NonDistinctColumns) {
- auto trait = Traits[i];
- auto updateLambda = trait->Child(updateLambdaIndex);
// Two-argument update lambda: (extracted argument, previous state).
- if (updateLambda->Head().ChildrenSize() == 2) {
- parent.List(ndx++)
- .Add(0, columnNames[i])
- .Apply(1, *updateLambda)
- .With(0)
- .Do(GetPartialAggArgExtractor(i, true))
- .Done()
- .With(1)
- .Callable("Member")
- .Arg(0, "state")
- .Add(1, columnNames[i])
- .Seal()
- .Done()
- .Seal()
- .Seal();
- } else {
// Otherwise the column index is appended as a Uint32 third argument.
- parent.List(ndx++)
- .Add(0, columnNames[i])
- .Apply(1, *updateLambda)
- .With(0)
- .Do(GetPartialAggArgExtractor(i, true))
- .Done()
- .With(1)
- .Callable("Member")
- .Arg(0, "state")
- .Add(1, columnNames[i])
- .Seal()
- .Done()
- .With(2)
- .Callable("Uint32")
- .Atom(0, ToString(i), TNodeFlags::Default)
- .Seal()
- .Done()
- .Seal()
- .Seal();
- }
- }
- return parent;
- })
- .Seal()
- .Seal()
- .Build();
// Save handler (key, state): Just(AsStruct(...)) with one member per column
// (positions 0..columnNames.size()-1) followed by one member per key column.
- auto combineSave = Ctx.Builder(Node->Pos())
- .Lambda()
- .Param("key")
- .Param("state")
- .Callable("Just")
- .Callable(0, "AsStruct")
- .Do([&](TExprNodeBuilder& parent) -> TExprNodeBuilder& {
- for (ui32 i = 0; i < columnNames.size(); ++i) {
// Columns outside NonDistinctColumns get their NothingStates placeholder.
- if (NonDistinctColumns.find(i) == NonDistinctColumns.end()) {
- parent.List(i)
- .Add(0, columnNames[i])
- .Add(1, NothingStates[i])
- .Seal();
- } else {
- auto trait = Traits[i];
- auto saveLambda = trait->Child(3);
// When distinct fields are present the saved state is additionally wrapped
// into "Just".
- if (!DistinctFields.empty()) {
- parent.List(i)
- .Add(0, columnNames[i])
- .Callable(1, "Just")
- .Apply(0, *saveLambda)
- .With(0)
- .Callable("Member")
- .Arg(0, "state")
- .Add(1, columnNames[i])
- .Seal()
- .Done()
- .Seal()
- .Seal()
- .Seal();
- } else {
- parent.List(i)
- .Add(0, columnNames[i])
- .Apply(1, *saveLambda)
- .With(0)
- .Callable("Member")
- .Arg(0, "state")
- .Add(1, columnNames[i])
- .Seal()
- .Done()
- .Seal()
- .Seal();
- }
- }
- }
- return parent;
- })
- .Do([&](TExprNodeBuilder& parent) -> TExprNodeBuilder& {
// Key members are appended after the column members.
- ui32 pos = 0;
- for (ui32 i = 0; i < KeyColumns->ChildrenSize(); ++i) {
- auto listBuilder = parent.List(columnNames.size() + i);
- listBuilder.Add(0, KeyColumns->ChildPtr(i));
// More than one key column: take element `pos` of the (optionally unpickled)
// key via "Nth".
- if (KeyColumns->ChildrenSize() > 1) {
- if (pickleTypeNode) {
- listBuilder
- .Callable(1, "Nth")
- .Callable(0, "Unpickle")
- .Add(0, pickleTypeNode)
- .Arg(1, "key")
- .Seal()
- .Atom(1, ToString(pos), TNodeFlags::Default)
- .Seal();
- } else {
- listBuilder
- .Callable(1, "Nth")
- .Arg(0, "key")
- .Atom(1, ToString(pos), TNodeFlags::Default)
- .Seal();
- }
- ++pos;
- } else {
// Single key column: the (optionally unpickled) key itself is the value.
- if (pickleTypeNode) {
- listBuilder.Callable(1, "Unpickle")
- .Add(0, pickleTypeNode)
- .Arg(1, "key")
- .Seal();
- } else {
- listBuilder.Arg(1, "key");
- }
- }
- listBuilder.Seal();
- }
- return parent;
- })
- .Seal()
- .Seal()
- .Seal()
- .Build();
- return Ctx.Builder(Node->Pos())
- .Callable("CombineByKey")
- .Add(0, AggList)
- .Add(1, PreMap)
- .Add(2, keyExtractor)
- .Add(3, std::move(combineInit))
- .Add(4, std::move(combineUpdate))
- .Add(5, std::move(combineSave))
- .Seal()
- .Build();
- }
- void TAggregateExpander::GenerateInitForDistinct(TExprNodeBuilder& parent, ui32& ndx, const TIdxSet& indicies, const TExprNode::TPtr& distinctField) {
- for (ui32 i: indicies) {
- auto trait = Traits[i];
- auto initLambda = trait->Child(1);
- if (initLambda->Head().ChildrenSize() == 1) {
- parent.List(ndx++)
- .Add(0, InitialColumnNames[i])
- .Apply(1, *initLambda)
- .With(0)
- .Callable("Member")
- .Arg(0, "item")
- .Add(1, distinctField)
- .Seal()
- .Done()
- .Seal()
- .Seal();
- } else {
- parent.List(ndx++)
- .Add(0, InitialColumnNames[i])
- .Apply(1, *initLambda)
- .With(0)
- .Callable("Member")
- .Arg(0, "item")
- .Add(1, distinctField)
- .Seal()
- .Done()
- .With(1)
- .Callable("Uint32")
- .Atom(0, ToString(i), TNodeFlags::Default)
- .Seal()
- .Done()
- .Seal()
- .Seal();
- }
- }
- }
// Builds the grouping pipeline for one distinct field.
//
// distinctField      - atom naming the distinct column; must exist in RowType.
// keyItemTypes       - types of the key columns; the distinct field's type is
//                      appended to a local copy to form the distinct key.
// needDistinctPickle - whether the distinct key must go through StablePickle;
//                      forced to true when the (optional-stripped) distinct
//                      type is not a Data type.
//
// Side effects visible here: stores entries keyed by the distinct field name
// in DistinctFieldNeedsPickle, UdfSetCreate, UdfAddValue and UdfWasChanged.
//
// Returns a "PartitionsByKeys" node over a "CombineByKey" on the distinct key.
- TExprNode::TPtr TAggregateExpander::GenerateDistinctGrouper(const TExprNode::TPtr distinctField,
- const TVector<const TTypeAnnotationNode*>& keyItemTypes, bool needDistinctPickle)
- {
// Column indices registered in Distinct2Columns for this distinct field.
- auto& indicies = Distinct2Columns[distinctField->Content()];
- auto distinctIndex = RowType->FindItem(distinctField->Content());
- YQL_ENSURE(distinctIndex, "Unknown field: " << distinctField->Content());
- auto distinctType = RowType->GetItems()[*distinctIndex]->GetItemType();
// Distinct key item types = key item types + the distinct field's own type
// (taken before the Optional is stripped below).
- TVector<const TTypeAnnotationNode*> distinctKeyItemTypes = keyItemTypes;
- distinctKeyItemTypes.push_back(distinctType);
- auto valueType = distinctType;
- if (distinctType->GetKind() == ETypeAnnotationKind::Optional) {
- distinctType = distinctType->Cast<TOptionalExprType>()->GetItemType();
- }
// A non-Data distinct type forces pickling; the value type becomes String.
- if (distinctType->GetKind() != ETypeAnnotationKind::Data) {
- needDistinctPickle = true;
- valueType = Ctx.MakeType<TDataExprType>(EDataSlot::String);
- }
// Type node for the set element: DataType(String) when pickling, otherwise
// the expanded valueType.
- const auto expandedValueType = needDistinctPickle ?
- Ctx.Builder(Node->Pos())
- .Callable("DataType")
- .Atom(0, "String", TNodeFlags::Default)
- .Seal()
- .Build()
- : ExpandType(Node->Pos(), *valueType, Ctx);
- DistinctFieldNeedsPickle[distinctField->Content()] = needDistinctPickle;
// Udf "Set.Create" with argument tuple type (expandedValueType, Uint32).
- auto udfSetCreateValue = Ctx.Builder(Node->Pos())
- .Callable("Udf")
- .Atom(0, "Set.Create")
- .Callable(1, "Void").Seal()
- .Callable(2, "TupleType")
- .Callable(0, "TupleType")
- .Add(0, expandedValueType)
- .Callable(1, "DataType")
- .Atom(0, "Uint32", TNodeFlags::Default)
- .Seal()
- .Seal()
- .Callable(1, "StructType").Seal()
- .Add(2, expandedValueType)
- .Seal()
- .Seal()
- .Build();
- UdfSetCreate[distinctField->Content()] = udfSetCreateValue;
// TypeOf(Apply(Set.Create, InstanceOf(expandedValueType), Uint32("0"))):
// the type of the value Set.Create produces, used as the first argument type
// of the two Udfs below.
- auto resourceType = Ctx.Builder(Node->Pos())
- .Callable("TypeOf")
- .Callable(0, "Apply")
- .Add(0, udfSetCreateValue)
- .Callable(1, "InstanceOf")
- .Add(0, expandedValueType)
- .Seal()
- .Callable(2, "Uint32")
- .Atom(0, "0", TNodeFlags::Default)
- .Seal()
- .Seal()
- .Seal()
- .Build();
// Udf "Set.AddValue" with argument tuple type (resourceType, expandedValueType).
- UdfAddValue[distinctField->Content()] = Ctx.Builder(Node->Pos())
- .Callable("Udf")
- .Atom(0, "Set.AddValue")
- .Callable(1, "Void").Seal()
- .Callable(2, "TupleType")
- .Callable(0, "TupleType")
- .Add(0, resourceType)
- .Add(1, expandedValueType)
- .Seal()
- .Callable(1, "StructType").Seal()
- .Add(2, expandedValueType)
- .Seal()
- .Seal()
- .Build();
// Udf "Set.WasChanged" with argument tuple type (resourceType).
- UdfWasChanged[distinctField->Content()] = Ctx.Builder(Node->Pos())
- .Callable("Udf")
- .Atom(0, "Set.WasChanged")
- .Callable(1, "Void").Seal()
- .Callable(2, "TupleType")
- .Callable(0, "TupleType")
- .Add(0, resourceType)
- .Seal()
- .Callable(1, "StructType").Seal()
- .Add(2, expandedValueType)
- .Seal()
- .Seal()
- .Build();
// Key extractor lambda (item): with key columns, a list of
// Member(item, keyColumn)... followed by Member(item, distinctField);
// without key columns, just Member(item, distinctField).
- auto distinctKeyExtractor = Ctx.Builder(Node->Pos())
- .Lambda()
- .Param("item")
- .Do([&](TExprNodeBuilder& parent) -> TExprNodeBuilder& {
- if (KeyColumns->ChildrenSize() != 0) {
- auto listBuilder = parent.List();
- ui32 pos = 0;
- for (ui32 i = 0; i < KeyColumns->ChildrenSize(); ++i) {
- listBuilder
- .Callable(pos++, "Member")
- .Arg(0, "item")
- .Add(1, KeyColumns->ChildPtr(i))
- .Seal();
- }
- listBuilder
- .Callable(pos, "Member")
- .Arg(0, "item")
- .Add(1, distinctField)
- .Seal();
- return listBuilder.Seal();
- } else {
- return parent
- .Callable("Member")
- .Arg(0, "item")
- .Add(1, distinctField)
- .Seal();
- }
- })
- .Seal()
- .Build();
// Type used to unpickle the key: a tuple of all distinct key item types when
// key columns exist, otherwise the single distinct item type.
- const TTypeAnnotationNode* distinctPickleType = nullptr;
- TExprNode::TPtr distinctPickleTypeNode;
- if (needDistinctPickle) {
- distinctPickleType = KeyColumns->ChildrenSize() > 0 ? Ctx.MakeType<TTupleExprType>(distinctKeyItemTypes) : distinctKeyItemTypes.front();
- distinctPickleTypeNode = ExpandType(Node->Pos(), *distinctPickleType, Ctx);
- }
// When pickling, replace the key extractor with StablePickle(extractor(item)).
- if (needDistinctPickle) {
- distinctKeyExtractor = Ctx.Builder(Node->Pos())
- .Lambda()
- .Param("item")
- .Callable("StablePickle")
- .Apply(0, *distinctKeyExtractor).With(0, "item").Seal()
- .Seal()
- .Seal()
- .Build();
- }
// Init handler (key, item): AsStruct filled by GenerateInitForDistinct.
- auto distinctCombineInit = Ctx.Builder(Node->Pos())
- .Lambda()
- .Param("key")
- .Param("item")
- .Callable("AsStruct")
- .Do([&](TExprNodeBuilder& parent) -> TExprNodeBuilder& {
- ui32 ndx = 0;
- GenerateInitForDistinct(parent, ndx, indicies, distinctField);
- return parent;
- })
- .Seal()
- .Seal()
- .Build();
// Update handler (key, item, state): returns "state" unchanged.
- auto distinctCombineUpdate = Ctx.Builder(Node->Pos())
- .Lambda()
- .Param("key")
- .Param("item")
- .Param("state")
- .Arg("state")
- .Seal()
- .Build();
// `ndx` is shared by the two Do() callbacks of the save handler so that the
// struct members are numbered consecutively across both.
- ui32 ndx = 0;
// Save handler (key, state): Just(AsStruct(saved column states..., key
// columns..., distinct field)).
- auto distinctCombineSave = Ctx.Builder(Node->Pos())
- .Lambda()
- .Param("key")
- .Param("state")
- .Callable("Just")
- .Callable(0, "AsStruct")
- .Do([&](TExprNodeBuilder& parent) -> TExprNodeBuilder& {
- for (ui32 i: indicies) {
- auto trait = Traits[i];
- auto saveLambda = trait->Child(3);
- parent.List(ndx++)
- .Add(0, InitialColumnNames[i])
- .Apply(1, *saveLambda)
- .With(0)
- .Callable("Member")
- .Arg(0, "state")
- .Add(1, InitialColumnNames[i])
- .Seal()
- .Done()
- .Seal()
- .Seal();
- }
- return parent;
- })
- .Do([&](TExprNodeBuilder& parent) -> TExprNodeBuilder& {
// With key columns the key is a tuple: key columns first, distinct field last.
- if (KeyColumns->ChildrenSize() > 0) {
- if (needDistinctPickle) {
- ui32 pos = 0;
- for (ui32 i = 0; i < KeyColumns->ChildrenSize(); ++i) {
- parent.List(ndx++)
- .Add(0, KeyColumns->ChildPtr(i))
- .Callable(1, "Nth")
- .Callable(0, "Unpickle")
- .Add(0, distinctPickleTypeNode)
- .Arg(1, "key")
- .Seal()
- .Atom(1, ToString(pos++), TNodeFlags::Default)
- .Seal()
- .Seal();
- }
- parent.List(ndx++)
- .Add(0, distinctField)
- .Callable(1, "Nth")
- .Callable(0, "Unpickle")
- .Add(0, distinctPickleTypeNode)
- .Arg(1, "key")
- .Seal()
- .Atom(1, ToString(pos++), TNodeFlags::Default)
- .Seal()
- .Seal();
- } else {
- ui32 pos = 0;
- for (ui32 i = 0; i < KeyColumns->ChildrenSize(); ++i) {
- parent.List(ndx++)
- .Add(0, KeyColumns->ChildPtr(i))
- .Callable(1, "Nth")
- .Arg(0, "key")
- .Atom(1, ToString(pos++), TNodeFlags::Default)
- .Seal()
- .Seal();
- }
- parent.List(ndx++)
- .Add(0, distinctField)
- .Callable(1, "Nth")
- .Arg(0, "key")
- .Atom(1, ToString(pos++), TNodeFlags::Default)
- .Seal()
- .Seal();
- }
- } else {
// Without key columns the key is the (optionally pickled) distinct value itself.
- if (needDistinctPickle) {
- parent.List(ndx++)
- .Add(0, distinctField)
- .Callable(1, "Unpickle")
- .Add(0, distinctPickleTypeNode)
- .Arg(1, "key")
- .Seal()
- .Seal();
- } else {
- parent.List(ndx++)
- .Add(0, distinctField)
- .Arg(1, "key")
- .Seal();
- }
- }
- return parent;
- })
- .Seal()
- .Seal()
- .Seal()
- .Build();
- auto distinctCombiner = Ctx.Builder(Node->Pos())
- .Callable("CombineByKey")
- .Add(0, AggList)
- .Add(1, PreMap)
- .Add(2, distinctKeyExtractor)
- .Add(3, std::move(distinctCombineInit))
- .Add(4, std::move(distinctCombineUpdate))
- .Add(5, std::move(distinctCombineSave))
- .Seal()
- .Build();
// PartitionsByKeys over the combiner output: Condense1 keeps the latest item
// per distinct key (switching on IsKeySwitch), then Map reshapes each state
// into a struct with all InitialColumnNames followed by the key columns.
- auto distinctGrouper = Ctx.Builder(Node->Pos())
- .Callable("PartitionsByKeys")
- .Add(0, std::move(distinctCombiner))
- .Add(1, distinctKeyExtractor)
- .Callable(2, "Void").Seal()
- .Callable(3, "Void").Seal()
- .Lambda(4)
- .Param("groups")
- .Callable("Map")
- .Callable(0, "Condense1")
- .Arg(0, "groups")
- .Lambda(1)
- .Param("item")
- .Arg("item")
- .Seal()
- .Lambda(2)
- .Param("item")
- .Param("state")
- .Callable("IsKeySwitch")
- .Arg(0, "item")
- .Arg(1, "state")
- .Add(2, distinctKeyExtractor)
- .Add(3, distinctKeyExtractor)
- .Seal()
- .Seal()
- .Lambda(3)
- .Param("item")
- .Param("state")
- .Arg("item")
- .Seal()
- .Seal()
- .Lambda(1)
- .Param("state")
- .Callable("AsStruct")
- .Do([&](TExprNodeBuilder& parent) -> TExprNodeBuilder& {
// Columns belonging to this distinct field carry Just(state member); all
// other columns carry their NothingStates placeholder.
- for (ui32 i = 0; i < InitialColumnNames.size(); ++i) {
- if (indicies.find(i) != indicies.end()) {
- parent.List(i)
- .Add(0, InitialColumnNames[i])
- .Callable(1, "Just")
- .Callable(0, "Member")
- .Arg(0, "state")
- .Add(1, InitialColumnNames[i])
- .Seal()
- .Seal()
- .Seal();
- } else {
- parent.List(i)
- .Add(0, InitialColumnNames[i])
- .Add(1, NothingStates[i])
- .Seal();
- }
- }
- return parent;
- })
- .Do([&](TExprNodeBuilder& parent) -> TExprNodeBuilder& {
// Key columns are copied from "state" after the aggregation columns.
- if (KeyColumns->ChildrenSize() > 0) {
- for (ui32 i = 0; i < KeyColumns->ChildrenSize(); ++i) {
- parent.List(InitialColumnNames.size() + i)
- .Add(0, KeyColumns->ChildPtr(i))
- .Callable(1, "Member")
- .Arg(0, "state")
- .Add(1, KeyColumns->ChildPtr(i))
- .Seal().Seal();
- }
- }
- return parent;
- })
- .Seal()
- .Seal()
- .Seal()
- .Seal()
- .Seal()
- .Build();
- return distinctGrouper;
- }
- TExprNode::TPtr TAggregateExpander::ReturnKeyAsIsForCombineInit(const TExprNode::TPtr& pickleTypeNode)
- {
- return Ctx.Builder(Node->Pos())
- .Lambda()
- .Param("key")
- .Param("item")
- .Callable("AsStruct")
- .Do([&](TExprNodeBuilder& parent) -> TExprNodeBuilder& {
- ui32 pos = 0;
- for (ui32 i = 0; i < KeyColumns->ChildrenSize(); ++i) {
- auto listBuilder = parent.List(i);
- listBuilder.Add(0, KeyColumns->Child(i));
- if (KeyColumns->ChildrenSize() > 1) {
- if (pickleTypeNode) {
- listBuilder
- .Callable(1, "Nth")
- .Callable(0, "Unpickle")
- .Add(0, pickleTypeNode)
- .Arg(1, "key")
- .Seal()
- .Atom(1, ToString(pos++), TNodeFlags::Default)
- .Seal();
- } else {
- listBuilder
- .Callable(1, "Nth")
- .Arg(0, "key")
- .Atom(1, ToString(pos++), TNodeFlags::Default)
- .Seal();
- }
- } else {
- if (pickleTypeNode) {
- listBuilder.Callable(1, "Unpickle")
- .Add(0, pickleTypeNode)
- .Arg(1, "key")
- .Seal();
- } else {
- listBuilder.Arg(1, "key");
- }
- }
- listBuilder.Seal();
- }
- return parent;
- })
- .Seal()
- .Seal()
- .Build();
- }
// Builds a lambda (stream) whose body is a "FinalizeByKey" callable.
//
// preprocessLambda - applied to every item; its result is wrapped into "Just"
//                    (child 1 of FinalizeByKey).
// keyExtractor     - passed through as child 2.
//
// Children 3, 4 and 5 are (key, item), (key, item, state) and (key, state)
// lambdas that delegate to the post-aggregate init, merge and save phases
// respectively; the "key" parameter is not forwarded to those phases.
- TExprNode::TPtr TAggregateExpander::BuildFinalizeByKeyLambda(const TExprNode::TPtr& preprocessLambda, const TExprNode::TPtr& keyExtractor) {
- return Ctx.Builder(Node->Pos())
- .Lambda()
- .Param("stream")
- .Callable("FinalizeByKey")
- .Arg(0, "stream")
// Pre-map: Just(preprocessLambda(item)).
- .Lambda(1)
- .Param("item")
- .Callable("Just")
- .Apply(0, preprocessLambda)
- .With(0, "item")
- .Seal()
- .Seal()
- .Seal()
- .Add(2, keyExtractor)
// Init: init phase applied to "item".
- .Lambda(3)
- .Param("key")
- .Param("item")
- .Apply(GeneratePostAggregateInitPhase())
- .With(0, "item")
- .Seal()
- .Seal()
// Update: merge phase applied to ("item", "state").
- .Lambda(4)
- .Param("key")
- .Param("item")
- .Param("state")
- .Apply(GeneratePostAggregateMergePhase())
- .With(0, "item")
- .With(1, "state")
- .Seal()
- .Seal()
// Finish: save phase applied to "state".
- .Lambda(5)
- .Param("key")
- .Param("state")
- .Apply(GeneratePostAggregateSavePhase())
- .With(0, "state")
- .Seal()
- .Seal()
- .Seal()
- .Seal().Build();
- }
// Tries to rewrite a key-less Aggregate with a single count-like handler into
// a "Length"-based expression.
//
// node      - the Aggregate node to inspect.
// ctx       - expression context used to build the replacement.
// useBlocks - when true, the distinct rewrite is not attempted.
//
// Returns node.Ptr() unchanged whenever the aggregate does not match the
// recognized pattern; otherwise returns AsList(AsStruct((outputColumn, count))).
- TExprNode::TPtr TAggregateExpander::CountAggregateRewrite(const NNodes::TCoAggregate& node, TExprContext& ctx, bool useBlocks) {
- auto keyColumns = node.Keys();
- auto aggregatedColumns = node.Handlers();
// Only aggregates without keys and with exactly one handler are considered.
- if (keyColumns.Size() > 0 || aggregatedColumns.Size() != 1) {
- return node.Ptr();
- }
- auto settings = node.Settings();
- auto hoppingSetting = GetSetting(settings.Ref(), "hopping");
- if (hoppingSetting) {
- return node.Ptr();
- }
- if (GetSetting(settings.Ref(), "session")) {
- // TODO: support
- return node.Ptr();
- }
- auto aggregatedColumn = aggregatedColumns.Item(0);
// A handler with three children is treated as distinct; child 2 is read as the
// distinct column further below.
- const bool isDistinct = (aggregatedColumn.Ref().ChildrenSize() == 3);
- auto traits = aggregatedColumn.Ref().Child(1);
- auto outputColumn = aggregatedColumn.Ref().HeadPtr();
- // validation of traits
// inputItemType is assigned in both accepted branches below; every other path
// returns before it is read.
- const TTypeAnnotationNode* inputItemType;
- bool onlyColumn = true;
- bool onlyZero = true;
- TExprNode::TPtr initVal;
- if (traits->IsCallable("AggregationTraits")) {
- inputItemType = traits->Head().GetTypeAnn()->Cast<TTypeExprType>()->GetType();
// Init lambda must be Uint64("1"), Uint64("0") or AggrCountInit(...).
- auto init = NNodes::TCoLambda(traits->Child(1));
- TExprNode::TPtr updateVal;
- if (init.Body().Ref().IsCallable("Uint64") &&
- init.Body().Ref().Head().Content() == "1") {
- onlyZero = false;
- } else if (init.Body().Ref().IsCallable("Uint64") &&
- init.Body().Ref().Head().Content() == "0") {
- onlyColumn = false;
- } else if (init.Body().Ref().IsCallable("AggrCountInit")) {
- initVal = init.Body().Ref().HeadPtr();
- onlyColumn = onlyColumn && init.Body().Ref().Child(0) == init.Args().Arg(0).Raw();
- onlyZero = false;
- } else {
- return node.Ptr();
- }
// Update lambda must be Inc(state), AggrCountUpdate(x, state) or state itself.
- auto update = NNodes::TCoLambda(traits->Child(2));
- auto inc = update.Body().Ptr();
- if (inc->IsCallable("Inc") && inc->Child(0) == update.Args().Arg(1).Raw()) {
- onlyZero = false;
- } else if (inc->IsCallable("AggrCountUpdate") && inc->Child(1) == update.Args().Arg(1).Raw()) {
- updateVal = inc->HeadPtr();
- onlyColumn = onlyColumn && inc->Child(0) == update.Args().Arg(0).Raw();
- onlyZero = false;
- } else if (inc == update.Args().Arg(1).Raw()) {
- onlyColumn = false;
- } else {
- return node.Ptr();
- }
// Save, load and finish lambdas must be identities.
- auto save = NNodes::TCoLambda(traits->Child(3));
- if (save.Body().Raw() != save.Args().Arg(0).Raw()) {
- return node.Ptr();
- }
- auto load = NNodes::TCoLambda(traits->Child(4));
- if (load.Body().Raw() != load.Args().Arg(0).Raw()) {
- return node.Ptr();
- }
// Merge lambda must be "+" or "AggrAdd" over its two arguments, in order.
- auto merge = NNodes::TCoLambda(traits->Child(5));
- {
- auto& plus = merge.Body().Ref();
- if (!plus.IsCallable({ "+", "AggrAdd" }) ) {
- return node.Ptr();
- }
- if (!(plus.Child(0) == merge.Args().Arg(0).Raw() &&
- plus.Child(1) == merge.Args().Arg(1).Raw())) {
- return node.Ptr();
- }
- }
- auto finish = NNodes::TCoLambda(traits->Child(6));
- if (finish.Body().Raw() != finish.Args().Arg(0).Raw()) {
- return node.Ptr();
- }
// Default value must be Uint64("0").
- auto defVal = traits->Child(7);
- if (!defVal->IsCallable("Uint64") || defVal->Head().Content() != "0") {
- return node.Ptr();
- }
// Non-distinct count over an expression: init and update must reference the
// very same node.
- if (!isDistinct) {
- if (!onlyZero && !onlyColumn) {
- if (!initVal || !updateVal || initVal != updateVal) {
- return node.Ptr();
- }
- }
- }
- } else if (traits->IsCallable("AggApply")) {
// AggApply form: only "count_all" and "count" are accepted.
- if (traits->Head().Content() != "count_all" && traits->Head().Content() != "count") {
- return node.Ptr();
- }
- inputItemType = traits->Child(1)->GetTypeAnn()->Cast<TTypeExprType>()->GetType();
- onlyZero = false;
- onlyColumn = false;
// onlyColumn: the lambda in child 2 returns its own first argument.
- if (&traits->Child(2)->Head().Head() == &traits->Child(2)->Tail()) {
- onlyColumn = true;
- }
- if (!isDistinct) {
- if (traits->Head().Content() == "count") {
- initVal = traits->Child(2)->TailPtr();
// An optional/null body is only accepted when it does not depend on the
// lambda argument; a non-optional body needs no Exists check, so initVal is
// cleared.
- if (initVal->GetTypeAnn()->IsOptionalOrNull()) {
- if (IsDepended(traits->Child(2)->Tail(), traits->Child(2)->Head().Head())) {
- return node.Ptr();
- }
- } else {
- initVal = nullptr;
- }
- }
- }
- } else {
- return node.Ptr();
- }
- const bool isOptionalColumn = inputItemType->GetKind() == ETypeAnnotationKind::Optional;
// Non-distinct case: the result is Length(input), a constant 0, or
// If(Exists(initVal), Length(input), 0).
- if (!isDistinct) {
- auto length = ctx.Builder(node.Pos())
- .Callable("Length")
- .Add(0, node.Input().Ptr())
- .Seal()
- .Build();
- if (onlyZero) {
- length = ctx.Builder(node.Pos())
- .Callable("Uint64")
- .Atom(0, "0", TNodeFlags::Default)
- .Seal()
- .Build();
- } else if (!onlyColumn && initVal) {
- length = ctx.Builder(node.Pos())
- .Callable("If")
- .Callable(0, "Exists")
- .Add(0, initVal)
- .Seal()
- .Add(1, std::move(length))
- .Callable(2, "Uint64")
- .Atom(0, "0", TNodeFlags::Default)
- .Seal()
- .Seal()
- .Build();
- }
- auto ret = ctx.Builder(node.Pos())
- .Callable("AsList")
- .Callable(0, "AsStruct")
- .List(0)
- .Add(0, std::move(outputColumn))
- .Add(1, std::move(length))
- .Seal()
- .Seal()
- .Seal()
- .Build();
- return ret;
- }
// Distinct case: only handled without blocks and when the count is over the
// column itself.
- if (useBlocks || !onlyColumn) {
- return node.Ptr();
- }
- auto removedOptionalType = inputItemType;
- if (isOptionalColumn) {
- removedOptionalType = removedOptionalType->Cast<TOptionalExprType>()->GetItemType();
- }
// Non-Data values are keyed by their StablePickle representation.
- const bool needPickle = removedOptionalType->GetKind() != ETypeAnnotationKind::Data;
// NOTE(review): pickleTypeNode is not referenced again in this function —
// confirm whether it is still needed.
- auto pickleTypeNode = ExpandType(node.Pos(), *inputItemType, ctx);
- auto distictColumn = aggregatedColumn.Ref().ChildPtr(2);
// CombineByKey over the input restricted to the distinct column. The state is
// Void; each key is emitted once as a struct with a single "value" member.
- auto combine = ctx.Builder(node.Pos())
- .Callable("CombineByKey")
- .Callable(0, "ExtractMembers")
- .Add(0, node.Input().Ptr())
- .List(1)
- .Add(0, distictColumn)
- .Seal()
- .Seal()
- .Lambda(1)
- .Param("row")
- .Do([&](TExprNodeBuilder& parent) -> TExprNodeBuilder& {
// Pre-map: an optional column is mapped with an identity lambda, a
// non-optional column value is wrapped into Just.
- if (isOptionalColumn) {
- parent.Callable("Map")
- .Callable(0, "Member")
- .Arg(0, "row")
- .Add(1, distictColumn)
- .Seal()
- .Lambda(1)
- .Param("unpacked")
- .Arg("unpacked")
- .Seal()
- .Seal();
- } else {
- parent.Callable("Just")
- .Callable(0, "Member")
- .Arg(0, "row")
- .Add(1, distictColumn)
- .Seal()
- .Seal();
- }
- return parent;
- })
- .Seal()
- .Lambda(2)
- .Param("item")
- .Do([&](TExprNodeBuilder& parent) -> TExprNodeBuilder& {
// Key extractor: the item itself, or its StablePickle when pickling is needed.
- if (needPickle) {
- parent.Callable("StablePickle")
- .Arg(0, "item")
- .Seal();
- } else {
- parent.Arg("item");
- }
- return parent;
- })
- .Seal()
- .Lambda(3)
- .Param("key")
- .Param("item")
- .Callable("Void")
- .Seal()
- .Seal()
- .Lambda(4)
- .Param("key")
- .Param("item")
- .Param("state")
- .Arg("state")
- .Seal()
- .Lambda(5)
- .Param("key")
- .Param("state")
- .Callable("Just")
- .Callable(0, "AsStruct")
- .List(0)
- .Atom(0, "value")
- .Arg(1, "key")
- .Seal()
- .Seal()
- .Seal()
- .Seal()
- .Seal()
- .Build();
// PartitionByKey on "value"; the handler maps "groups" with a lambda that
// returns an empty struct per element.
- auto groupByKey = ctx.Builder(node.Pos())
- .Callable("PartitionByKey")
- .Add(0, combine)
- .Lambda(1)
- .Param("combineRow")
- .Callable("Member")
- .Arg(0, "combineRow")
- .Atom(1, "value")
- .Seal()
- .Seal()
- .Callable(2, "Void")
- .Seal()
- .Callable(3, "Void")
- .Seal()
- .Lambda(4)
- .Param("groups")
- .Callable("Map")
- .Arg(0, "groups")
- .Lambda(1)
- .Param("group")
- .Callable("AsStruct")
- .Seal()
- .Seal()
- .Seal()
- .Seal()
- .Seal()
- .Build();
// The distinct count is the Length of the grouped result.
- auto ret = ctx.Builder(node.Pos())
- .Callable("AsList")
- .Callable(0, "AsStruct")
- .List(0)
- .Add(0, outputColumn)
- .Callable(1, "Length")
- .Add(0, std::move(groupByKey))
- .Seal()
- .Seal()
- .Seal()
- .Seal()
- .Build();
- return ret;
- }
- TExprNode::TPtr TAggregateExpander::GeneratePostAggregate(const TExprNode::TPtr& preAgg, const TExprNode::TPtr& keyExtractor)
- {
- auto preprocessLambda = GeneratePreprocessLambda(keyExtractor);
- TExprNode::TPtr postAgg;
- if (!UsePartitionsByKeys && UseFinalizeByKeys && !HaveSessionSetting) {
- postAgg = Ctx.Builder(Node->Pos())
- .Callable("ShuffleByKeys")
- .Add(0, std::move(preAgg))
- .Add(1, keyExtractor)
- .Lambda(2)
- .Param("stream")
- .Apply(GetContextLambda())
- .With(0)
- .Apply(BuildFinalizeByKeyLambda(preprocessLambda, keyExtractor))
- .With(0, "stream")
- .Seal()
- .Done()
- .Seal()
- .Seal()
- .Seal().Build();
- } else {
- auto condenseSwitch = GenerateCondenseSwitch(keyExtractor);
- postAgg = Ctx.Builder(Node->Pos())
- .Callable("PartitionsByKeys")
- .Add(0, std::move(preAgg))
- .Add(1, keyExtractor)
- .Add(2, SortParams.Order)
- .Add(3, SortParams.Key)
- .Lambda(4)
- .Param("stream")
- .Apply(GetContextLambda())
- .With(0)
- .Callable("Map")
- .Callable(0, "Condense1")
- .Apply(0, preprocessLambda)
- .With(0, "stream")
- .Seal()
- .Add(1, GeneratePostAggregateInitPhase())
- .Add(2, condenseSwitch)
- .Add(3, GeneratePostAggregateMergePhase())
- .Seal()
- .Add(1, GeneratePostAggregateSavePhase())
- .Seal()
- .Done()
- .Seal()
- .Seal()
- .Seal().Build();
- }
- if (KeyColumns->ChildrenSize() == 0 && !HaveSessionSetting && (Suffix == "" || Suffix.EndsWith("Finalize"))) {
- return MakeSingleGroupRow(*Node, postAgg, Ctx);
- }
- return postAgg;
- }
- TExprNode::TPtr TAggregateExpander::GeneratePreprocessLambda(const TExprNode::TPtr& keyExtractor)
- {
- TExprNode::TPtr preprocessLambda;
- if (SessionWindowParams.Update) {
- YQL_ENSURE(EffectiveCompact);
- YQL_ENSURE(SessionWindowParams.Key);
- YQL_ENSURE(SessionWindowParams.KeyType);
- YQL_ENSURE(SessionWindowParams.Init);
- preprocessLambda = AddSessionParamsMemberLambda(Node->Pos(), SessionStartMemberName, "", keyExtractor,
- SessionWindowParams.Key, SessionWindowParams.Init, SessionWindowParams.Update, Ctx);
- } else {
- YQL_ENSURE(!SessionWindowParams.Key);
- preprocessLambda = MakeIdentityLambda(Node->Pos(), Ctx);
- }
- return preprocessLambda;
- }
- TExprNode::TPtr TAggregateExpander::GenerateCondenseSwitch(const TExprNode::TPtr& keyExtractor)
- {
- TExprNode::TPtr condenseSwitch;
- if (SessionWindowParams.Update) {
- YQL_ENSURE(EffectiveCompact);
- YQL_ENSURE(SessionWindowParams.Key);
- YQL_ENSURE(SessionWindowParams.KeyType);
- YQL_ENSURE(SessionWindowParams.Init);
- condenseSwitch = Ctx.Builder(Node->Pos())
- .Lambda()
- .Param("item")
- .Param("state")
- .Callable("Or")
- .Callable(0, "AggrNotEquals")
- .Apply(0, keyExtractor)
- .With(0, "item")
- .Seal()
- .Apply(1, keyExtractor)
- .With(0, "state")
- .Seal()
- .Seal()
- .Callable(1, "AggrNotEquals")
- .Callable(0, "Member")
- .Arg(0, "item")
- .Atom(1, SessionStartMemberName)
- .Seal()
- .Callable(1, "Member")
- .Arg(0, "state")
- .Atom(1, SessionStartMemberName)
- .Seal()
- .Seal()
- .Seal()
- .Seal()
- .Build();
- } else {
- YQL_ENSURE(!SessionWindowParams.Key);
- condenseSwitch = Ctx.Builder(Node->Pos())
- .Lambda()
- .Param("item")
- .Param("state")
- .Callable("IsKeySwitch")
- .Arg(0, "item")
- .Arg(1, "state")
- .Add(2, keyExtractor)
- .Add(3, keyExtractor)
- .Seal()
- .Seal()
- .Build();
- }
- return condenseSwitch;
- }
// Builds the (item) lambda that produces the initial post-aggregation state:
// an AsStruct with the key columns, the session start member (when a session
// update handler is set) and one member per aggregation column.
//
// In the non-compact mode each column state is loaded from the item with the
// trait's load lambda (child 4); in the compact mode it is created with the
// trait's init lambda (child 1).
- TExprNode::TPtr TAggregateExpander::GeneratePostAggregateInitPhase()
- {
// Any non-empty suffix switches to the final column names.
- bool aggregateOnly = (Suffix != "");
- const auto& columnNames = aggregateOnly ? FinalColumnNames : InitialColumnNames;
// `index` is shared by both Do() callbacks so that struct members are numbered
// consecutively.
- ui32 index = 0U;
- return Ctx.Builder(Node->Pos())
- .Lambda()
- .Param("item")
- .Callable("AsStruct")
- .Do([&](TExprNodeBuilder& parent) -> TExprNodeBuilder& {
// Key columns are copied from the item as-is.
- for (ui32 i = 0; i < KeyColumns->ChildrenSize(); ++i) {
- parent
- .List(index++)
- .Add(0, KeyColumns->ChildPtr(i))
- .Callable(1, "Member")
- .Arg(0, "item")
- .Add(1, KeyColumns->ChildPtr(i))
- .Seal()
- .Seal();
- }
- if (SessionWindowParams.Update) {
- parent
- .List(index++)
- .Atom(0, SessionStartMemberName)
- .Callable(1, "Member")
- .Arg(0, "item")
- .Atom(1, SessionStartMemberName)
- .Seal()
- .Seal();
- }
- return parent;
- })
- .Do([&](TExprNodeBuilder& parent) -> TExprNodeBuilder& {
- for (ui32 i = 0; i < columnNames.size(); ++i) {
- auto child = AggregatedColumns->Child(i);
- auto trait = Traits[i];
- if (!EffectiveCompact) {
- auto loadLambda = trait->Child(4);
- auto extractorLambda = GetFinalAggStateExtractor(i);
// With distinct fields or "MergeManyFinalize" the load lambda is applied
// through "Map" over the extracted state; otherwise it is applied directly.
- if (!DistinctFields.empty() || Suffix == "MergeManyFinalize") {
- parent.List(index++)
- .Add(0, columnNames[i])
- .Callable(1, "Map")
- .Apply(0, *extractorLambda)
- .With(0, "item")
- .Seal()
- .Add(1, loadLambda)
- .Seal()
- .Seal();
- } else {
- parent.List(index++)
- .Add(0, columnNames[i])
- .Apply(1, *loadLambda)
- .With(0)
- .Apply(*extractorLambda)
- .With(0, "item")
- .Seal()
- .Done()
- .Seal();
- }
- } else {
- auto initLambda = trait->Child(1);
// A third child on the aggregated column names its distinct field.
- auto distinctField = (child->ChildrenSize() == 3) ? child->Child(2) : nullptr;
// initApply: applies the init lambda at position 1 of the current list. Its
// argument is Member(item, distinctField) for distinct columns, or the item
// cast (CastStruct) to the init lambda's argument type otherwise. A
// two-argument init lambda also receives the column index as Uint32.
- auto initApply = [&](TExprNodeBuilder& parent) -> TExprNodeBuilder& {
- parent.Apply(1, *initLambda)
- .With(0)
- .Do([&](TExprNodeBuilder& parent) -> TExprNodeBuilder& {
- if (distinctField) {
- parent
- .Callable("Member")
- .Arg(0, "item")
- .Add(1, distinctField)
- .Seal();
- } else {
- parent
- .Callable("CastStruct")
- .Arg(0, "item")
- .Add(1, ExpandType(Node->Pos(), *initLambda->Head().Head().GetTypeAnn(), Ctx))
- .Seal();
- }
- return parent;
- })
- .Done()
- .Do([&](TExprNodeReplaceBuilder& parent) -> TExprNodeReplaceBuilder& {
- if (initLambda->Head().ChildrenSize() == 2) {
- parent.With(1)
- .Callable("Uint32")
- .Atom(0, ToString(i), TNodeFlags::Default)
- .Seal()
- .Done();
- }
- return parent;
- })
- .Seal();
- return parent;
- };
- if (distinctField) {
// The first column of a distinct field stores a tuple: element 0 is the set
// created by the Set.Create Udf from the (optionally pickled) distinct value,
// element 1 is the regular init state.
- const bool isFirst = *Distinct2Columns[distinctField->Content()].begin() == i;
- if (isFirst) {
- parent.List(index++)
- .Add(0, columnNames[i])
- .List(1)
- .Callable(0, "NamedApply")
- .Add(0, UdfSetCreate[distinctField->Content()])
- .List(1)
- .Do([&](TExprNodeBuilder& parent) -> TExprNodeBuilder& {
- if (!DistinctFieldNeedsPickle[distinctField->Content()]) {
- parent.Callable(0, "Member")
- .Arg(0, "item")
- .Add(1, distinctField)
- .Seal();
- } else {
- parent.Callable(0, "StablePickle")
- .Callable(0, "Member")
- .Arg(0, "item")
- .Add(1, distinctField)
- .Seal()
- .Seal();
- }
- return parent;
- })
- .Callable(1, "Uint32")
- .Atom(0, "0", TNodeFlags::Default)
- .Seal()
- .Seal()
- .Callable(2, "AsStruct").Seal()
- .Callable(3, "DependsOn")
- .Callable(0, "String")
- .Add(0, distinctField)
- .Seal()
- .Seal()
- .Seal()
- .Do(initApply)
- .Seal()
- .Seal();
- } else {
// Remaining columns of the same distinct field store only the init state.
- parent.List(index++)
- .Add(0, columnNames[i])
- .Do(initApply)
- .Seal();
- }
- } else {
- parent.List(index++)
- .Add(0, columnNames[i])
- .Do(initApply)
- .Seal();
- }
- }
- }
- return parent;
- })
- .Seal()
- .Seal()
- .Build();
- }
- TExprNode::TPtr TAggregateExpander::GeneratePostAggregateSavePhase()
- {
- bool aggregateOnly = (Suffix != "");
- const auto& columnNames = aggregateOnly ? FinalColumnNames : InitialColumnNames;
- ui32 index = 0U;
- return Ctx.Builder(Node->Pos())
- .Lambda()
- .Param("state")
- .Callable("AsStruct")
- .Do([&](TExprNodeBuilder& parent) -> TExprNodeBuilder& {
- for (ui32 i = 0; i < KeyColumns->ChildrenSize(); ++i) {
- if (KeyColumns->Child(i)->Content() == SessionStartMemberName) {
- continue;
- }
- parent
- .List(index++)
- .Add(0, KeyColumns->ChildPtr(i))
- .Callable(1, "Member")
- .Arg(0, "state")
- .Add(1, KeyColumns->ChildPtr(i))
- .Seal()
- .Seal();
- }
- if (SessionOutputColumn) {
- parent
- .List(index++)
- .Atom(0, *SessionOutputColumn)
- .Callable(1, "Member")
- .Arg(0, "state")
- .Atom(1, SessionStartMemberName)
- .Seal()
- .Seal();
- }
- return parent;
- })
- .Do([&](TExprNodeBuilder& parent) -> TExprNodeBuilder& {
- for (ui32 i = 0; i < columnNames.size(); ++i) {
- auto child = AggregatedColumns->Child(i);
- auto trait = Traits[i];
- auto finishLambda = (Suffix == "MergeState") ? trait->Child(3) : trait->Child(6);
- if (!EffectiveCompact && (!DistinctFields.empty() || Suffix == "MergeManyFinalize")) {
- if (child->Head().IsAtom()) {
- parent.List(index++)
- .Add(0, FinalColumnNames[i])
- .Callable(1, "Unwrap")
- .Callable(0, "Map")
- .Callable(0, "Member")
- .Arg(0, "state")
- .Add(1, columnNames[i])
- .Seal()
- .Add(1, finishLambda)
- .Seal()
- .Seal()
- .Seal();
- } else {
- const auto& multiFields = child->Child(0);
- for (ui32 field = 0; field < multiFields->ChildrenSize(); ++field) {
- parent.List(index++)
- .Atom(0, multiFields->Child(field)->Content())
- .Callable(1, "Nth")
- .Callable(0, "Unwrap")
- .Callable(0, "Map")
- .Callable(0, "Member")
- .Arg(0, "state")
- .Add(1, columnNames[i])
- .Seal()
- .Add(1, finishLambda)
- .Seal()
- .Seal()
- .Atom(1, ToString(field), TNodeFlags::Default)
- .Seal()
- .Seal();
- }
- }
- } else {
- auto distinctField = (child->ChildrenSize() == 3) ? child->Child(2) : nullptr;
- auto stateExtractor = [&](TExprNodeBuilder& parent) -> TExprNodeBuilder& {
- const bool isFirst = distinctField ? (*Distinct2Columns[distinctField->Content()].begin() == i) : false;
- if (distinctField && isFirst) {
- parent.Callable("Nth")
- .Callable(0, "Member")
- .Arg(0, "state")
- .Add(1, columnNames[i])
- .Seal()
- .Atom(1, "1", TNodeFlags::Default)
- .Seal();
- } else {
- parent.Callable("Member")
- .Arg(0, "state")
- .Add(1, columnNames[i])
- .Seal();
- }
- return parent;
- };
- if (child->Head().IsAtom()) {
- parent.List(index++)
- .Add(0, FinalColumnNames[i])
- .Apply(1, *finishLambda)
- .With(0)
- .Do(stateExtractor)
- .Done()
- .Seal()
- .Seal();
- } else {
- const auto& multiFields = child->Head();
- for (ui32 field = 0; field < multiFields.ChildrenSize(); ++field) {
- parent.List(index++)
- .Atom(0, multiFields.Child(field)->Content())
- .Callable(1, "Nth")
- .Apply(0, *finishLambda)
- .With(0)
- .Do(stateExtractor)
- .Done()
- .Seal()
- .Atom(1, ToString(field), TNodeFlags::Default)
- .Seal()
- .Seal();
- }
- }
- }
- }
- return parent;
- })
- .Seal()
- .Seal()
- .Build();
- }
- TExprNode::TPtr TAggregateExpander::GeneratePostAggregateMergePhase()
- {
- bool aggregateOnly = (Suffix != "");
- const auto& columnNames = aggregateOnly ? FinalColumnNames : InitialColumnNames;
- ui32 index = 0U;
- return Ctx.Builder(Node->Pos())
- .Lambda()
- .Param("item")
- .Param("state")
- .Callable("AsStruct")
- .Do([&](TExprNodeBuilder& parent) -> TExprNodeBuilder& {
- for (ui32 i = 0; i < KeyColumns->ChildrenSize(); ++i) {
- parent
- .List(index++)
- .Add(0, KeyColumns->ChildPtr(i))
- .Callable(1, "Member")
- .Arg(0, "state")
- .Add(1, KeyColumns->ChildPtr(i))
- .Seal()
- .Seal();
- }
- if (SessionWindowParams.Update) {
- parent
- .List(index++)
- .Atom(0, SessionStartMemberName)
- .Callable(1, "Member")
- .Arg(0, "state")
- .Atom(1, SessionStartMemberName)
- .Seal()
- .Seal();
- }
- return parent;
- })
- .Do([&](TExprNodeBuilder& parent) -> TExprNodeBuilder& {
- for (ui32 i = 0; i < columnNames.size(); ++i) {
- auto child = AggregatedColumns->Child(i);
- auto trait = Traits[i];
- if (!EffectiveCompact) {
- auto loadLambda = trait->Child(4);
- auto mergeLambda = trait->Child(5);
- auto extractorLambda = GetFinalAggStateExtractor(i);
- if (!DistinctFields.empty() || Suffix == "MergeManyFinalize") {
- parent.List(index++)
- .Add(0, columnNames[i])
- .Callable(1, "OptionalReduce")
- .Callable(0, "Map")
- .Apply(0, extractorLambda)
- .With(0, "item")
- .Seal()
- .Add(1, loadLambda)
- .Seal()
- .Callable(1, "Member")
- .Arg(0, "state")
- .Add(1, columnNames[i])
- .Seal()
- .Add(2, mergeLambda)
- .Seal()
- .Seal();
- } else {
- parent.List(index++)
- .Add(0, columnNames[i])
- .Apply(1, *mergeLambda)
- .With(0)
- .Apply(*loadLambda)
- .With(0)
- .Apply(extractorLambda)
- .With(0, "item")
- .Seal()
- .Done()
- .Seal()
- .Done()
- .With(1)
- .Callable("Member")
- .Arg(0, "state")
- .Add(1, columnNames[i])
- .Seal()
- .Done()
- .Seal()
- .Seal();
- }
- } else {
- auto updateLambda = trait->Child(2);
- auto distinctField = (child->ChildrenSize() == 3) ? child->Child(2) : nullptr;
- const bool isFirst = distinctField ? (*Distinct2Columns[distinctField->Content()].begin() == i) : false;
- auto updateApply = [&](TExprNodeBuilder& parent) -> TExprNodeBuilder& {
- parent.Apply(1, *updateLambda)
- .With(0)
- .Do([&](TExprNodeBuilder& parent) -> TExprNodeBuilder& {
- if (distinctField) {
- parent
- .Callable("Member")
- .Arg(0, "item")
- .Add(1, distinctField)
- .Seal();
- } else {
- parent
- .Callable("CastStruct")
- .Arg(0, "item")
- .Add(1, ExpandType(Node->Pos(), *updateLambda->Head().Head().GetTypeAnn(), Ctx))
- .Seal();
- }
- return parent;
- })
- .Done()
- .With(1)
- .Do([&](TExprNodeBuilder& parent) -> TExprNodeBuilder& {
- if (distinctField && isFirst) {
- parent.Callable("Nth")
- .Callable(0, "Member")
- .Arg(0, "state")
- .Add(1, columnNames[i])
- .Seal()
- .Atom(1, "1", TNodeFlags::Default)
- .Seal();
- } else {
- parent.Callable("Member")
- .Arg(0, "state")
- .Add(1, columnNames[i])
- .Seal();
- }
- return parent;
- })
- .Done()
- .Do([&](TExprNodeReplaceBuilder& parent) -> TExprNodeReplaceBuilder& {
- if (updateLambda->Head().ChildrenSize() == 3) {
- parent
- .With(2)
- .Callable("Uint32")
- .Atom(0, ToString(i), TNodeFlags::Default)
- .Seal()
- .Done();
- }
- return parent;
- })
- .Seal();
- return parent;
- };
- if (distinctField) {
- auto distinctIndex = *Distinct2Columns[distinctField->Content()].begin();
- ui32 newValueIndex = 0;
- auto newValue = [&](TExprNodeBuilder& parent) -> TExprNodeBuilder& {
- parent.Callable(newValueIndex, "NamedApply")
- .Add(0, UdfAddValue[distinctField->Content()])
- .List(1)
- .Callable(0, "Nth")
- .Callable(0, "Member")
- .Arg(0, "state")
- .Add(1, columnNames[distinctIndex])
- .Seal()
- .Atom(1, "0", TNodeFlags::Default)
- .Seal()
- .Do([&](TExprNodeBuilder& parent) -> TExprNodeBuilder& {
- if (!DistinctFieldNeedsPickle[distinctField->Content()]) {
- parent.Callable(1, "Member")
- .Arg(0, "item")
- .Add(1, distinctField)
- .Seal();
- } else {
- parent.Callable(1, "StablePickle")
- .Callable(0, "Member")
- .Arg(0, "item")
- .Add(1, distinctField)
- .Seal()
- .Seal();
- }
- return parent;
- })
- .Seal()
- .Callable(2, "AsStruct").Seal()
- .Seal();
- return parent;
- };
- parent.List(index++)
- .Add(0, columnNames[i])
- .Callable(1, "If")
- .Callable(0, "NamedApply")
- .Add(0, UdfWasChanged[distinctField->Content()])
- .List(1)
- .Callable(0, "NamedApply")
- .Add(0, UdfAddValue[distinctField->Content()])
- .List(1)
- .Callable(0, "Nth")
- .Callable(0, "Member")
- .Arg(0, "state")
- .Add(1, columnNames[distinctIndex])
- .Seal()
- .Atom(1, "0", TNodeFlags::Default)
- .Seal()
- .Do([&](TExprNodeBuilder& parent) -> TExprNodeBuilder& {
- if (!DistinctFieldNeedsPickle[distinctField->Content()]) {
- parent.Callable(1, "Member")
- .Arg(0, "item")
- .Add(1, distinctField)
- .Seal();
- } else {
- parent.Callable(1, "StablePickle")
- .Callable(0, "Member")
- .Arg(0, "item")
- .Add(1, distinctField)
- .Seal()
- .Seal();
- }
- return parent;
- })
- .Seal()
- .Callable(2, "AsStruct").Seal()
- .Seal()
- .Seal()
- .Callable(2, "AsStruct").Seal()
- .Seal()
- .Do([&](TExprNodeBuilder& parent) -> TExprNodeBuilder& {
- if (distinctIndex == i) {
- parent.List(1)
- .Do(newValue)
- .Do(updateApply)
- .Seal();
- } else {
- parent.Do(updateApply);
- }
- return parent;
- })
- .Callable(2, "Member")
- .Arg(0, "state")
- .Add(1, columnNames[i])
- .Seal()
- .Seal()
- .Seal();
- } else {
- parent.List(index++)
- .Add(0, columnNames[i])
- .Do(updateApply)
- .Seal();
- }
- }
- }
- return parent;
- })
- .Seal()
- .Seal()
- .Build();
- }
- TExprNode::TPtr TAggregateExpander::GenerateJustOverStates(const TExprNode::TPtr& input, const TIdxSet& indicies) {
- return Ctx.Builder(Node->Pos())
- .Callable("Map")
- .Add(0, input)
- .Lambda(1)
- .Param("row")
- .Callable("AsStruct")
- .Do([&](TExprNodeBuilder& parent) -> TExprNodeBuilder& {
- ui32 pos = 0;
- for (ui32 i = 0; i < KeyColumns->ChildrenSize(); ++i) {
- parent
- .List(pos++)
- .Add(0, KeyColumns->ChildPtr(i))
- .Callable(1, "Member")
- .Arg(0, "row")
- .Add(1, KeyColumns->ChildPtr(i))
- .Seal()
- .Seal();
- }
- for (ui32 i : indicies) {
- parent
- .List(pos++)
- .Add(0, InitialColumnNames[i])
- .Callable(1, "Just")
- .Callable(0, "Member")
- .Arg(0, "row")
- .Add(1, InitialColumnNames[i])
- .Seal()
- .Seal()
- .Seal();
- }
- return parent;
- })
- .Seal()
- .Seal()
- .Seal()
- .Build();
- }
- TExprNode::TPtr TAggregateExpander::SerializeIdxSet(const TIdxSet& indicies) {
- return Ctx.Builder(Node->Pos())
- .List()
- .Do([&](TExprNodeBuilder& parent) -> TExprNodeBuilder& {
- ui32 pos = 0;
- for (ui32 i : indicies) {
- parent.Atom(pos++, ToString(i));
- }
- return parent;
- })
- .Seal()
- .Build();
- }
- TExprNode::TPtr TAggregateExpander::GeneratePhases() {
- const TExprNode::TPtr cleanOutputSettings = RemoveSetting(*Node->Child(3), "output_columns", Ctx);
- const bool many = HaveDistinct;
- YQL_CLOG(DEBUG, Core) << "Aggregate: generate " << (many ? "phases with distinct" : "simple phases");
- TExprNode::TListType mergeTraits;
- for (ui32 index = 0; index < AggregatedColumns->ChildrenSize(); ++index) {
- auto originalTrait = AggregatedColumns->Child(index)->ChildPtr(1);
- auto extractor = Ctx.Builder(Node->Pos())
- .Lambda()
- .Param("row")
- .Callable("Member")
- .Arg(0, "row")
- .Add(1, InitialColumnNames[index])
- .Seal()
- .Seal()
- .Build();
- if (many) {
- extractor = Ctx.Builder(Node->Pos())
- .Lambda()
- .Param("row")
- .Callable("Unwrap")
- .Apply(0, extractor)
- .With(0, "row")
- .Seal()
- .Seal()
- .Seal()
- .Build();
- }
- bool isAggApply = originalTrait->IsCallable("AggApply");
- auto serializedStateType = isAggApply ? AggApplySerializedStateType(originalTrait, Ctx) : originalTrait->Child(3)->GetTypeAnn();
- if (many) {
- serializedStateType = Ctx.MakeType<TOptionalExprType>(serializedStateType);
- }
- auto extractorTypeNode = Ctx.Builder(Node->Pos())
- .Callable("StructType")
- .List(0)
- .Add(0, InitialColumnNames[index])
- .Add(1, ExpandType(Node->Pos(), *serializedStateType, Ctx))
- .Seal()
- .Seal()
- .Build();
- if (isAggApply) {
- auto initialType = originalTrait->GetTypeAnn();
- if (many) {
- initialType = Ctx.MakeType<TOptionalExprType>(initialType);
- }
- auto originalExtractorTypeNode = Ctx.Builder(Node->Pos())
- .Callable("StructType")
- .List(0)
- .Add(0, InitialColumnNames[index])
- .Add(1, ExpandType(Node->Pos(), *initialType, Ctx))
- .Seal()
- .Seal()
- .Build();
- auto name = TString(originalTrait->ChildPtr(0)->Content());
- if (name.StartsWith("pg_")) {
- auto func = name.substr(3);
- TVector<ui32> argTypes;
- bool needRetype = false;
- auto status = ExtractPgTypesFromMultiLambda(originalTrait->ChildRef(2), argTypes, needRetype, Ctx);
- YQL_ENSURE(status == IGraphTransformer::TStatus::Ok);
- const NPg::TAggregateDesc& aggDesc = NPg::LookupAggregation(TString(func), argTypes);
- name = "pg_" + aggDesc.Name + "#" + ToString(aggDesc.AggId);
- }
- mergeTraits.push_back(Ctx.Builder(Node->Pos())
- .Callable(many ? "AggApplyManyState" : "AggApplyState")
- .Atom(0, name)
- .Add(1, extractorTypeNode)
- .Add(2, extractor)
- .Add(3, originalExtractorTypeNode)
- .Seal()
- .Build());
- } else {
- YQL_ENSURE(originalTrait->IsCallable("AggregationTraits"));
- mergeTraits.push_back(Ctx.Builder(Node->Pos())
- .Callable("AggregationTraits")
- .Add(0, extractorTypeNode)
- .Add(1, extractor)
- .Lambda(2)
- .Param("item")
- .Param("state")
- .Callable("Void")
- .Seal()
- .Seal()
- .Add(3, originalTrait->ChildPtr(3))
- .Add(4, originalTrait->ChildPtr(4))
- .Add(5, originalTrait->ChildPtr(5))
- .Add(6, originalTrait->ChildPtr(6))
- .Add(7, originalTrait->ChildPtr(7))
- .Seal()
- .Build());
- }
- }
- TExprNode::TListType finalizeColumns;
- for (ui32 index = 0; index < AggregatedColumns->ChildrenSize(); ++index) {
- finalizeColumns.push_back(Ctx.Builder(Node->Pos())
- .List()
- .Add(0, AggregatedColumns->Child(index)->ChildPtr(0))
- .Add(1, mergeTraits[index])
- .Seal()
- .Build());
- }
- if (!many) {
- // simple Combine + MergeFinalize
- TExprNode::TListType combineColumns;
- for (ui32 index = 0; index < AggregatedColumns->ChildrenSize(); ++index) {
- combineColumns.push_back(Ctx.Builder(Node->Pos())
- .List()
- .Add(0, InitialColumnNames[index])
- .Add(1, AggregatedColumns->Child(index)->ChildPtr(1))
- .Seal()
- .Build());
- }
- auto combine = Ctx.Builder(Node->Pos())
- .Callable("AggregateCombine")
- .Add(0, AggList)
- .Add(1, KeyColumns)
- .Add(2, Ctx.NewList(Node->Pos(), std::move(combineColumns)))
- .Add(3, cleanOutputSettings)
- .Seal()
- .Build();
- auto mergeFinalize = Ctx.Builder(Node->Pos())
- .Callable("AggregateMergeFinalize")
- .Add(0, combine)
- .Add(1, KeyColumns)
- .Add(2, Ctx.NewList(Node->Pos(), std::move(finalizeColumns)))
- .Add(3, cleanOutputSettings)
- .Seal()
- .Build();
- return mergeFinalize;
- }
- // process with distincts
- // Combine + Map with Just over states
- // for each distinct field:
- // Aggregate by keys + field w/o aggs
- // Combine by keys + field with aggs
- // Map with Just over states
- // UnionAll
- // MergeManyFinalize
- TExprNode::TListType unionAllInputs;
- TExprNode::TListType streams;
- if (!NonDistinctColumns.empty()) {
- TExprNode::TListType combineColumns;
- for (ui32 i : NonDistinctColumns) {
- combineColumns.push_back(Ctx.Builder(Node->Pos())
- .List()
- .Add(0, InitialColumnNames[i])
- .Add(1, AggregatedColumns->Child(i)->ChildPtr(1))
- .Seal()
- .Build());
- }
- auto combine = Ctx.Builder(Node->Pos())
- .Callable("AggregateCombine")
- .Add(0, AggList)
- .Add(1, KeyColumns)
- .Add(2, Ctx.NewList(Node->Pos(), std::move(combineColumns)))
- .Add(3, cleanOutputSettings)
- .Seal()
- .Build();
- unionAllInputs.push_back(GenerateJustOverStates(combine, NonDistinctColumns));
- streams.push_back(SerializeIdxSet(NonDistinctColumns));
- }
- for (ui32 index = 0; index < DistinctFields.size(); ++index) {
- auto distinctField = DistinctFields[index];
- auto& indicies = Distinct2Columns[distinctField->Content()];
- TExprNode::TListType allKeyColumns = KeyColumns->ChildrenList();
- allKeyColumns.push_back(distinctField);
- auto distinct = Ctx.Builder(Node->Pos())
- .Callable("Aggregate")
- .Add(0, AggList)
- .Add(1, Ctx.NewList(Node->Pos(), std::move(allKeyColumns)))
- .List(2)
- .Seal()
- .Add(3, cleanOutputSettings)
- .Seal()
- .Build();
- TExprNode::TListType combineColumns;
- for (ui32 i : indicies) {
- auto trait = AggregatedColumns->Child(i)->ChildPtr(1);
- bool isAggApply = trait->IsCallable("AggApply");
- if (isAggApply) {
- trait = Ctx.Builder(Node->Pos())
- .Callable("AggApply")
- .Add(0, trait->ChildPtr(0))
- .Callable(1, "StructType")
- .List(0)
- .Add(0, distinctField)
- .Add(1, trait->ChildPtr(1))
- .Seal()
- .Seal()
- .Lambda(2)
- .Param("row")
- .Apply(trait->ChildPtr(2))
- .With(0)
- .Callable("Member")
- .Arg(0, "row")
- .Add(1, distinctField)
- .Seal()
- .Done()
- .Seal()
- .Seal()
- .Seal()
- .Build();
- } else {
- TExprNode::TPtr newInit;
- if (trait->ChildPtr(1)->Head().ChildrenSize() == 1) {
- newInit = Ctx.Builder(Node->Pos())
- .Lambda()
- .Param("row")
- .Apply(trait->ChildPtr(1))
- .With(0)
- .Callable("Member")
- .Arg(0, "row")
- .Add(1, distinctField)
- .Seal()
- .Done()
- .Seal()
- .Seal()
- .Build();
- } else {
- newInit = Ctx.Builder(Node->Pos())
- .Lambda()
- .Param("row")
- .Param("parent")
- .Apply(trait->ChildPtr(1))
- .With(0)
- .Callable("Member")
- .Arg(0, "row")
- .Add(1, distinctField)
- .Seal()
- .Done()
- .With(1, "parent")
- .Seal()
- .Seal()
- .Build();
- }
- TExprNode::TPtr newUpdate;
- if (trait->ChildPtr(2)->Head().ChildrenSize() == 2) {
- newUpdate = Ctx.Builder(Node->Pos())
- .Lambda()
- .Param("row")
- .Param("state")
- .Apply(trait->ChildPtr(2))
- .With(0)
- .Callable("Member")
- .Arg(0, "row")
- .Add(1, distinctField)
- .Seal()
- .Done()
- .With(1, "state")
- .Seal()
- .Seal()
- .Build();
- } else {
- newUpdate = Ctx.Builder(Node->Pos())
- .Lambda()
- .Param("row")
- .Param("state")
- .Param("parent")
- .Apply(trait->ChildPtr(2))
- .With(0)
- .Callable("Member")
- .Arg(0, "row")
- .Add(1, distinctField)
- .Seal()
- .Done()
- .With(1, "state")
- .With(2, "parent")
- .Seal()
- .Seal()
- .Build();
- }
- trait = Ctx.Builder(Node->Pos())
- .Callable("AggregationTraits")
- .Callable(0, "StructType")
- .List(0)
- .Add(0, distinctField)
- .Add(1, trait->ChildPtr(0))
- .Seal()
- .Seal()
- .Add(1, newInit)
- .Add(2, newUpdate)
- .Add(3, trait->ChildPtr(3))
- .Add(4, trait->ChildPtr(4))
- .Add(5, trait->ChildPtr(5))
- .Add(6, trait->ChildPtr(6))
- .Add(7, trait->ChildPtr(7))
- .Seal()
- .Build();
- }
- combineColumns.push_back(Ctx.Builder(Node->Pos())
- .List()
- .Add(0, InitialColumnNames[i])
- .Add(1, trait)
- .Seal()
- .Build());
- }
- auto combine = Ctx.Builder(Node->Pos())
- .Callable("AggregateCombine")
- .Add(0, distinct)
- .Add(1, KeyColumns)
- .Add(2, Ctx.NewList(Node->Pos(), std::move(combineColumns)))
- .Add(3, cleanOutputSettings)
- .Seal()
- .Build();
- unionAllInputs.push_back(GenerateJustOverStates(combine, indicies));
- streams.push_back(SerializeIdxSet(indicies));
- }
- if (UseBlocks) {
- for (ui32 i = 0; i < unionAllInputs.size(); ++i) {
- unionAllInputs[i] = Ctx.Builder(Node->Pos())
- .Callable("Map")
- .Add(0, unionAllInputs[i])
- .Lambda(1)
- .Param("row")
- .Callable("AddMember")
- .Arg(0, "row")
- .Atom(1, "_yql_group_stream_index")
- .Callable(2, "Uint32")
- .Atom(0, ToString(i))
- .Seal()
- .Seal()
- .Seal()
- .Seal()
- .Build();
- }
- }
- auto settings = cleanOutputSettings;
- if (UseBlocks) {
- settings = AddSetting(*settings, Node->Pos(), "many_streams", Ctx.NewList(Node->Pos(), std::move(streams)), Ctx);
- }
- auto unionAll = Ctx.NewCallable(Node->Pos(), "UnionAll", std::move(unionAllInputs));
- auto mergeManyFinalize = Ctx.Builder(Node->Pos())
- .Callable("AggregateMergeManyFinalize")
- .Add(0, unionAll)
- .Add(1, KeyColumns)
- .Add(2, Ctx.NewList(Node->Pos(), std::move(finalizeColumns)))
- .Add(3, settings)
- .Seal()
- .Build();
- return mergeManyFinalize;
- }
- TExprNode::TPtr TAggregateExpander::TryGenerateBlockCombine() {
- if (HaveSessionSetting || HaveDistinct) {
- return nullptr;
- }
- for (const auto& x : AggregatedColumns->Children()) {
- auto trait = x->ChildPtr(1);
- if (!trait->IsCallable("AggApply")) {
- return nullptr;
- }
- }
- return TryGenerateBlockCombineAllOrHashed();
- }
- TExprNode::TPtr TAggregateExpander::TryGenerateBlockMergeFinalize() {
- if (UsePartitionsByKeys || !UseBlocks) {
- return nullptr;
- }
- if (HaveSessionSetting || HaveDistinct) {
- return nullptr;
- }
- for (const auto& x : AggregatedColumns->Children()) {
- auto trait = x->ChildPtr(1);
- if (!trait->IsCallable({ "AggApplyState", "AggApplyManyState" })) {
- return nullptr;
- }
- }
- return TryGenerateBlockMergeFinalizeHashed();
- }
- TExprNode::TPtr TAggregateExpander::TryGenerateBlockMergeFinalizeHashed() {
- if (!TypesCtx.ArrowResolver) {
- return nullptr;
- }
- if (KeyColumns->ChildrenSize() == 0) {
- return nullptr;
- }
- bool isMany = Suffix == "MergeManyFinalize";
- auto streamArg = Ctx.NewArgument(Node->Pos(), "stream");
- TExprNode::TListType keyIdxs;
- TVector<TString> outputColumns;
- TExprNode::TListType aggs;
- ui32 streamIdxColumn;
- auto blocks = MakeInputBlocks(streamArg, keyIdxs, outputColumns, aggs, true, isMany, &streamIdxColumn);
- if (!blocks) {
- return nullptr;
- }
- TExprNode::TPtr aggBlocks;
- if (!isMany) {
- aggBlocks = Ctx.Builder(Node->Pos())
- .Callable("ToFlow")
- .Callable(0, "BlockMergeFinalizeHashed")
- .Callable(0, "FromFlow")
- .Add(0, blocks)
- .Seal()
- .Add(1, Ctx.NewList(Node->Pos(), std::move(keyIdxs)))
- .Add(2, Ctx.NewList(Node->Pos(), std::move(aggs)))
- .Seal()
- .Seal()
- .Build();
- } else {
- auto manyStreamsSetting = GetSetting(*Node->Child(3), "many_streams");
- YQL_ENSURE(manyStreamsSetting, "Missing many_streams setting");
- aggBlocks = Ctx.Builder(Node->Pos())
- .Callable("ToFlow")
- .Callable(0, "BlockMergeManyFinalizeHashed")
- .Callable(0, "FromFlow")
- .Add(0, blocks)
- .Seal()
- .Add(1, Ctx.NewList(Node->Pos(), std::move(keyIdxs)))
- .Add(2, Ctx.NewList(Node->Pos(), std::move(aggs)))
- .Atom(3, ToString(streamIdxColumn))
- .Add(4, manyStreamsSetting->TailPtr())
- .Seal()
- .Seal()
- .Build();
- }
- auto aggWideFlow = Ctx.Builder(Node->Pos())
- .Callable("ToFlow")
- .Callable(0, "WideFromBlocks")
- .Callable(0, "FromFlow")
- .Add(0, aggBlocks)
- .Seal()
- .Seal()
- .Seal()
- .Build();
- auto finalFlow = MakeNarrowMap(Node->Pos(), outputColumns, aggWideFlow, Ctx);
- auto root = Ctx.NewCallable(Node->Pos(), "FromFlow", { finalFlow });
- auto lambdaStream = Ctx.NewLambda(Node->Pos(), Ctx.NewArguments(Node->Pos(), { streamArg }), std::move(root));
- auto keySelector = BuildKeySelector(Node->Pos(), *OriginalRowType, KeyColumns, Ctx);
- return Ctx.Builder(Node->Pos())
- .Callable("ShuffleByKeys")
- .Add(0, AggList)
- .Add(1, keySelector)
- .Lambda(2)
- .Param("stream")
- .Apply(GetContextLambda())
- .With(0)
- .Apply(lambdaStream)
- .With(0, "stream")
- .Seal()
- .Done()
- .Seal()
- .Seal()
- .Seal()
- .Build();
- }
- TExprNode::TPtr ExpandAggregatePeephole(const TExprNode::TPtr& node, TExprContext& ctx, TTypeAnnotationContext& typesCtx) {
- if (NNodes::TCoAggregate::Match(node.Get())) {
- NNodes::TCoAggregate self(node);
- auto ret = TAggregateExpander::CountAggregateRewrite(self, ctx, typesCtx.IsBlockEngineEnabled());
- if (ret != node) {
- YQL_CLOG(DEBUG, Core) << "CountAggregateRewrite on peephole";
- return ret;
- }
- }
- return ExpandAggregatePeepholeImpl(node, ctx, typesCtx, false, typesCtx.IsBlockEngineEnabled(), false);
- }
- } // namespace NYql
|