mkql_wide_condense.cpp 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321
  1. #include "mkql_wide_condense.h"
  2. #include <yql/essentials/minikql/mkql_node_cast.h>
  3. #include <yql/essentials/minikql/mkql_node_builder.h>
  4. #include <yql/essentials/minikql/computation/mkql_computation_node_holders.h>
  5. #include <yql/essentials/minikql/computation/mkql_computation_node_codegen.h> // Y_IGNORE
  6. #include <yql/essentials/utils/cast.h>
  7. namespace NKikimr {
  8. namespace NMiniKQL {
  9. using NYql::EnsureDynamicCast;
  10. namespace {
  11. template <bool Interruptable, bool UseCtx>
  12. class TWideCondense1Wrapper : public TStatefulWideFlowCodegeneratorNode<TWideCondense1Wrapper<Interruptable, UseCtx>> {
  13. using TBaseComputation = TStatefulWideFlowCodegeneratorNode<TWideCondense1Wrapper<Interruptable, UseCtx>>;
  14. public:
  15. TWideCondense1Wrapper(TComputationMutables& mutables, IComputationWideFlowNode* flow,
  16. TComputationExternalNodePtrVector&& items, TComputationNodePtrVector&& initState,
  17. TComputationExternalNodePtrVector&& state, IComputationNode* outSwitch, TComputationNodePtrVector&& updateState)
  18. : TBaseComputation(mutables, flow, EValueRepresentation::Embedded), Flow(flow)
  19. , Items(std::move(items))
  20. , InitState(std::move(initState))
  21. , State(std::move(state))
  22. , Switch(outSwitch)
  23. , UpdateState(std::move(updateState))
  24. , SwitchItem(IsPasstrought(Switch, Items))
  25. , ItemsOnInit(GetPasstroughtMap(Items, InitState))
  26. , ItemsOnUpdate(GetPasstroughtMap(Items, UpdateState))
  27. , UpdateOnItems(GetPasstroughtMap(UpdateState, Items))
  28. , WideFieldsIndex(mutables.IncrementWideFieldsIndex(Items.size()))
  29. , TempStateIndex(std::exchange(mutables.CurValueIndex, mutables.CurValueIndex + State.size()))
  30. {}
  31. EFetchResult DoCalculate(NUdf::TUnboxedValue& state, TComputationContext& ctx, NUdf::TUnboxedValue*const* output) const {
  32. if (state.IsFinish()) {
  33. return EFetchResult::Finish;
  34. } else if (state.HasValue() && state.Get<bool>()) {
  35. if constexpr (UseCtx) {
  36. CleanupCurrentContext();
  37. }
  38. state = NUdf::TUnboxedValuePod(false);
  39. for (ui32 i = 0U; i < State.size(); ++i)
  40. State[i]->SetValue(ctx, InitState[i]->GetValue(ctx));
  41. }
  42. auto** fields = ctx.WideFields.data() + WideFieldsIndex;
  43. while (true) {
  44. for (auto i = 0U; i < Items.size(); ++i)
  45. if (Items[i]->GetDependencesCount() > 0U || ItemsOnInit[i] || ItemsOnUpdate[i] || SwitchItem && i == *SwitchItem)
  46. fields[i] = &Items[i]->RefValue(ctx);
  47. switch (Flow->FetchValues(ctx, fields)) {
  48. case EFetchResult::Yield:
  49. return EFetchResult::Yield;
  50. case EFetchResult::Finish:
  51. break;
  52. case EFetchResult::One:
  53. if (state.IsInvalid()) {
  54. state = NUdf::TUnboxedValuePod(false);
  55. for (ui32 i = 0U; i < State.size(); ++i)
  56. State[i]->SetValue(ctx, InitState[i]->GetValue(ctx));
  57. } else {
  58. const auto& reset = Switch->GetValue(ctx);
  59. if (Interruptable && !reset) {
  60. break;
  61. }
  62. if (reset.template Get<bool>()) {
  63. for (const auto state : State) {
  64. if (const auto out = *output++) {
  65. *out = state->GetValue(ctx);
  66. }
  67. }
  68. state = NUdf::TUnboxedValuePod(true);
  69. return EFetchResult::One;
  70. }
  71. for (ui32 i = 0U; i < State.size(); ++i)
  72. ctx.MutableValues[TempStateIndex + i] = UpdateState[i]->GetValue(ctx);
  73. for (ui32 i = 0U; i < State.size(); ++i)
  74. State[i]->SetValue(ctx, std::move(ctx.MutableValues[TempStateIndex + i]));
  75. }
  76. continue;
  77. }
  78. break;
  79. }
  80. const bool empty = state.IsInvalid();
  81. state = NUdf::TUnboxedValuePod::MakeFinish();
  82. if (empty)
  83. return EFetchResult::Finish;
  84. for (const auto state : State) {
  85. if (const auto out = *output++) {
  86. *out = state->GetValue(ctx);
  87. }
  88. }
  89. return EFetchResult::One;
  90. }
  91. #ifndef MKQL_DISABLE_CODEGEN
  92. ICodegeneratorInlineWideNode::TGenerateResult DoGenGetValues(const TCodegenContext& ctx, Value* statePtr, BasicBlock*& block) const {
  93. auto& context = ctx.Codegen.GetContext();
  94. const auto init = BasicBlock::Create(context, "init", ctx.Func);
  95. const auto next = BasicBlock::Create(context, "next", ctx.Func);
  96. const auto work = BasicBlock::Create(context, "work", ctx.Func);
  97. const auto good = BasicBlock::Create(context, "good", ctx.Func);
  98. const auto stop = BasicBlock::Create(context, "stop", ctx.Func);
  99. const auto exit = BasicBlock::Create(context, "exit", ctx.Func);
  100. const auto valueType = Type::getInt128Ty(context);
  101. const auto state = new LoadInst(valueType, statePtr, "state", block);
  102. const auto resultType = Type::getInt32Ty(context);
  103. const auto result = PHINode::Create(resultType, 4U, "result", exit);
  104. result->addIncoming(ConstantInt::get(resultType, i32(EFetchResult::Finish)), block);
  105. const auto empty = PHINode::Create(Type::getInt1Ty(context), 3U, "empty", work);
  106. const auto bit = CastInst::Create(Instruction::Trunc, state, Type::getInt1Ty(context), "bit", block);
  107. empty->addIncoming(bit, block);
  108. const auto choise = SwitchInst::Create(state, work, 2U, block);
  109. choise->addCase(GetFinish(context), exit);
  110. choise->addCase(GetTrue(context), init);
  111. block = init;
  112. if constexpr (UseCtx) {
  113. const auto cleanup = ConstantInt::get(Type::getInt64Ty(context), GetMethodPtr(&CleanupCurrentContext));
  114. const auto cleanupType = FunctionType::get(Type::getVoidTy(context), {}, false);
  115. const auto cleanupPtr = CastInst::Create(Instruction::IntToPtr, cleanup, PointerType::getUnqual(cleanupType), "cleanup_ctx", block);
  116. CallInst::Create(cleanupType, cleanupPtr, {}, "", block);
  117. }
  118. new StoreInst(GetFalse(context), statePtr, block);
  119. for (ui32 i = 0U; i < State.size(); ++i) {
  120. EnsureDynamicCast<ICodegeneratorExternalNode*>(State[i])->CreateSetValue(ctx, block, GetNodeValue(InitState[i], ctx, block));
  121. }
  122. empty->addIncoming(ConstantInt::getFalse(context), block);
  123. BranchInst::Create(work, block);
  124. block = work;
  125. const auto getres = GetNodeValues(Flow, ctx, block);
  126. result->addIncoming(ConstantInt::get(resultType, i32(EFetchResult::Yield)), block);
  127. const auto action = SwitchInst::Create(getres.first, good, 2U, block);
  128. action->addCase(ConstantInt::get(resultType, i32(EFetchResult::Finish)), stop);
  129. action->addCase(ConstantInt::get(resultType, i32(EFetchResult::Yield)), exit);
  130. block = good;
  131. std::vector<Value*> items(Items.size(), nullptr);
  132. for (ui32 i = 0U; i < items.size(); ++i) {
  133. if (Items[i]->GetDependencesCount() > 0U || ItemsOnInit[i])
  134. EnsureDynamicCast<ICodegeneratorExternalNode*>(Items[i])->CreateSetValue(ctx, block, items[i] = getres.second[i](ctx, block));
  135. else if (ItemsOnUpdate[i] || SwitchItem && i == *SwitchItem)
  136. items[i] = getres.second[i](ctx, block);
  137. }
  138. BranchInst::Create(init, next, empty, block);
  139. block = next;
  140. const auto swap = BasicBlock::Create(context, "swap", ctx.Func);
  141. const auto skip = BasicBlock::Create(context, "skip", ctx.Func);
  142. const auto reset = SwitchItem ? items[*SwitchItem] : GetNodeValue(Switch, ctx, block);
  143. if constexpr (Interruptable) {
  144. const auto pass = BasicBlock::Create(context, "pass", ctx.Func);
  145. BranchInst::Create(stop, next, IsEmpty(reset, block, context), block);
  146. block = pass;
  147. }
  148. const auto cast = CastInst::Create(Instruction::Trunc, reset, Type::getInt1Ty(context), "bool", block);
  149. BranchInst::Create(swap, skip, cast, block);
  150. block = swap;
  151. new StoreInst(GetTrue(context), statePtr, block);
  152. result->addIncoming(ConstantInt::get(resultType, i32(EFetchResult::One)), block);
  153. BranchInst::Create(exit, block);
  154. block = skip;
  155. std::vector<Value*> updates(State.size(), nullptr);
  156. for (ui32 i = 0U; i < State.size(); ++i) {
  157. if (const auto map = UpdateOnItems[i])
  158. updates[i] = items[*map];
  159. else if (State[i] != UpdateState[i])
  160. updates[i] = GetNodeValue(UpdateState[i], ctx, block);
  161. }
  162. for (ui32 i = 0U; i < updates.size(); ++i) {
  163. if (const auto s = updates[i])
  164. EnsureDynamicCast<ICodegeneratorExternalNode*>(State[i])->CreateSetValue(ctx, block, s);
  165. }
  166. empty->addIncoming(ConstantInt::getFalse(context), block);
  167. BranchInst::Create(work, block);
  168. block = stop;
  169. new StoreInst(GetFinish(context), statePtr, block);
  170. const auto select = SelectInst::Create(empty, ConstantInt::get(resultType, i32(EFetchResult::Finish)), ConstantInt::get(resultType, i32(EFetchResult::One)), "output", block);
  171. result->addIncoming(select, block);
  172. BranchInst::Create(exit, block);
  173. block = exit;
  174. ICodegeneratorInlineWideNode::TGettersList getters;
  175. getters.reserve(State.size());
  176. std::transform(State.cbegin(), State.cend(), std::back_inserter(getters), [&](IComputationNode* node) {
  177. return [node](const TCodegenContext& ctx, BasicBlock*& block){ return GetNodeValue(node, ctx, block); };
  178. });
  179. return {result, std::move(getters)};
  180. }
  181. #endif
  182. private:
  183. void RegisterDependencies() const final {
  184. if (const auto flow = this->FlowDependsOn(Flow)) {
  185. std::for_each(Items.cbegin(), Items.cend(), std::bind(&TWideCondense1Wrapper::Own, flow, std::placeholders::_1));
  186. std::for_each(InitState.cbegin(), InitState.cend(), std::bind(&TWideCondense1Wrapper::DependsOn, flow, std::placeholders::_1));
  187. std::for_each(State.cbegin(), State.cend(), std::bind(&TWideCondense1Wrapper::Own, flow, std::placeholders::_1));
  188. TWideCondense1Wrapper::DependsOn(flow, Switch);
  189. std::for_each(UpdateState.cbegin(), UpdateState.cend(), std::bind(&TWideCondense1Wrapper::DependsOn, flow, std::placeholders::_1));
  190. }
  191. }
  192. IComputationWideFlowNode* const Flow;
  193. const TComputationExternalNodePtrVector Items;
  194. const TComputationNodePtrVector InitState;
  195. const TComputationExternalNodePtrVector State;
  196. IComputationNode* const Switch;
  197. const TComputationNodePtrVector UpdateState;
  198. const std::optional<size_t> SwitchItem;
  199. const TPasstroughtMap ItemsOnInit, ItemsOnUpdate, UpdateOnItems;
  200. ui32 WideFieldsIndex;
  201. ui32 TempStateIndex;
  202. };
  203. }
  204. IComputationNode* WrapWideCondense1(TCallable& callable, const TComputationNodeFactoryContext& ctx) {
  205. MKQL_ENSURE(callable.GetInputsCount() >= 2U, "Expected at least two args.");
  206. const auto inputWidth = GetWideComponentsCount(AS_TYPE(TFlowType, callable.GetInput(0U).GetStaticType()));
  207. const auto outputWidth = GetWideComponentsCount(AS_TYPE(TFlowType, callable.GetType()->GetReturnType()));
  208. const auto flow = LocateNode(ctx.NodeLocator, callable, 0U);
  209. TComputationNodePtrVector initState, updateState;
  210. initState.reserve(outputWidth);
  211. updateState.reserve(outputWidth);
  212. ui32 index = inputWidth;
  213. std::generate_n(std::back_inserter(initState), outputWidth, [&](){ return LocateNode(ctx.NodeLocator, callable, ++index); } );
  214. index += outputWidth;
  215. const auto outSwitch = LocateNode(ctx.NodeLocator, callable, ++index);
  216. bool isOptional;
  217. const auto dataType = UnpackOptionalData(callable.GetInput(index), isOptional);
  218. MKQL_ENSURE(dataType->GetSchemeType() == NUdf::TDataType<bool>::Id, "Expected bool.");
  219. std::generate_n(std::back_inserter(updateState), outputWidth, [&](){ return LocateNode(ctx.NodeLocator, callable, ++index); } );
  220. TComputationExternalNodePtrVector items, state;
  221. items.reserve(inputWidth);
  222. state.reserve(outputWidth);
  223. index = 0U;
  224. std::generate_n(std::back_inserter(items), inputWidth, [&](){ return LocateExternalNode(ctx.NodeLocator, callable, ++index); } );
  225. index += outputWidth;
  226. std::generate_n(std::back_inserter(state), outputWidth, [&](){ return LocateExternalNode(ctx.NodeLocator, callable, ++index); } );
  227. index = 2 + inputWidth + 3 * outputWidth;
  228. bool useCtx = false;
  229. if (index < callable.GetInputsCount()) {
  230. useCtx = AS_VALUE(TDataLiteral, callable.GetInput(index))->AsValue().Get<bool>();
  231. ++index;
  232. }
  233. if (useCtx) {
  234. if (const auto wide = dynamic_cast<IComputationWideFlowNode*>(flow)) {
  235. if (isOptional) {
  236. return new TWideCondense1Wrapper<true, true>(ctx.Mutables, wide, std::move(items), std::move(initState), std::move(state), outSwitch, std::move(updateState));
  237. } else {
  238. return new TWideCondense1Wrapper<false, true>(ctx.Mutables, wide, std::move(items), std::move(initState), std::move(state), outSwitch, std::move(updateState));
  239. }
  240. }
  241. } else {
  242. if (const auto wide = dynamic_cast<IComputationWideFlowNode*>(flow)) {
  243. if (isOptional) {
  244. return new TWideCondense1Wrapper<true, false>(ctx.Mutables, wide, std::move(items), std::move(initState), std::move(state), outSwitch, std::move(updateState));
  245. } else {
  246. return new TWideCondense1Wrapper<false, false>(ctx.Mutables, wide, std::move(items), std::move(initState), std::move(state), outSwitch, std::move(updateState));
  247. }
  248. }
  249. }
  250. THROW yexception() << "Expected wide flow.";
  251. }
  252. }
  253. }