mkql_block_agg_some.cpp 9.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288
  1. #include "mkql_block_agg_some.h"
  2. #include <yql/essentials/minikql/mkql_node_builder.h>
  3. #include <yql/essentials/minikql/mkql_node_cast.h>
  4. #include <yql/essentials/minikql/computation/mkql_block_reader.h>
  5. #include <yql/essentials/minikql/computation/mkql_block_builder.h>
  6. namespace NKikimr {
  7. namespace NMiniKQL {
  8. namespace {
  9. using TGenericState = NUdf::TUnboxedValuePod;
  10. void PushValueToState(TGenericState* typedState, const arrow::Datum& datum, ui64 row, IBlockReader& reader,
  11. IBlockItemConverter& converter, TComputationContext& ctx)
  12. {
  13. if (datum.is_scalar()) {
  14. if (datum.scalar()->is_valid) {
  15. auto item = reader.GetScalarItem(*datum.scalar());
  16. *typedState = converter.MakeValue(item, ctx.HolderFactory);
  17. }
  18. } else {
  19. const auto& array = datum.array();
  20. TBlockItem curr = reader.GetItem(*array, row);
  21. if (curr) {
  22. *typedState = converter.MakeValue(curr, ctx.HolderFactory);
  23. }
  24. }
  25. }
  26. class TGenericColumnBuilder : public IAggColumnBuilder {
  27. public:
  28. TGenericColumnBuilder(ui64 size, TType* columnType, TComputationContext& ctx)
  29. : Builder_(MakeArrayBuilder(TTypeInfoHelper(), columnType, ctx.ArrowMemoryPool, size, &ctx.Builder->GetPgBuilder()))
  30. , Ctx_(ctx)
  31. {
  32. }
  33. void Add(const void* state) final {
  34. Builder_->Add(*static_cast<const TGenericState*>(state));
  35. }
  36. NUdf::TUnboxedValue Build() final {
  37. return Ctx_.HolderFactory.CreateArrowBlock(Builder_->Build(true));
  38. }
  39. private:
  40. const std::unique_ptr<IArrayBuilder> Builder_;
  41. TComputationContext& Ctx_;
  42. };
  43. template<typename TTag>
  44. class TSomeBlockGenericAggregator;
  45. template<>
  46. class TSomeBlockGenericAggregator<TCombineAllTag> : public TCombineAllTag::TBase {
  47. public:
  48. using TBase = TCombineAllTag::TBase;
  49. TSomeBlockGenericAggregator(TType* type, std::optional<ui32> filterColumn, ui32 argColumn, TComputationContext& ctx)
  50. : TBase(sizeof(TGenericState), filterColumn, ctx)
  51. , ArgColumn_(argColumn)
  52. , Reader_(MakeBlockReader(TTypeInfoHelper(), type))
  53. , Converter_(MakeBlockItemConverter(TTypeInfoHelper(), type, ctx.Builder->GetPgBuilder()))
  54. {
  55. }
  56. void InitState(void* state) final {
  57. new(state) TGenericState();
  58. }
  59. void DestroyState(void* state) noexcept final {
  60. auto typedState = static_cast<TGenericState*>(state);
  61. typedState->DeleteUnreferenced();
  62. *typedState = TGenericState();
  63. }
  64. void AddMany(void* state, const NUdf::TUnboxedValue* columns, ui64 batchLength, std::optional<ui64> filtered) final {
  65. TGenericState& typedState = *static_cast<TGenericState*>(state);
  66. if (typedState) {
  67. return;
  68. }
  69. Y_UNUSED(batchLength);
  70. const auto& datum = TArrowBlock::From(columns[ArgColumn_]).GetDatum();
  71. if (datum.is_scalar()) {
  72. if (datum.scalar()->is_valid) {
  73. auto item = Reader_->GetScalarItem(*datum.scalar());
  74. typedState = Converter_->MakeValue(item, Ctx_.HolderFactory);
  75. }
  76. } else {
  77. const auto& array = datum.array();
  78. auto len = array->length;
  79. const ui8* filterBitmap = nullptr;
  80. if (filtered) {
  81. const auto& filterDatum = TArrowBlock::From(columns[*FilterColumn_]).GetDatum();
  82. const auto& filterArray = filterDatum.array();
  83. MKQL_ENSURE(filterArray->GetNullCount() == 0, "Expected non-nullable bool column");
  84. filterBitmap = filterArray->template GetValues<uint8_t>(1);
  85. }
  86. for (auto i = 0; i < len; ++i) {
  87. TBlockItem curr = Reader_->GetItem(*array, i);
  88. if (curr && (!filterBitmap || filterBitmap[i])) {
  89. typedState = Converter_->MakeValue(curr, Ctx_.HolderFactory);
  90. break;
  91. }
  92. }
  93. }
  94. }
  95. NUdf::TUnboxedValue FinishOne(const void *state) final {
  96. auto typedState = *static_cast<const TGenericState *>(state);
  97. return typedState;
  98. }
  99. private:
  100. const ui32 ArgColumn_;
  101. const std::unique_ptr<IBlockReader> Reader_;
  102. const std::unique_ptr<IBlockItemConverter> Converter_;
  103. };
  104. template<>
  105. class TSomeBlockGenericAggregator<TCombineKeysTag> : public TCombineKeysTag::TBase {
  106. public:
  107. using TBase = TCombineKeysTag::TBase;
  108. TSomeBlockGenericAggregator(TType* type, std::optional<ui32> filterColumn, ui32 argColumn, TComputationContext& ctx)
  109. : TBase(sizeof(TGenericState), filterColumn, ctx)
  110. , ArgColumn_(argColumn)
  111. , Type_(type)
  112. , Reader_(MakeBlockReader(TTypeInfoHelper(), type))
  113. , Converter_(MakeBlockItemConverter(TTypeInfoHelper(), type, ctx.Builder->GetPgBuilder()))
  114. {
  115. }
  116. void InitKey(void* state, ui64 batchNum, const NUdf::TUnboxedValue* columns, ui64 row) final {
  117. new(state) TGenericState();
  118. UpdateKey(state, batchNum, columns, row);
  119. }
  120. void DestroyState(void* state) noexcept final {
  121. auto typedState = static_cast<TGenericState*>(state);
  122. typedState->DeleteUnreferenced();
  123. *typedState = TGenericState();
  124. }
  125. void UpdateKey(void* state, ui64 batchNum, const NUdf::TUnboxedValue* columns, ui64 row) final {
  126. Y_UNUSED(batchNum);
  127. auto typedState = static_cast<TGenericState*>(state);
  128. if (*typedState) {
  129. return;
  130. }
  131. const auto& datum = TArrowBlock::From(columns[ArgColumn_]).GetDatum();
  132. PushValueToState(typedState, datum, row, *Reader_, *Converter_, Ctx_);
  133. }
  134. std::unique_ptr<IAggColumnBuilder> MakeStateBuilder(ui64 size) final {
  135. return std::make_unique<TGenericColumnBuilder>(size, Type_, Ctx_);
  136. }
  137. private:
  138. const ui32 ArgColumn_;
  139. TType* const Type_;
  140. const std::unique_ptr<IBlockReader> Reader_;
  141. const std::unique_ptr<IBlockItemConverter> Converter_;
  142. };
  143. template<>
  144. class TSomeBlockGenericAggregator<TFinalizeKeysTag> : public TFinalizeKeysTag::TBase {
  145. public:
  146. using TBase = TFinalizeKeysTag::TBase;
  147. TSomeBlockGenericAggregator(TType* type, std::optional<ui32> filterColumn, ui32 argColumn, TComputationContext& ctx)
  148. : TBase(sizeof(TGenericState), filterColumn, ctx)
  149. , ArgColumn_(argColumn)
  150. , Type_(type)
  151. , Reader_(MakeBlockReader(TTypeInfoHelper(), type))
  152. , Converter_(MakeBlockItemConverter(TTypeInfoHelper(), type, ctx.Builder->GetPgBuilder()))
  153. {
  154. }
  155. void LoadState(void* state, ui64 batchNum, const NUdf::TUnboxedValue* columns, ui64 row) final {
  156. new(state) TGenericState();
  157. UpdateState(state, batchNum, columns, row);
  158. }
  159. void DestroyState(void* state) noexcept final {
  160. auto typedState = static_cast<TGenericState*>(state);
  161. typedState->DeleteUnreferenced();
  162. *typedState = TGenericState();
  163. }
  164. void UpdateState(void* state, ui64 batchNum, const NUdf::TUnboxedValue* columns, ui64 row) final {
  165. Y_UNUSED(batchNum);
  166. auto typedState = static_cast<TGenericState*>(state);
  167. if (*typedState) {
  168. return;
  169. }
  170. const auto& datum = TArrowBlock::From(columns[ArgColumn_]).GetDatum();
  171. PushValueToState(typedState, datum, row, *Reader_, *Converter_, Ctx_);
  172. }
  173. std::unique_ptr<IAggColumnBuilder> MakeResultBuilder(ui64 size) final {
  174. return std::make_unique<TGenericColumnBuilder>(size, Type_, Ctx_);
  175. }
  176. private:
  177. const ui32 ArgColumn_;
  178. TType* const Type_;
  179. const std::unique_ptr<IBlockReader> Reader_;
  180. const std::unique_ptr<IBlockItemConverter> Converter_;
  181. };
  182. template <typename TTag>
  183. class TPreparedSomeBlockGenericAggregator : public TTag::TPreparedAggregator {
  184. public:
  185. using TBase = typename TTag::TPreparedAggregator;
  186. TPreparedSomeBlockGenericAggregator(TType* type, std::optional<ui32> filterColumn, ui32 argColumn)
  187. : TBase(sizeof(TGenericState))
  188. , Type_(type)
  189. , FilterColumn_(filterColumn)
  190. , ArgColumn_(argColumn)
  191. {}
  192. std::unique_ptr<typename TTag::TAggregator> Make(TComputationContext& ctx) const final {
  193. return std::make_unique<TSomeBlockGenericAggregator<TTag>>(Type_, FilterColumn_, ArgColumn_, ctx);
  194. }
  195. private:
  196. TType* const Type_;
  197. const std::optional<ui32> FilterColumn_;
  198. const ui32 ArgColumn_;
  199. };
  200. template <typename TTag>
  201. std::unique_ptr<typename TTag::TPreparedAggregator> PrepareSome(TTupleType* tupleType, std::optional<ui32> filterColumn, ui32 argColumn) {
  202. const auto blockType = AS_TYPE(TBlockType, tupleType->GetElementType(argColumn));
  203. const auto argType = blockType->GetItemType();
  204. return std::make_unique<TPreparedSomeBlockGenericAggregator<TTag>>(argType, filterColumn, argColumn);
  205. }
  206. class TBlockSomeFactory : public IBlockAggregatorFactory {
  207. std::unique_ptr<IPreparedBlockAggregator<IBlockAggregatorCombineAll>> PrepareCombineAll(
  208. TTupleType* tupleType,
  209. std::optional<ui32> filterColumn,
  210. const std::vector<ui32>& argsColumns,
  211. const TTypeEnvironment& env) const override {
  212. Y_UNUSED(env);
  213. return PrepareSome<TCombineAllTag>(tupleType, filterColumn, argsColumns[0]);
  214. }
  215. std::unique_ptr<IPreparedBlockAggregator<IBlockAggregatorCombineKeys>> PrepareCombineKeys(
  216. TTupleType* tupleType,
  217. const std::vector<ui32>& argsColumns,
  218. const TTypeEnvironment& env) const override {
  219. Y_UNUSED(env);
  220. return PrepareSome<TCombineKeysTag>(tupleType, std::optional<ui32>(), argsColumns[0]);
  221. }
  222. std::unique_ptr<IPreparedBlockAggregator<IBlockAggregatorFinalizeKeys>> PrepareFinalizeKeys(
  223. TTupleType* tupleType,
  224. const std::vector<ui32>& argsColumns,
  225. const TTypeEnvironment& env,
  226. TType* returnType,
  227. ui32 hint) const override {
  228. Y_UNUSED(env);
  229. Y_UNUSED(returnType);
  230. Y_UNUSED(hint);
  231. return PrepareSome<TFinalizeKeysTag>(tupleType, std::optional<ui32>(), argsColumns[0]);
  232. }
  233. };
  234. }
  235. std::unique_ptr<IBlockAggregatorFactory> MakeBlockSomeFactory() {
  236. return std::make_unique<TBlockSomeFactory>();
  237. }
  238. }
  239. }