123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422 |
- #include "mkql_block_agg_count.h"
- #include <yql/essentials/minikql/arrow/arrow_defs.h>
- #include <yql/essentials/minikql/computation/mkql_block_builder.h>
- namespace NKikimr {
- namespace NMiniKQL {
- namespace {
- struct TState {
- ui64 Count_ = 0;
- };
- class TColumnBuilder : public IAggColumnBuilder {
- public:
- TColumnBuilder(ui64 size, TComputationContext& ctx)
- : Builder_(TTypeInfoHelper(), arrow::uint64(), ctx.ArrowMemoryPool, size)
- , Ctx_(ctx)
- {
- }
- void Add(const void* state) final {
- auto typedState = static_cast<const TState*>(state);
- Builder_.Add(TBlockItem(typedState->Count_));
- }
- NUdf::TUnboxedValue Build() final {
- return Ctx_.HolderFactory.CreateArrowBlock(Builder_.Build(true));
- }
- private:
- NYql::NUdf::TFixedSizeArrayBuilder<ui64, false> Builder_;
- TComputationContext& Ctx_;
- };
// Declaration-only primary templates; the usable aggregators are the explicit
// specializations for the individual aggregation phase tags.
template <class TTag>
class TCountAllAggregator;

template <class TTag>
class TCountAggregator;
- template <>
- class TCountAllAggregator<TCombineAllTag> : public TCombineAllTag::TBase {
- public:
- using TBase = TCombineAllTag::TBase;
- TCountAllAggregator(std::optional<ui32> filterColumn, ui32 argColumn, TComputationContext& ctx)
- : TBase(sizeof(TState), filterColumn, ctx)
- {
- Y_UNUSED(argColumn);
- }
- void InitState(void* state) final {
- new(state) TState();
- }
- void DestroyState(void* state) noexcept final {
- static_assert(std::is_trivially_destructible<TState>::value);
- Y_UNUSED(state);
- }
- void AddMany(void* state, const NUdf::TUnboxedValue* columns, ui64 batchLength, std::optional<ui64> filtered) final {
- auto typedState = static_cast<TState*>(state);
- Y_UNUSED(columns);
- if (filtered) {
- typedState->Count_ += *filtered;
- }
- else {
- typedState->Count_ += batchLength;
- }
- }
- NUdf::TUnboxedValue FinishOne(const void* state) final {
- auto typedState = static_cast<const TState*>(state);
- return NUdf::TUnboxedValuePod(typedState->Count_);
- }
- };
- template <>
- class TCountAllAggregator<TCombineKeysTag> : public TCombineKeysTag::TBase {
- public:
- using TBase = TCombineKeysTag::TBase;
- TCountAllAggregator(std::optional<ui32> filterColumn, ui32 argColumn, TComputationContext& ctx)
- : TBase(sizeof(TState), filterColumn, ctx)
- {
- Y_UNUSED(argColumn);
- }
- void InitKey(void* state, ui64 batchNum, const NUdf::TUnboxedValue* columns, ui64 row) final {
- new(state) TState();
- UpdateKey(state, batchNum, columns, row);
- }
- void DestroyState(void* state) noexcept final {
- static_assert(std::is_trivially_destructible<TState>::value);
- Y_UNUSED(state);
- }
- void UpdateKey(void* state, ui64 batchNum, const NUdf::TUnboxedValue* columns, ui64 row) final {
- Y_UNUSED(batchNum);
- Y_UNUSED(columns);
- Y_UNUSED(row);
- auto typedState = static_cast<TState*>(state);
- typedState->Count_ += 1;
- }
- std::unique_ptr<IAggColumnBuilder> MakeStateBuilder(ui64 size) final {
- return std::make_unique<TColumnBuilder>(size, Ctx_);
- }
- };
- template <>
- class TCountAllAggregator<TFinalizeKeysTag> : public TFinalizeKeysTag::TBase {
- public:
- using TBase = TFinalizeKeysTag::TBase;
- TCountAllAggregator(std::optional<ui32> filterColumn, ui32 argColumn, TComputationContext& ctx)
- : TBase(sizeof(TState), filterColumn, ctx)
- , ArgColumn_(argColumn)
- {
- }
- void LoadState(void* state, ui64 batchNum, const NUdf::TUnboxedValue* columns, ui64 row) final {
- new(state) TState();
- UpdateState(state, batchNum, columns, row);
- }
- void DestroyState(void* state) noexcept final {
- static_assert(std::is_trivially_destructible<TState>::value);
- Y_UNUSED(state);
- }
- void UpdateState(void* state, ui64 batchNum, const NUdf::TUnboxedValue* columns, ui64 row) final {
- Y_UNUSED(batchNum);
- auto typedState = static_cast<TState*>(state);
- const auto& datum = TArrowBlock::From(columns[ArgColumn_]).GetDatum();
- if (datum.is_scalar()) {
- MKQL_ENSURE(datum.scalar()->is_valid, "Expected not null");
- typedState->Count_ += datum.scalar_as<arrow::UInt64Scalar>().value;
- } else {
- const auto& array = datum.array();
- auto ptr = array->GetValues<ui64>(1);
- MKQL_ENSURE(array->GetNullCount() == 0, "Expected not null");
- typedState->Count_ += ptr[row];
- }
- }
- std::unique_ptr<IAggColumnBuilder> MakeResultBuilder(ui64 size) final {
- return std::make_unique<TColumnBuilder>(size, Ctx_);
- }
- private:
- const ui32 ArgColumn_;
- };
- template <>
- class TCountAggregator<TCombineAllTag> : public TCombineAllTag::TBase {
- public:
- using TBase = TCombineAllTag::TBase;
- TCountAggregator(std::optional<ui32> filterColumn, ui32 argColumn, TComputationContext& ctx)
- : TBase(sizeof(TState), filterColumn, ctx)
- , ArgColumn_(argColumn)
- {
- }
- void InitState(void* state) final {
- new(state) TState();
- }
- void DestroyState(void* state) noexcept final {
- static_assert(std::is_trivially_destructible<TState>::value);
- Y_UNUSED(state);
- }
- void AddMany(void* state, const NUdf::TUnboxedValue* columns, ui64 batchLength, std::optional<ui64> filtered) final {
- auto typedState = static_cast<TState*>(state);
- const auto& datum = TArrowBlock::From(columns[ArgColumn_]).GetDatum();
- if (datum.is_scalar()) {
- if (datum.scalar()->is_valid) {
- typedState->Count_ += filtered ? *filtered : batchLength;
- }
- } else {
- const auto& array = datum.array();
- if (!filtered) {
- typedState->Count_ += array->length - array->GetNullCount();
- } else if (array->GetNullCount() == array->length) {
- // all nulls
- return;
- } else if (array->GetNullCount() == 0) {
- // no nulls
- typedState->Count_ += *filtered;
- } else {
- const auto& filterDatum = TArrowBlock::From(columns[*FilterColumn_]).GetDatum();
- // intersect masks from nulls and filter column
- const auto& filterArray = filterDatum.array();
- MKQL_ENSURE(filterArray->GetNullCount() == 0, "Expected non-nullable bool column");
- auto nullBitmapPtr = array->GetValues<uint8_t>(0, 0);
- const ui8* filterBitmap = filterArray->GetValues<uint8_t>(1);
- auto state = typedState->Count_;
- for (ui32 i = 0; i < array->length; ++i) {
- ui64 fullIndex = i + array->offset;
- auto bit1 = ((nullBitmapPtr[fullIndex >> 3] >> (fullIndex & 0x07)) & 1);
- auto bit2 = filterBitmap[i];
- state += bit1 & bit2;
- }
- typedState->Count_ = state;
- }
- }
- }
- NUdf::TUnboxedValue FinishOne(const void* state) final {
- auto typedState = static_cast<const TState*>(state);
- return NUdf::TUnboxedValuePod(typedState->Count_);
- }
- private:
- const ui32 ArgColumn_;
- };
- template <>
- class TCountAggregator<TCombineKeysTag> : public TCombineKeysTag::TBase {
- public:
- using TBase = TCombineKeysTag::TBase;
- TCountAggregator(std::optional<ui32> filterColumn, ui32 argColumn, TComputationContext& ctx)
- : TBase(sizeof(TState), filterColumn, ctx)
- , ArgColumn_(argColumn)
- {
- }
- void InitKey(void* state, ui64 batchNum, const NUdf::TUnboxedValue* columns, ui64 row) final {
- new(state) TState();
- UpdateKey(state, batchNum, columns, row);
- }
- void DestroyState(void* state) noexcept final {
- static_assert(std::is_trivially_destructible<TState>::value);
- Y_UNUSED(state);
- }
- void UpdateKey(void* state, ui64 batchNum, const NUdf::TUnboxedValue* columns, ui64 row) final {
- Y_UNUSED(batchNum);
- auto typedState = static_cast<TState*>(state);
- const auto& datum = TArrowBlock::From(columns[ArgColumn_]).GetDatum();
- if (datum.is_scalar()) {
- if (datum.scalar()->is_valid) {
- typedState->Count_ += 1;
- }
- } else {
- const auto& array = datum.array();
- if (array->GetNullCount() == 0) {
- typedState->Count_ += 1;
- } else {
- auto nullBitmapPtr = array->GetValues<uint8_t>(0, 0);
- auto fullIndex = row + array->offset;
- auto bit = ((nullBitmapPtr[fullIndex >> 3] >> (fullIndex & 0x07)) & 1);
- typedState->Count_ += bit;
- }
- }
- }
- std::unique_ptr<IAggColumnBuilder> MakeStateBuilder(ui64 size) final {
- return std::make_unique<TColumnBuilder>(size, Ctx_);
- }
- private:
- const ui32 ArgColumn_;
- };
- template <>
- class TCountAggregator<TFinalizeKeysTag> : public TCountAllAggregator<TFinalizeKeysTag>
- {
- public:
- using TBase = TCountAllAggregator<TFinalizeKeysTag>;
- TCountAggregator(std::optional<ui32> filterColumn, ui32 argColumn, TComputationContext& ctx)
- : TBase(filterColumn, argColumn, ctx)
- {}
- };
- template <typename TTag>
- class TPreparedCountAll : public TTag::TPreparedAggregator {
- public:
- using TBase = typename TTag::TPreparedAggregator;
- TPreparedCountAll(std::optional<ui32> filterColumn, ui32 argColumn)
- : TBase(sizeof(TState))
- , FilterColumn_(filterColumn)
- , ArgColumn_(argColumn)
- {}
- std::unique_ptr<typename TTag::TAggregator> Make(TComputationContext& ctx) const final {
- return std::make_unique<TCountAllAggregator<TTag>>(FilterColumn_, ArgColumn_, ctx);
- }
- private:
- const std::optional<ui32> FilterColumn_;
- const ui32 ArgColumn_;
- };
- template <typename TTag>
- class TPreparedCount : public TTag::TPreparedAggregator {
- public:
- using TBase = typename TTag::TPreparedAggregator;
- TPreparedCount(std::optional<ui32> filterColumn, ui32 argColumn)
- : TBase(sizeof(TState))
- , FilterColumn_(filterColumn)
- , ArgColumn_(argColumn)
- {}
- std::unique_ptr<typename TTag::TAggregator> Make(TComputationContext& ctx) const final {
- return std::make_unique<TCountAggregator<TTag>>(FilterColumn_, ArgColumn_, ctx);
- }
- private:
- const std::optional<ui32> FilterColumn_;
- const ui32 ArgColumn_;
- };
- template <typename TTag>
- std::unique_ptr<typename TTag::TPreparedAggregator> PrepareCountAll(std::optional<ui32> filterColumn, ui32 argColumn) {
- return std::make_unique<TPreparedCountAll<TTag>>(filterColumn, argColumn);
- }
- template <typename TTag>
- std::unique_ptr<typename TTag::TPreparedAggregator> PrepareCount(std::optional<ui32> filterColumn, ui32 argColumn) {
- return std::make_unique<TPreparedCount<TTag>>(filterColumn, argColumn);
- }
- class TBlockCountAllFactory : public IBlockAggregatorFactory {
- public:
- std::unique_ptr<TCombineAllTag::TPreparedAggregator> PrepareCombineAll(
- TTupleType* tupleType,
- std::optional<ui32> filterColumn,
- const std::vector<ui32>& argsColumns,
- const TTypeEnvironment& env) const final {
- Y_UNUSED(tupleType);
- Y_UNUSED(argsColumns);
- Y_UNUSED(env);
- return PrepareCountAll<TCombineAllTag>(filterColumn, 0);
- }
- std::unique_ptr<TCombineKeysTag::TPreparedAggregator> PrepareCombineKeys(
- TTupleType* tupleType,
- const std::vector<ui32>& argsColumns,
- const TTypeEnvironment& env) const final {
- Y_UNUSED(tupleType);
- Y_UNUSED(argsColumns);
- Y_UNUSED(env);
- return PrepareCountAll<TCombineKeysTag>(std::optional<ui32>(), 0);
- }
- std::unique_ptr<TFinalizeKeysTag::TPreparedAggregator> PrepareFinalizeKeys(
- TTupleType* tupleType,
- const std::vector<ui32>& argsColumns,
- const TTypeEnvironment& env,
- TType* returnType,
- ui32 hint) const final {
- Y_UNUSED(tupleType);
- Y_UNUSED(argsColumns);
- Y_UNUSED(env);
- Y_UNUSED(returnType);
- Y_UNUSED(hint);
- return PrepareCountAll<TFinalizeKeysTag>(std::optional<ui32>(), argsColumns[0]);
- }
- };
- class TBlockCountFactory : public IBlockAggregatorFactory {
- public:
- std::unique_ptr<TCombineAllTag::TPreparedAggregator> PrepareCombineAll(
- TTupleType* tupleType,
- std::optional<ui32> filterColumn,
- const std::vector<ui32>& argsColumns,
- const TTypeEnvironment& env) const final {
- Y_UNUSED(tupleType);
- Y_UNUSED(env);
- return PrepareCount<TCombineAllTag>(filterColumn, argsColumns[0]);
- }
- std::unique_ptr<TCombineKeysTag::TPreparedAggregator> PrepareCombineKeys(
- TTupleType* tupleType,
- const std::vector<ui32>& argsColumns,
- const TTypeEnvironment& env) const final {
- Y_UNUSED(tupleType);
- Y_UNUSED(argsColumns);
- Y_UNUSED(env);
- return PrepareCount<TCombineKeysTag>(std::optional<ui32>(), argsColumns[0]);
- }
- std::unique_ptr<TFinalizeKeysTag::TPreparedAggregator> PrepareFinalizeKeys(
- TTupleType* tupleType,
- const std::vector<ui32>& argsColumns,
- const TTypeEnvironment& env,
- TType* returnType,
- ui32 hint) const final {
- Y_UNUSED(tupleType);
- Y_UNUSED(argsColumns);
- Y_UNUSED(env);
- Y_UNUSED(returnType);
- Y_UNUSED(hint);
- return PrepareCount<TFinalizeKeysTag>(std::optional<ui32>(), argsColumns[0]);
- }
- };
- }
- std::unique_ptr<IBlockAggregatorFactory> MakeBlockCountAllFactory() {
- return std::make_unique<TBlockCountAllFactory>();
- }
- std::unique_ptr<IBlockAggregatorFactory> MakeBlockCountFactory() {
- return std::make_unique<TBlockCountFactory>();
- }
-
- }
- }
|