// Block trimmers: each IBlockTrimmer implementation below builds a fresh
// arrow ArrayData from an input ArrayData by copying the null bitmap, value
// buffers and children into newly allocated buffers taken from a memory pool.
//
// NOTE(review): this copy of the file appears to be damaged. The targets of
// the bare #include directives and the contents of every template
// parameter/argument list (e.g. after `template`, `std::shared_ptr`,
// `std::make_unique`, `GetValues`, `reinterpret_cast`) are missing. The code
// is kept token-for-token as found; restore the missing text from the
// canonical source before building.
#include "mkql_block_trimmer.h"

#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include

namespace NKikimr::NMiniKQL {

// Common base for all trimmers: stores the memory pool used for allocations
// and provides the shared null-bitmap handling.
class TBlockTrimmerBase : public IBlockTrimmer {
protected:
    // pool: memory pool handed to every buffer allocation made by the trimmer.
    TBlockTrimmerBase(arrow::MemoryPool* pool)
        : Pool_(pool)
    {}

    TBlockTrimmerBase() = delete;

    // Produces the null bitmap for the trimmed array.
    //  - null count == length: result of MakeDenseFalseBitmap(length, pool)
    //    (presumably an all-zero bitmap, judging by the name -- confirm).
    //    A zero-length array whose null count is zero also takes this branch.
    //  - some nulls: result of MakeDenseBitmapCopy over buffers[0], given
    //    array->length and array->offset.
    //  - otherwise (no nulls): an empty pointer, i.e. no bitmap at all.
    // buffers[0] is only dereferenced in the "some nulls" branch.
    std::shared_ptr TrimNullBitmap(const std::shared_ptr& array) {
        auto& nullBitmapBuffer = array->buffers[0];

        std::shared_ptr result;
        auto nullCount = array->GetNullCount();
        if (nullCount == array->length) {
            result = MakeDenseFalseBitmap(array->length, Pool_);
        } else if (nullCount > 0) {
            result = MakeDenseBitmapCopy(nullBitmapBuffer->data(), array->length, array->offset, Pool_);
        }

        return result;
    }

protected:
    // Pool used for all allocations; not owned by the trimmer.
    arrow::MemoryPool* Pool_;
};

// Trimmer for arrays of fixed-size values laid out as TLayout.
// `Nullable` (template parameter, list missing in this copy) controls whether
// the null bitmap is processed at all.
template
class TFixedSizeBlockTrimmer : public TBlockTrimmerBase {
public:
    TFixedSizeBlockTrimmer(arrow::MemoryPool* pool)
        : TBlockTrimmerBase(pool)
    {}

    // Expects exactly two buffers (null bitmap + values) and no children.
    // Copies sizeof(TLayout) * length bytes of values into a new buffer and
    // returns a new ArrayData with the same type, length and null count.
    std::shared_ptr Trim(const std::shared_ptr& array) override {
        Y_ENSURE(array->buffers.size() == 2);
        Y_ENSURE(array->child_data.empty());

        // Stays empty when Nullable is false.
        std::shared_ptr trimmedNullBitmap;
        if constexpr (Nullable) {
            trimmedNullBitmap = TrimNullBitmap(array);
        }

        // presumably GetValues already accounts for array->offset (Arrow
        // ArrayData::GetValues) -- TODO confirm.
        auto origData = array->GetValues(1);
        auto dataSize = sizeof(TLayout) * array->length;

        // NOTE(review): unlike TResourceBlockTrimmer, there is no Resize()
        // call after allocation here; whether the buffer's reported size
        // matters depends on AllocateResizableBuffer -- confirm.
        auto trimmedDataBuffer = NUdf::AllocateResizableBuffer(dataSize, Pool_);
        memcpy(trimmedDataBuffer->mutable_data(), origData, dataSize);

        // No offset argument is passed to Make.
        return arrow::ArrayData::Make(array->type, array->length, {std::move(trimmedNullBitmap), std::move(trimmedDataBuffer)}, array->GetNullCount());
    }
};

// Trimmer for arrays whose values are NUdf::TUnboxedValue objects.
template
class TResourceBlockTrimmer : public TBlockTrimmerBase {
public:
    TResourceBlockTrimmer(arrow::MemoryPool* pool)
        : TBlockTrimmerBase(pool)
    {}

    // Expects exactly two buffers (null bitmap + values) and no children.
    // Allocates room for `length` TUnboxedValue objects, resizes the buffer to
    // that byte size, and copy-constructs every element into it.
    std::shared_ptr Trim(const std::shared_ptr& array) override {
        Y_ENSURE(array->buffers.size() == 2);
        Y_ENSURE(array->child_data.empty());

        // Stays empty when Nullable is false.
        std::shared_ptr trimmedNullBitmap;
        if constexpr (Nullable) {
            trimmedNullBitmap = TrimNullBitmap(array);
        }

        auto origData = array->GetValues(1);
        auto dataSize = sizeof(NUdf::TUnboxedValue) * array->length;

        auto trimmedBuffer = NUdf::AllocateResizableBuffer>(dataSize, Pool_);
        ARROW_OK(trimmedBuffer->Resize(dataSize));
        auto trimmedBufferData = reinterpret_cast(trimmedBuffer->mutable_data());

        // Elements are copy-constructed in place (placement new) rather than
        // memcpy'd -- presumably because TUnboxedValue is not trivially
        // copyable; confirm against its definition.
        for (int64_t i = 0; i < array->length; i++) {
            ::new(&trimmedBufferData[i]) NUdf::TUnboxedValue(origData[i]);
        }

        return arrow::ArrayData::Make(array->type, array->length, {std::move(trimmedNullBitmap), std::move(trimmedBuffer)}, array->GetNullCount());
    }
};

// Trimmer for string/binary arrays described by TStringType; offsets use
// TStringType::offset_type.
template
class TStringBlockTrimmer : public TBlockTrimmerBase {
    using TOffset = typename TStringType::offset_type;

public:
    TStringBlockTrimmer(arrow::MemoryPool* pool)
        : TBlockTrimmerBase(pool)
    {}

    // Expects exactly three buffers (null bitmap, offsets, string bytes) and
    // no children. Produces a new offsets buffer rebased so that the first
    // offset is 0, and a new data buffer holding only the bytes between the
    // first and the last visible offset.
    std::shared_ptr Trim(const std::shared_ptr& array) override {
        Y_ENSURE(array->buffers.size() == 3);
        Y_ENSURE(array->child_data.empty());

        // Stays empty when Nullable is false.
        std::shared_ptr trimmedNullBitmap;
        if constexpr (Nullable) {
            trimmedNullBitmap = TrimNullBitmap(array);
        }

        auto origOffsetData = array->GetValues(1);
        // NOTE(review): buffers[2] is dereferenced without a null check --
        // confirm it is always present, even for arrays with no string bytes.
        auto origStringData = reinterpret_cast(array->buffers[2]->data() + origOffsetData[0]);
        // Number of string bytes referenced by the length + 1 visible offsets.
        auto stringDataSize = origOffsetData[array->length] - origOffsetData[0];

        // NOTE(review): as in TFixedSizeBlockTrimmer, neither buffer is
        // Resize()'d after allocation -- confirm this is intended.
        auto trimmedOffsetBuffer = NUdf::AllocateResizableBuffer(sizeof(TOffset) * (array->length + 1), Pool_);
        auto trimmedStringBuffer = NUdf::AllocateResizableBuffer(stringDataSize, Pool_);

        auto trimmedOffsetBufferData = reinterpret_cast(trimmedOffsetBuffer->mutable_data());
        auto trimmedStringBufferData = reinterpret_cast(trimmedStringBuffer->mutable_data());

        // length + 1 offsets are written; each is shifted by the first
        // original offset so the new data buffer starts at position 0.
        for (int64_t i = 0; i < array->length + 1; i++) {
            trimmedOffsetBufferData[i] = origOffsetData[i] - origOffsetData[0];
        }
        memcpy(trimmedStringBufferData, origStringData, stringDataSize);

        return arrow::ArrayData::Make(array->type, array->length, {std::move(trimmedNullBitmap), std::move(trimmedOffsetBuffer), std::move(trimmedStringBuffer)}, array->GetNullCount());
    }
};

// Trimmer for arrays with child arrays: trims the top-level null bitmap and
// delegates each child to the child trimmer at the same index.
template
class TTupleBlockTrimmer : public TBlockTrimmerBase {
public:
    // children: one trimmer per child array, in child order.
    TTupleBlockTrimmer(std::vector children, arrow::MemoryPool* pool)
        : TBlockTrimmerBase(pool)
        , Children_(std::move(children))
    {}

    // Expects exactly one buffer (null bitmap) and as many children as there
    // are child trimmers.
    std::shared_ptr Trim(const std::shared_ptr& array) override {
        Y_ENSURE(array->buffers.size() == 1);

        // Stays empty when Nullable is false.
        std::shared_ptr trimmedNullBitmap;
        if constexpr (Nullable) {
            trimmedNullBitmap = TrimNullBitmap(array);
        }

        std::vector> trimmedChildren;
        Y_ENSURE(array->child_data.size() == Children_.size());
        for (size_t i = 0; i < Children_.size(); i++) {
            trimmedChildren.push_back(Children_[i]->Trim(array->child_data[i]));
        }

        return arrow::ArrayData::Make(array->type, array->length, {std::move(trimmedNullBitmap)}, std::move(trimmedChildren), array->GetNullCount());
    }

protected:
    // For derived classes that populate Children_ themselves
    // (see TTzDateBlockTrimmer); Children_ starts out empty.
    TTupleBlockTrimmer(arrow::MemoryPool* pool)
        : TBlockTrimmerBase(pool)
    {}

protected:
    // Child trimmers, indexed like array->child_data.
    std::vector Children_;
};

// Tuple trimmer with two children registered by the constructor.
// NOTE(review): the concrete child trimmer types are missing in this copy;
// the TDateLayout alias suggests a date-layout child is involved -- confirm
// against the canonical source.
template
class TTzDateBlockTrimmer : public TTupleBlockTrimmer {
    using TBase = TTupleBlockTrimmer;
    using TDateLayout = typename NUdf::TDataType::TLayout;

public:
    TTzDateBlockTrimmer(arrow::MemoryPool* pool)
        : TBase(pool)
    {
        this->Children_.push_back(std::make_unique>(pool));
        this->Children_.push_back(std::make_unique>(pool));
    }
};

// Trimmer for an array that has a null bitmap of its own and exactly one
// child; the child is handled by the wrapped trimmer.
class TExternalOptionalBlockTrimmer : public TBlockTrimmerBase {
public:
    // inner: trimmer applied to the single child array.
    TExternalOptionalBlockTrimmer(IBlockTrimmer::TPtr inner, arrow::MemoryPool* pool)
        : TBlockTrimmerBase(pool)
        , Inner_(std::move(inner))
    {}

    // Expects exactly one buffer (null bitmap) and exactly one child.
    // The bitmap is always processed here; there is no Nullable switch.
    std::shared_ptr Trim(const std::shared_ptr& array) override {
        Y_ENSURE(array->buffers.size() == 1);
        Y_ENSURE(array->child_data.size() == 1);

        auto trimmedNullBitmap = TrimNullBitmap(array);
        auto trimmedInner = Inner_->Trim(array->child_data[0]);

        return arrow::ArrayData::Make(array->type, array->length, {std::move(trimmedNullBitmap)}, {std::move(trimmedInner)}, array->GetNullCount());
    }

private:
    // Trimmer for the single child array.
    IBlockTrimmer::TPtr Inner_;
};

// Traits structure passed to DispatchByArrowTraits (see MakeBlockTrimmer):
// maps each trait alias onto the matching trimmer class and supplies the
// factory functions below.
// NOTE(review): template parameter lists of the aliases are missing in this
// copy.
struct TTrimmerTraits {
    using TResult = IBlockTrimmer;
    template
    using TTuple = TTupleBlockTrimmer;
    template
    using TFixedSize = TFixedSizeBlockTrimmer;
    template
    using TStrings = TStringBlockTrimmer;
    using TExtOptional = TExternalOptionalBlockTrimmer;
    template
    using TResource = TResourceBlockTrimmer;
    template
    using TTzDateReader = TTzDateBlockTrimmer;

    constexpr static bool PassType = false;

    // Creates a trimmer for a Pg type; the choice between the two trimmer
    // kinds depends only on desc.PassByValue. pgBuilder is unused.
    static TResult::TPtr MakePg(const NUdf::TPgTypeDescription& desc, const NUdf::IPgBuilder* pgBuilder, arrow::MemoryPool* pool) {
        Y_UNUSED(pgBuilder);
        if (desc.PassByValue) {
            return std::make_unique>(pool);
        } else {
            return std::make_unique>(pool);
        }
    }

    // Creates a resource trimmer; isOptional selects between the two
    // instantiations (the template arguments are missing in this copy).
    static TResult::TPtr MakeResource(bool isOptional, arrow::MemoryPool* pool) {
        if (isOptional) {
            return std::make_unique>(pool);
        } else {
            return std::make_unique>(pool);
        }
    }

    // Creates a tz-date trimmer; isOptional selects between the two
    // instantiations (the template arguments are missing in this copy).
    template
    static TResult::TPtr MakeTzDate(bool isOptional, arrow::MemoryPool* pool) {
        if (isOptional) {
            return std::make_unique>(pool);
        } else {
            return std::make_unique>(pool);
        }
    }
};

// Public entry point: builds a trimmer for `type` by delegating to
// DispatchByArrowTraits, passing nullptr as the third argument and `pool`
// as the fourth.
IBlockTrimmer::TPtr MakeBlockTrimmer(const NUdf::ITypeInfoHelper& typeInfoHelper, const NUdf::TType* type, arrow::MemoryPool* pool) {
    return DispatchByArrowTraits(typeInfoHelper, type, nullptr, pool);
}

}