Browse Source

KIKIMR-19213: cleaning

ivanmorozov 1 year ago
parent
commit
9702a974d6

+ 0 - 2
ydb/core/formats/arrow/arrow_filter.h

@@ -154,8 +154,6 @@ public:
 
     const std::vector<bool>& BuildSimpleFilter(const ui32 expectedSize) const;
 
-    TColumnFilter() = default;
-
     std::shared_ptr<arrow::BooleanArray> BuildArrowFilter(const ui32 expectedSize) const;
 
     bool IsTotalAllowFilter() const;

+ 10 - 10
ydb/core/formats/arrow/arrow_helpers.cpp

@@ -908,26 +908,26 @@ std::shared_ptr<arrow::RecordBatch> ReallocateBatch(std::shared_ptr<arrow::Recor
     return DeserializeBatch(SerializeBatch(original, arrow::ipc::IpcWriteOptions::Defaults()), original->schema());
 }
 
-std::shared_ptr<arrow::RecordBatch> MergeColumns(const std::vector<std::shared_ptr<arrow::RecordBatch>>& rb) {
+std::shared_ptr<arrow::RecordBatch> MergeColumns(const std::vector<std::shared_ptr<arrow::RecordBatch>>& batches) {
     std::vector<std::shared_ptr<arrow::Array>> columns;
     std::vector<std::shared_ptr<arrow::Field>> fields;
     std::optional<ui32> recordsCount;
     std::set<std::string> columnNames;
-    for (auto&& i : rb) {
-        if (!i) {
+    for (auto&& batch : batches) {
+        if (!batch) {
             continue;
         }
-        for (auto&& c : i->columns()) {
-            columns.emplace_back(c);
+        for (auto&& column : batch->columns()) {
+            columns.emplace_back(column);
             if (!recordsCount) {
-                recordsCount = c->length();
+                recordsCount = column->length();
             } else {
-                Y_VERIFY(*recordsCount == c->length());
+                Y_VERIFY(*recordsCount == column->length());
             }
         }
-        for (auto&& f : i->schema()->fields()) {
-            Y_VERIFY(columnNames.emplace(f->name()).second);
-            fields.emplace_back(f);
+        for (auto&& field : batch->schema()->fields()) {
+            AFL_VERIFY(columnNames.emplace(field->name()).second)("field_name", field->name());
+            fields.emplace_back(field);
         }
     }
     if (columns.empty()) {

+ 1 - 1
ydb/core/formats/arrow/permutations.cpp

@@ -6,7 +6,7 @@
 
 namespace NKikimr::NArrow {
 
-std::shared_ptr<arrow::UInt64Array> MakePermutation(int size, bool reverse) {
+std::shared_ptr<arrow::UInt64Array> MakePermutation(const int size, const bool reverse) {
     if (size < 1) {
         return {};
     }

+ 1 - 1
ydb/core/formats/arrow/permutations.h

@@ -5,7 +5,7 @@
 
 namespace NKikimr::NArrow {
 
-std::shared_ptr<arrow::UInt64Array> MakePermutation(int size, bool reverse = false);
+std::shared_ptr<arrow::UInt64Array> MakePermutation(const int size, const bool reverse = false);
 std::shared_ptr<arrow::UInt64Array> MakeFilterPermutation(const std::vector<ui64>& indexes);
 std::shared_ptr<arrow::UInt64Array> MakeSortPermutation(const std::shared_ptr<arrow::RecordBatch>& batch,
                                                         const std::shared_ptr<arrow::Schema>& sortingKey);

+ 6 - 3
ydb/core/formats/arrow/program.cpp

@@ -701,7 +701,7 @@ arrow::Status TProgramStep::ApplyFilters(TDatumBatch& batch) const {
         return arrow::Status::OK();
     }
 
-    NArrow::TColumnFilter bits;
+    NArrow::TColumnFilter bits = NArrow::TColumnFilter::BuildAllowFilter();
     auto status = MakeCombinedFilter(batch, bits);
     if (!status.ok()) {
         return status;
@@ -833,7 +833,7 @@ std::set<std::string> TProgramStep::GetColumnsInUsage() const {
 NArrow::TColumnFilter TProgram::MakeEarlyFilter(const std::shared_ptr<arrow::RecordBatch>& srcBatch,
                                             arrow::compute::ExecContext* ctx) const
 {
-    NArrow::TColumnFilter result;
+    NArrow::TColumnFilter result = NArrow::TColumnFilter::BuildAllowFilter();
     try {
         if (Steps.empty()) {
             return result;
@@ -849,7 +849,7 @@ NArrow::TColumnFilter TProgram::MakeEarlyFilter(const std::shared_ptr<arrow::Rec
         if (!step->ApplyAssignes(*rb, ctx).ok()) {
             return result;
         }
-        NArrow::TColumnFilter filter;
+        NArrow::TColumnFilter filter = NArrow::TColumnFilter::BuildAllowFilter();
         if (!step->MakeCombinedFilter(*rb, filter).ok()) {
             return result;
         }
@@ -865,6 +865,9 @@ std::set<std::string> TProgram::GetEarlyFilterColumns() const {
     if (Steps.empty()) {
         return result;
     }
+    if (Steps[0]->Filters.empty()) {
+        return result;
+    }
     return Steps[0]->GetColumnsInUsage();
 }
 

+ 3 - 2
ydb/core/tx/columnshard/engines/filter.cpp

@@ -1,9 +1,10 @@
 #include "filter.h"
 #include "defs.h"
-#include "indexed_read_data.h"
+#include "reader/read_metadata.h"
 
 #include <ydb/core/formats/arrow/arrow_helpers.h>
 #include <ydb/core/formats/arrow/custom_registry.h>
+#include <ydb/core/formats/arrow/program.h>
 
 namespace NKikimr::NOlap {
 
@@ -38,7 +39,7 @@ NArrow::TColumnFilter MakeSnapshotFilter(const std::shared_ptr<arrow::RecordBatc
     Y_VERIFY(snapSchema->num_fields() == 2);
     auto steps = batch->GetColumnByName(snapSchema->fields()[0]->name());
     auto ids = batch->GetColumnByName(snapSchema->fields()[1]->name());
-    NArrow::TColumnFilter result;
+    NArrow::TColumnFilter result = NArrow::TColumnFilter::BuildAllowFilter();
     TSnapshotGetter getter(steps, ids, snapshot);
     result.Reset(steps->length(), std::move(getter));
     return result;

+ 1 - 1
ydb/core/tx/columnshard/engines/predicate/container.h

@@ -65,7 +65,7 @@ public:
 
     NKikimr::NArrow::TColumnFilter BuildFilter(std::shared_ptr<arrow::RecordBatch> data) const {
         if (!Object) {
-            return NArrow::TColumnFilter();
+            return NArrow::TColumnFilter::BuildAllowFilter();
         }
         return NArrow::TColumnFilter::MakePredicateFilter(data, Object->Batch, CompareType);
     }

+ 1 - 1
ydb/core/tx/columnshard/engines/predicate/filter.cpp

@@ -5,7 +5,7 @@ namespace NKikimr::NOlap {
 
 NKikimr::NArrow::TColumnFilter TPKRangesFilter::BuildFilter(std::shared_ptr<arrow::RecordBatch> data) const {
     if (SortedRanges.empty()) {
-        return NArrow::TColumnFilter();
+        return NArrow::TColumnFilter::BuildAllowFilter();
     }
     NArrow::TColumnFilter result = SortedRanges.front().BuildFilter(data);
     for (ui32 i = 1; i < SortedRanges.size(); ++i) {