Browse Source

Intermediate changes

robot-piglet 1 year ago
parent
commit
5f9ad6df80

+ 19 - 19
ya

@@ -11,42 +11,42 @@ HASH_PREFIX = 10
 
 REGISTRY_ENDPOINT = os.environ.get("YA_REGISTRY_ENDPOINT", "https://s3.mds.yandex.net/devtools-registry")
 
-
+# Please do not change this dict, it is updated automatically
+# Start of mapping
 PLATFORM_MAP = {
     "data": {
         "darwin": {
-            "md5": "1356748b6bb7ce19359bc1c87d81cf5e",
+            "md5": "2acd2fce860fe3cea3384eec9483e655",
             "urls": [
-                f"{REGISTRY_ENDPOINT}/4948880410",
-            ],
+                f"{REGISTRY_ENDPOINT}/4948877004"
+            ]
         },
         "darwin-arm64": {
-            "md5": "2ba999e77660102f78c2584931eef3a2",
+            "md5": "084a3e96c01df8ad5a086d124a7ded28",
             "urls": [
-                f"{REGISTRY_ENDPOINT}/4948879923",
-            ],
+                f"{REGISTRY_ENDPOINT}/4948876606"
+            ]
         },
         "linux-aarch64": {
-            "md5": "731df682969dddc89bd7f3160960ffa1",
+            "md5": "4d017714a552c91c5c72ff925156f90b",
             "urls": [
-                f"{REGISTRY_ENDPOINT}/4948879460",
-            ],
+                f"{REGISTRY_ENDPOINT}/4948875890"
+            ]
         },
         "win32-clang-cl": {
-            "md5": "e2289cac60bf800470d858af48fce490",
+            "md5": "518baa26eca7f2b5fc8fe6cfa3dba5fc",
             "urls": [
-                f"{REGISTRY_ENDPOINT}/4948880870",
-            ],
+                f"{REGISTRY_ENDPOINT}/4948877440"
+            ]
         },
         "linux": {
-            "md5": "c71678ed371ae966d7c0235352a398e3",
+            "md5": "8eb7379c081b853abf9cf61a4870523b",
             "urls": [
-                f"{REGISTRY_ENDPOINT}/4948881174",
-            ],
-        },
+                f"{REGISTRY_ENDPOINT}/4948877787"
+            ]
+        }
     }
-}
-# End of mapping
+} # End of mapping
 
 
 def create_dirs(path):

+ 0 - 1065
yt/yt/client/formats/arrow_writer.cpp

@@ -1,1065 +0,0 @@
-#include "arrow_writer.h"
-
-#include "public.h"
-#include "schemaless_writer_adapter.h"
-
-#include <yt/yt/client/arrow/fbs/Message.fbs.h>
-#include <yt/yt/client/arrow/fbs/Schema.fbs.h>
-
-#include <yt/yt/client/table_client/columnar.h>
-#include <yt/yt/client/table_client/logical_type.h>
-#include <yt/yt/client/table_client/name_table.h>
-#include <yt/yt/client/table_client/public.h>
-#include <yt/yt/client/table_client/row_batch.h>
-#include <yt/yt/client/table_client/schema.h>
-
-#include <yt/yt/library/column_converters/column_converter.h>
-
-#include <yt/yt/core/concurrency/async_stream.h>
-#include <yt/yt/core/concurrency/public.h>
-
-#include <yt/yt/core/misc/blob_output.h>
-#include <yt/yt/core/misc/error.h>
-#include <yt/yt/core/misc/range.h>
-
-#include <vector>
-
-namespace NYT::NFormats {
-
-using namespace NTableClient;
-using namespace NComplexTypes;
-
-static const auto& Logger = FormatsLogger;
-
-using TBodyWriter = std::function<void(TMutableRef)>;
-using TBatchColumn = IUnversionedColumnarRowBatch::TColumn;
-
-////////////////////////////////////////////////////////////////////////////////
-
-struct TTypedBatchColumn
-{
-    const TBatchColumn* Column;
-    TLogicalTypePtr Type;
-};
-
-////////////////////////////////////////////////////////////////////////////////
-
-constexpr i64 ArrowAlignment = 8;
-
-flatbuffers::Offset<flatbuffers::String> SerializeString(
-    flatbuffers::FlatBufferBuilder* flatbufBuilder,
-    const TString& str)
-{
-    return flatbufBuilder->CreateString(str.data(), str.length());
-}
-
-std::tuple<org::apache::arrow::flatbuf::Type, flatbuffers::Offset<void>> SerializeColumnType(
-    flatbuffers::FlatBufferBuilder* flatbufBuilder,
-    TColumnSchema schema)
-{
-    auto simpleType = CastToV1Type(schema.LogicalType()).first;
-    switch (simpleType) {
-        case ESimpleLogicalValueType::Null:
-            return std::make_tuple(
-                org::apache::arrow::flatbuf::Type_Null,
-                org::apache::arrow::flatbuf::CreateNull(*flatbufBuilder)
-                    .Union());
-
-        case ESimpleLogicalValueType::Int64:
-        case ESimpleLogicalValueType::Uint64:
-        case ESimpleLogicalValueType::Int8:
-        case ESimpleLogicalValueType::Uint8:
-        case ESimpleLogicalValueType::Int16:
-        case ESimpleLogicalValueType::Uint16:
-        case ESimpleLogicalValueType::Int32:
-        case ESimpleLogicalValueType::Uint32:
-            return std::make_tuple(
-                org::apache::arrow::flatbuf::Type_Int,
-                org::apache::arrow::flatbuf::CreateInt(
-                    *flatbufBuilder,
-                    GetIntegralTypeBitWidth(simpleType),
-                    IsIntegralTypeSigned(simpleType))
-                    .Union());
-
-        case ESimpleLogicalValueType::Double:
-            return std::make_tuple(
-                org::apache::arrow::flatbuf::Type_FloatingPoint,
-                org::apache::arrow::flatbuf::CreateFloatingPoint(
-                    *flatbufBuilder,
-                    org::apache::arrow::flatbuf::Precision_DOUBLE)
-                    .Union());
-
-        case ESimpleLogicalValueType::Boolean:
-            return std::make_tuple(
-                org::apache::arrow::flatbuf::Type_Bool,
-                org::apache::arrow::flatbuf::CreateBool(*flatbufBuilder)
-                    .Union());
-
-        case ESimpleLogicalValueType::String:
-        case ESimpleLogicalValueType::Any:
-            return std::make_tuple(
-                org::apache::arrow::flatbuf::Type_Binary,
-                org::apache::arrow::flatbuf::CreateBinary(*flatbufBuilder)
-                    .Union());
-
-        case ESimpleLogicalValueType::Utf8:
-            return std::make_tuple(
-                org::apache::arrow::flatbuf::Type_Utf8,
-                org::apache::arrow::flatbuf::CreateUtf8(*flatbufBuilder)
-                    .Union());
-
-            // TODO(babenko): the following types are not supported:
-            //   Date
-            //   Datetime
-            //   Interval
-            //   Timestamp
-
-        default:
-            THROW_ERROR_EXCEPTION("Column %v has type %Qlv that is not currently supported by Arrow encoder",
-                schema.GetDiagnosticNameString(),
-                simpleType);
-    }
-}
-
-bool IsRleButNotDictionaryEncodedStringLikeColumn(const TBatchColumn& column)
-{
-    auto simpleType = CastToV1Type(column.Type).first;
-    return IsStringLikeType(simpleType) &&
-        column.Rle &&
-        !column.Rle->ValueColumn->Dictionary;
-}
-
-bool IsRleAndDictionaryEncodedColumn(const TBatchColumn& column)
-{
-    return column.Rle &&
-        column.Rle->ValueColumn->Dictionary;
-}
-
-bool IsDictionaryEncodedColumn(const TBatchColumn& column)
-{
-    return column.Dictionary ||
-        IsRleAndDictionaryEncodedColumn(column) ||
-        IsRleButNotDictionaryEncodedStringLikeColumn(column);
-}
-
-
-struct TRecordBatchBodyPart
-{
-    i64 Size;
-    TBodyWriter Writer;
-};
-
-struct TRecordBatchSerializationContext final
-{
-    explicit TRecordBatchSerializationContext(flatbuffers::FlatBufferBuilder* flatbufBuilder)
-        : FlatbufBuilder(flatbufBuilder)
-    {}
-
-    void AddFieldNode(i64 length, i64 nullCount)
-    {
-        FieldNodes.emplace_back(length, nullCount);
-    }
-
-    void AddBuffer(i64 size, TBodyWriter writer)
-    {
-        YT_LOG_DEBUG("Buffer registered (Offset: %v, Size: %v)",
-            CurrentBodyOffset,
-            size);
-
-        Buffers.emplace_back(CurrentBodyOffset, size);
-        CurrentBodyOffset += AlignUp<i64>(size, ArrowAlignment);
-        Parts.push_back(TRecordBatchBodyPart{size, std::move(writer)});
-    }
-
-    flatbuffers::FlatBufferBuilder* const FlatbufBuilder;
-
-    i64 CurrentBodyOffset = 0;
-    std::vector<org::apache::arrow::flatbuf::FieldNode> FieldNodes;
-    std::vector<org::apache::arrow::flatbuf::Buffer> Buffers;
-    std::vector<TRecordBatchBodyPart> Parts;
-};
-
-template <class T>
-TMutableRange<T> GetTypedValues(TMutableRef ref)
-{
-    return MakeMutableRange(
-        reinterpret_cast<T*>(ref.Begin()),
-        reinterpret_cast<T*>(ref.End()));
-}
-
-void SerializeColumnPrologue(
-    const TTypedBatchColumn& typedColumn,
-    TRecordBatchSerializationContext* context)
-{
-    const auto* column = typedColumn.Column;
-    if (column->NullBitmap ||
-        column->Rle && column->Rle->ValueColumn->NullBitmap)
-    {
-        if (column->Rle) {
-            const auto* valueColumn = column->Rle->ValueColumn;
-            auto rleIndexes = column->GetTypedValues<ui64>();
-
-            context->AddFieldNode(
-                column->ValueCount,
-                CountOnesInRleBitmap(
-                    valueColumn->NullBitmap->Data,
-                    rleIndexes,
-                    column->StartIndex,
-                    column->StartIndex + column->ValueCount));
-
-            context->AddBuffer(
-                GetBitmapByteSize(column->ValueCount),
-                [=] (TMutableRef dstRef) {
-                    BuildValidityBitmapFromRleNullBitmap(
-                        valueColumn->NullBitmap->Data,
-                        rleIndexes,
-                        column->StartIndex,
-                        column->StartIndex + column->ValueCount,
-                        dstRef);
-                });
-        } else {
-            context->AddFieldNode(
-                column->ValueCount,
-                CountOnesInBitmap(
-                    column->NullBitmap->Data,
-                    column->StartIndex,
-                    column->StartIndex + column->ValueCount));
-
-            context->AddBuffer(
-                GetBitmapByteSize(column->ValueCount),
-                [=] (TMutableRef dstRef) {
-                    CopyBitmapRangeToBitmapNegated(
-                        column->NullBitmap->Data,
-                        column->StartIndex,
-                        column->StartIndex + column->ValueCount,
-                        dstRef);
-                });
-        }
-    } else {
-        context->AddFieldNode(
-            column->ValueCount,
-            0);
-
-        context->AddBuffer(
-            0,
-            [=] (TMutableRef /*dstRef*/) {
-            });
-    }
-}
-
-void SerializeRleButNotDictionaryEncodedStringLikeColumn(
-    const TTypedBatchColumn& typedColumn,
-    TRecordBatchSerializationContext* context)
-{
-    const auto* column = typedColumn.Column;
-    YT_VERIFY(column->Values);
-    YT_VERIFY(column->Values->BitWidth == 64);
-    YT_VERIFY(column->Values->BaseValue == 0);
-    YT_VERIFY(!column->Values->ZigZagEncoded);
-
-    YT_LOG_DEBUG("Adding RLE but not dictionary-encoded string-like column (ColumnId: %v, StartIndex: %v, ValueCount: %v)",
-        column->Id,
-        column->StartIndex,
-        column->ValueCount);
-
-    SerializeColumnPrologue(typedColumn, context);
-
-    auto rleIndexes = column->GetTypedValues<ui64>();
-
-    context->AddBuffer(
-        sizeof(ui32) * column->ValueCount,
-        [=] (TMutableRef dstRef) {
-            BuildIotaDictionaryIndexesFromRleIndexes(
-                rleIndexes,
-                column->StartIndex,
-                column->StartIndex + column->ValueCount,
-                GetTypedValues<ui32>(dstRef));
-        });
-}
-
-void SerializeDictionaryColumn(
-    const TTypedBatchColumn& typedColumn,
-    TRecordBatchSerializationContext* context)
-{
-    const auto* column = typedColumn.Column;
-    YT_VERIFY(column->Values);
-    YT_VERIFY(column->Dictionary->ZeroMeansNull);
-    YT_VERIFY(column->Values->BitWidth == 32);
-    YT_VERIFY(column->Values->BaseValue == 0);
-    YT_VERIFY(!column->Values->ZigZagEncoded);
-
-    YT_LOG_DEBUG("Adding dictionary column (ColumnId: %v, StartIndex: %v, ValueCount: %v, Rle: %v)",
-        column->Id,
-        column->StartIndex,
-        column->ValueCount,
-        column->Rle.has_value());
-
-    auto relevantDictionaryIndexes = column->GetRelevantTypedValues<ui32>();
-
-    context->AddFieldNode(
-        column->ValueCount,
-        CountNullsInDictionaryIndexesWithZeroNull(relevantDictionaryIndexes));
-
-    context->AddBuffer(
-        GetBitmapByteSize(column->ValueCount),
-        [=] (TMutableRef dstRef) {
-            BuildValidityBitmapFromDictionaryIndexesWithZeroNull(
-                relevantDictionaryIndexes,
-                dstRef);
-        });
-
-    context->AddBuffer(
-        sizeof(ui32) * column->ValueCount,
-        [=] (TMutableRef dstRef) {
-            BuildDictionaryIndexesFromDictionaryIndexesWithZeroNull(
-                relevantDictionaryIndexes,
-                GetTypedValues<ui32>(dstRef));
-        });
-}
-
-void SerializeRleDictionaryColumn(
-    const TTypedBatchColumn& typedColumn,
-    TRecordBatchSerializationContext* context)
-{
-    const auto* column = typedColumn.Column;
-    YT_VERIFY(column->Values);
-    YT_VERIFY(column->Values->BitWidth == 64);
-    YT_VERIFY(column->Values->BaseValue == 0);
-    YT_VERIFY(!column->Values->ZigZagEncoded);
-    YT_VERIFY(column->Rle->ValueColumn->Dictionary->ZeroMeansNull);
-    YT_VERIFY(column->Rle->ValueColumn->Values->BitWidth == 32);
-    YT_VERIFY(column->Rle->ValueColumn->Values->BaseValue == 0);
-    YT_VERIFY(!column->Rle->ValueColumn->Values->ZigZagEncoded);
-
-    YT_LOG_DEBUG("Adding dictionary column (ColumnId: %v, StartIndex: %v, ValueCount: %v, Rle: %v)",
-        column->Id,
-        column->StartIndex,
-        column->ValueCount,
-        column->Rle.has_value());
-
-    auto dictionaryIndexes = column->Rle->ValueColumn->GetTypedValues<ui32>();
-    auto rleIndexes = column->GetTypedValues<ui64>();
-
-    context->AddFieldNode(
-        column->ValueCount,
-        CountNullsInRleDictionaryIndexesWithZeroNull(
-            dictionaryIndexes,
-            rleIndexes,
-            column->StartIndex,
-            column->StartIndex + column->ValueCount));
-
-    context->AddBuffer(
-        GetBitmapByteSize(column->ValueCount),
-        [=] (TMutableRef dstRef) {
-            BuildValidityBitmapFromRleDictionaryIndexesWithZeroNull(
-                dictionaryIndexes,
-                rleIndexes,
-                column->StartIndex,
-                column->StartIndex + column->ValueCount,
-                dstRef);
-        });
-
-    context->AddBuffer(
-        sizeof(ui32) * column->ValueCount,
-        [=] (TMutableRef dstRef) {
-            BuildDictionaryIndexesFromRleDictionaryIndexesWithZeroNull(
-                dictionaryIndexes,
-                rleIndexes,
-                column->StartIndex,
-                column->StartIndex + column->ValueCount,
-                GetTypedValues<ui32>(dstRef));
-        });
-}
-
-void SerializeIntegerColumn(
-    const TTypedBatchColumn& typedColumn,
-    ESimpleLogicalValueType simpleType,
-    TRecordBatchSerializationContext* context)
-{
-    const auto* column = typedColumn.Column;
-    YT_VERIFY(column->Values);
-
-    YT_LOG_DEBUG("Adding integer column (ColumnId: %v, StartIndex: %v, ValueCount: %v, Rle: %v)",
-        column->Id,
-        column->StartIndex,
-        column->ValueCount,
-        column->Rle.has_value());
-
-    SerializeColumnPrologue(typedColumn, context);
-
-    context->AddBuffer(
-        column->ValueCount * GetIntegralTypeByteSize(simpleType),
-        [=] (TMutableRef dstRef) {
-            const auto* valueColumn = column->Rle
-                ? column->Rle->ValueColumn
-                : column;
-            auto values = valueColumn->GetTypedValues<ui64>();
-
-            auto rleIndexes = column->Rle
-                ? column->GetTypedValues<ui64>()
-                : TRange<ui64>();
-
-            switch (simpleType) {
-#define XX(cppType, ytType)                               \
-    case ESimpleLogicalValueType::ytType: {               \
-        auto dstValues = GetTypedValues<cppType>(dstRef); \
-        auto* currentOutput = dstValues.Begin();          \
-        DecodeIntegerVector(                              \
-            column->StartIndex,                           \
-            column->StartIndex + column->ValueCount,      \
-            valueColumn->Values->BaseValue,               \
-            valueColumn->Values->ZigZagEncoded,           \
-            TRange<ui32>(),                               \
-            rleIndexes,                                   \
-            [&] (auto index) {                            \
-                return values[index];                     \
-            },                                            \
-            [&] (auto value) {                            \
-                *currentOutput++ = value;                 \
-            });                                           \
-        break;                                            \
-    }
-
-                XX(i8, Int8)
-                XX(i16, Int16)
-                XX(i32, Int32)
-                XX(i64, Int64)
-                XX(ui8, Uint8)
-                XX(ui16, Uint16)
-                XX(ui32, Uint32)
-                XX(ui64, Uint64)
-
-#undef XX
-
-                default:
-                    THROW_ERROR_EXCEPTION("Integer column %v has unexpected type %Qlv",
-                        typedColumn.Column->Id,
-                        simpleType);
-            }
-        });
-}
-
-void SerializeDoubleColumn(
-    const TTypedBatchColumn& typedColumn,
-    TRecordBatchSerializationContext* context)
-{
-    const auto* column = typedColumn.Column;
-    YT_VERIFY(column->Values);
-    YT_VERIFY(column->Values->BitWidth == 64);
-    YT_VERIFY(column->Values->BaseValue == 0);
-    YT_VERIFY(!column->Values->ZigZagEncoded);
-
-    YT_LOG_DEBUG("Adding double column (ColumnId: %v, StartIndex: %v, ValueCount: %v)",
-        column->Id,
-        column->StartIndex,
-        column->ValueCount,
-        column->Rle.has_value());
-
-    SerializeColumnPrologue(typedColumn, context);
-
-    context->AddBuffer(
-        column->ValueCount * sizeof(double),
-        [=] (TMutableRef dstRef) {
-            auto relevantValues = column->GetRelevantTypedValues<double>();
-            ::memcpy(
-                dstRef.Begin(),
-                relevantValues.Begin(),
-                column->ValueCount * sizeof(double));
-        });
-}
-
-void SerializeStringLikeColumn(
-    const TTypedBatchColumn& typedColumn,
-    TRecordBatchSerializationContext* context)
-{
-    const auto* column = typedColumn.Column;
-    YT_VERIFY(column->Values);
-    YT_VERIFY(column->Values->BaseValue == 0);
-    YT_VERIFY(column->Values->BitWidth == 32);
-    YT_VERIFY(column->Values->ZigZagEncoded);
-    YT_VERIFY(column->Strings);
-    YT_VERIFY(column->Strings->AvgLength);
-    YT_VERIFY(!column->Rle);
-
-    auto startIndex = column->StartIndex;
-    auto endIndex = startIndex + column->ValueCount;
-    auto stringData = column->Strings->Data;
-    auto avgLength = *column->Strings->AvgLength;
-
-    auto offsets = column->GetTypedValues<ui32>();
-    auto startOffset = DecodeStringOffset(offsets, avgLength, startIndex);
-    auto endOffset = DecodeStringOffset(offsets, avgLength, endIndex);
-    auto stringsSize = endOffset - startOffset;
-
-    YT_LOG_DEBUG("Adding string-like column (ColumnId: %v, StartIndex: %v, ValueCount: %v, StartOffset: %v, EndOffset: %v, StringsSize: %v)",
-        column->Id,
-        column->StartIndex,
-        column->ValueCount,
-        startOffset,
-        endOffset,
-        stringsSize);
-
-    SerializeColumnPrologue(typedColumn, context);
-
-    context->AddBuffer(
-        sizeof(i32) * (column->ValueCount + 1),
-        [=] (TMutableRef dstRef) {
-            DecodeStringOffsets(
-                offsets,
-                avgLength,
-                startIndex,
-                endIndex,
-                GetTypedValues<ui32>(dstRef));
-        });
-
-    context->AddBuffer(
-        stringsSize,
-        [=] (TMutableRef dstRef) {
-            ::memcpy(
-                dstRef.Begin(),
-                stringData.Begin() + startOffset,
-                stringsSize);
-        });
-}
-
-void SerializeBooleanColumn(
-    const TTypedBatchColumn& typedColumn,
-    TRecordBatchSerializationContext* context)
-{
-    const auto* column = typedColumn.Column;
-    YT_VERIFY(column->Values);
-    YT_VERIFY(!column->Values->ZigZagEncoded);
-    YT_VERIFY(column->Values->BaseValue == 0);
-    YT_VERIFY(column->Values->BitWidth == 1);
-
-    YT_LOG_DEBUG("Adding boolean column (ColumnId: %v, StartIndex: %v, ValueCount: %v)",
-        column->Id,
-        column->StartIndex,
-        column->ValueCount);
-
-    SerializeColumnPrologue(typedColumn, context);
-
-    context->AddBuffer(
-        GetBitmapByteSize(column->ValueCount),
-        [=] (TMutableRef dstRef) {
-            CopyBitmapRangeToBitmap(
-                column->Values->Data,
-                column->StartIndex,
-                column->StartIndex + column->ValueCount,
-                dstRef);
-        });
-}
-
-void SerializeColumn(
-    const TTypedBatchColumn& typedColumn,
-    TRecordBatchSerializationContext* context)
-{
-    const auto* column = typedColumn.Column;
-
-    if (IsRleButNotDictionaryEncodedStringLikeColumn(*typedColumn.Column)) {
-        SerializeRleButNotDictionaryEncodedStringLikeColumn(typedColumn, context);
-        return;
-    }
-
-    if (column->Dictionary) {
-        SerializeDictionaryColumn(typedColumn, context);
-        return;
-    }
-
-    if (column->Rle && column->Rle->ValueColumn->Dictionary) {
-        SerializeRleDictionaryColumn(typedColumn, context);
-        return;
-    }
-
-    auto simpleType = CastToV1Type(typedColumn.Type).first;
-    if (IsIntegralType(simpleType)) {
-        SerializeIntegerColumn(typedColumn, simpleType, context);
-    } else if (simpleType == ESimpleLogicalValueType::Double) {
-        SerializeDoubleColumn(typedColumn, context);
-    } else if (IsStringLikeType(simpleType)) {
-        SerializeStringLikeColumn(typedColumn, context);
-    } else if (simpleType == ESimpleLogicalValueType::Boolean) {
-        SerializeBooleanColumn(typedColumn, context);
-    } else if (simpleType == ESimpleLogicalValueType::Null) {
-        // No buffers are allocated for null columns.
-    } else {
-        THROW_ERROR_EXCEPTION("Column %v has unexpected type %Qlv",
-            typedColumn.Column->Id,
-            simpleType);
-    }
-}
-
-auto SerializeRecordBatch(
-    flatbuffers::FlatBufferBuilder* flatbufBuilder,
-    int length,
-    TRange<TTypedBatchColumn> typedColumns)
-{
-    auto context = New<TRecordBatchSerializationContext>(flatbufBuilder);
-
-    for (const auto& typedColumn : typedColumns) {
-        SerializeColumn(typedColumn, context.Get());
-    }
-
-    auto fieldNodesOffset = flatbufBuilder->CreateVectorOfStructs(context->FieldNodes);
-
-    auto buffersOffset = flatbufBuilder->CreateVectorOfStructs(context->Buffers);
-
-    auto recordBatchOffset = org::apache::arrow::flatbuf::CreateRecordBatch(
-        *flatbufBuilder,
-        length,
-        fieldNodesOffset,
-        buffersOffset);
-
-    auto totalSize = context->CurrentBodyOffset;
-
-    return std::make_tuple(
-        recordBatchOffset,
-        totalSize,
-        [context = std::move(context)] (TMutableRef dstRef) {
-            char* current = dstRef.Begin();
-            for (const auto& part : context->Parts) {
-                part.Writer(TMutableRef(current, current + part.Size));
-                current += AlignUp<i64>(part.Size, ArrowAlignment);
-            }
-            YT_VERIFY(current == dstRef.End());
-        });
-}
-///////////////////////////////////////////////////////////////////////////////
-
-class TArrowWriter
-    : public TSchemalessFormatWriterBase
-{
-public:
-    TArrowWriter(
-        TNameTablePtr nameTable,
-        const std::vector<NTableClient::TTableSchemaPtr>& tableSchemas,
-        NConcurrency::IAsyncOutputStreamPtr output,
-        bool enableContextSaving,
-        TControlAttributesConfigPtr controlAttributesConfig,
-        int keyColumnCount)
-        : TSchemalessFormatWriterBase(
-            std::move(nameTable),
-            std::move(output),
-            enableContextSaving,
-            std::move(controlAttributesConfig),
-            keyColumnCount)
-    {
-        YT_VERIFY(tableSchemas.size() > 0);
-
-        auto tableSchema = tableSchemas[0];
-        auto columnCount = NameTable_->GetSize();
-
-        for (int columnIndex = 0; columnIndex < columnCount; columnIndex++) {
-            ColumnSchemas_.push_back(GetColumnSchema(tableSchema, columnIndex));
-        }
-    }
-
-private:
-    void Reset()
-    {
-        Messages_.clear();
-        TypedColumns_.clear();
-        NumberOfRows_ = 0;
-    }
-
-    void DoWrite(TRange<TUnversionedRow> rows) override
-    {
-        Reset();
-
-        auto convertedColumns = NColumnConverters::ConvertRowsToColumns(rows, ColumnSchemas_);
-
-        std::vector<const TBatchColumn*> rootColumns;
-        rootColumns.reserve( std::ssize(convertedColumns));
-        for (ssize_t columnIndex = 0; columnIndex < std::ssize(convertedColumns); columnIndex++) {
-            rootColumns.push_back(convertedColumns[columnIndex].RootColumn);
-        }
-        NumberOfRows_ = rows.size();
-        PrepareColumns(rootColumns);
-        Encode();
-    }
-
-    void DoWriteBatch(NTableClient::IUnversionedRowBatchPtr rowBatch) override
-    {
-        auto columnarBatch = rowBatch->TryAsColumnar();
-        if (!columnarBatch) {
-            YT_LOG_DEBUG("Encoding non-columnar batch; running write rows");
-            DoWrite(rowBatch->MaterializeRows());
-        } else {
-            YT_LOG_DEBUG("Encoding columnar batch");
-            Reset();
-            NumberOfRows_ = rowBatch->GetRowCount();
-            PrepareColumns(columnarBatch->MaterializeColumns());
-            Encode();
-        }
-    }
-
-    void Encode()
-    {
-        auto output = GetOutputStream();
-        if (IsSchemaMessageNeeded()) {
-            if (!IsFirstBatch_) {
-                RegisterEosMarker();
-            }
-            ResetArrowDictionaries();
-            PrepareSchema();
-        }
-        IsFirstBatch_ = false;
-        PrepareDictionaryBatches();
-        PrepareRecordBatch();
-
-        WritePayload(output);
-        TryFlushBuffer(true);
-    }
-
-private:
-    bool IsFirstBatch_ = true;
-    size_t NumberOfRows_ = 0;
-    std::vector<TTypedBatchColumn> TypedColumns_;
-    std::vector<TColumnSchema> ColumnSchemas_;
-    std::vector<IUnversionedColumnarRowBatch::TDictionaryId> ArrowDictionaryIds_;
-
-    struct TMessage
-    {
-        std::optional<flatbuffers::FlatBufferBuilder> FlatbufBuilder;
-        i64 BodySize;
-        TBodyWriter BodyWriter;
-    };
-
-    std::vector<TMessage> Messages_;
-
-    bool CheckIfSystemColumnEnable(int columnIndex)
-    {
-        return ControlAttributesConfig_->EnableTableIndex && IsTableIndexColumnId(columnIndex) ||
-            ControlAttributesConfig_->EnableRangeIndex && IsRangeIndexColumnId(columnIndex) ||
-            ControlAttributesConfig_->EnableRowIndex && IsRowIndexColumnId(columnIndex) ||
-            ControlAttributesConfig_->EnableTabletIndex && IsTabletIndexColumnId(columnIndex);
-    }
-
-    bool CheckIfTypeIsNotNull(int columnIndex)
-    {
-        YT_VERIFY(columnIndex >= 0 && columnIndex < std::ssize(ColumnSchemas_));
-        return CastToV1Type(ColumnSchemas_[columnIndex].LogicalType()).first != ESimpleLogicalValueType::Null;
-    }
-
-    TColumnSchema GetColumnSchema(NTableClient::TTableSchemaPtr& tableSchema, int columnIndex)
-    {
-        YT_VERIFY(columnIndex >= 0);
-        auto name = NameTable_->GetName(columnIndex);
-        auto columnSchema = tableSchema->FindColumn(name);
-        if (!columnSchema) {
-            if (IsSystemColumnId(columnIndex) && CheckIfSystemColumnEnable(columnIndex)) {
-                return TColumnSchema(TString(name), EValueType::Int64);
-            }
-            return TColumnSchema(TString(name), EValueType::Null);
-        }
-        return *columnSchema;
-    }
-
-    void PrepareColumns(const TRange<const TBatchColumn*>& batchColumns)
-    {
-        TypedColumns_.reserve(batchColumns.Size());
-        for (const auto* column : batchColumns) {
-            if (CheckIfTypeIsNotNull(column->Id)) {
-                YT_VERIFY(column->Id >= 0 && column->Id < std::ssize(ColumnSchemas_));
-                TypedColumns_.push_back(TTypedBatchColumn{
-                        column,
-                        ColumnSchemas_[column->Id].LogicalType()});
-            }
-        }
-    }
-
-    bool IsSchemaMessageNeeded()
-    {
-        if (IsFirstBatch_) {
-            return true;
-        }
-        YT_VERIFY(ArrowDictionaryIds_.size() == TypedColumns_.size());
-        bool result = false;
-        for (int index = 0; index < std::ssize(TypedColumns_); ++index) {
-            bool currentDictionary = IsDictionaryEncodedColumn(*TypedColumns_[index].Column);
-            bool previousDictionary = ArrowDictionaryIds_[index] != IUnversionedColumnarRowBatch::NullDictionaryId;
-            if (currentDictionary != previousDictionary) {
-                result = true;
-            }
-        }
-        return result;
-    }
-
-    void ResetArrowDictionaries()
-    {
-        ArrowDictionaryIds_.assign(TypedColumns_.size(), IUnversionedColumnarRowBatch::NullDictionaryId);
-    }
-
-    void RegisterEosMarker()
-    {
-        YT_LOG_DEBUG("EOS marker registered");
-
-        Messages_.push_back(TMessage{
-                std::nullopt,
-                0,
-                TBodyWriter()});
-    }
-
-    void RegisterMessage(
-        [[maybe_unused]] org::apache::arrow::flatbuf::MessageHeader type,
-        flatbuffers::FlatBufferBuilder&& flatbufBuilder,
-        i64 bodySize = 0,
-        std::function<void(TMutableRef)> bodyWriter = nullptr)
-    {
-        YT_LOG_DEBUG("Message registered (Type: %v, MessageSize: %v, BodySize: %v)",
-            org::apache::arrow::flatbuf::EnumNamesMessageHeader()[type],
-            flatbufBuilder.GetSize(),
-            bodySize);
-
-        YT_VERIFY((bodySize % ArrowAlignment) == 0);
-        Messages_.push_back(TMessage{
-                std::move(flatbufBuilder),
-                bodySize,
-                std::move(bodyWriter)});
-    }
-
-    // Serializes the Arrow schema for TypedColumns_ into a Schema message and registers it.
-    //
-    // One flatbuf Field is emitted per typed column, in TypedColumns_ order. Columns for which
-    // IsDictionaryEncodedColumn holds also get a DictionaryEncoding whose id comes from a counter
-    // starting at zero and whose index type is an unsigned 32-bit integer.
-    void PrepareSchema()
-    {
-        namespace NFlatbuf = org::apache::arrow::flatbuf;
-
-        flatbuffers::FlatBufferBuilder flatbufBuilder;
-
-        int arrowDictionaryIdCounter = 0;
-        std::vector<flatbuffers::Offset<NFlatbuf::Field>> fieldOffsets;
-        fieldOffsets.reserve(std::ssize(TypedColumns_));
-        for (const auto& typedColumn : TypedColumns_) {
-            YT_VERIFY(typedColumn.Column->Id >= 0 && typedColumn.Column->Id < std::ssize(ColumnSchemas_));
-            // The schema is only read here, so bind it by reference instead of copying it per column.
-            const auto& columnSchema = ColumnSchemas_[typedColumn.Column->Id];
-            auto nameOffset = SerializeString(&flatbufBuilder, columnSchema.Name());
-
-            auto [typeType, typeOffset] = SerializeColumnType(&flatbufBuilder, columnSchema);
-
-            // A default-constructed offset is null; passing it leaves the field without a dictionary.
-            flatbuffers::Offset<NFlatbuf::DictionaryEncoding> dictionaryEncodingOffset;
-            if (IsDictionaryEncodedColumn(*typedColumn.Column)) {
-                // The index type table is referenced only by the dictionary encoding, so it is built
-                // only for dictionary-encoded columns rather than left unreferenced in the buffer.
-                auto indexTypeOffset = NFlatbuf::CreateInt(flatbufBuilder, 32, false);
-                dictionaryEncodingOffset = NFlatbuf::CreateDictionaryEncoding(
-                    flatbufBuilder,
-                    arrowDictionaryIdCounter++,
-                    indexTypeOffset);
-            }
-
-            fieldOffsets.push_back(NFlatbuf::CreateField(
-                flatbufBuilder,
-                nameOffset,
-                columnSchema.LogicalType()->IsNullable(),
-                typeType,
-                typeOffset,
-                dictionaryEncodingOffset));
-        }
-
-        auto fieldsOffset = flatbufBuilder.CreateVector(fieldOffsets);
-
-        auto schemaOffset = NFlatbuf::CreateSchema(
-            flatbufBuilder,
-            NFlatbuf::Endianness_Little,
-            fieldsOffset);
-
-        // A schema message carries no body, hence the zero body length.
-        auto messageOffset = NFlatbuf::CreateMessage(
-            flatbufBuilder,
-            NFlatbuf::MetadataVersion_V4,
-            NFlatbuf::MessageHeader_Schema,
-            schemaOffset.Union(),
-            0);
-
-        flatbufBuilder.Finish(messageOffset);
-
-        RegisterMessage(
-            NFlatbuf::MessageHeader_Schema,
-            std::move(flatbufBuilder));
-    }
-
-    // Emits a dictionary batch for every typed column that carries a dictionary, unless the
-    // dictionary last sent for that column has the same YT dictionary id.
-    //
-    // Arrow dictionary ids are handed out from a counter that starts at zero on every call and
-    // advances once per dictionary-bearing column, whether or not the dictionary is re-sent.
-    // NOTE(review): presumably this numbering must line up with the ids written by PrepareSchema,
-    // which uses IsDictionaryEncodedColumn as its only condition — confirm the two agree.
-    void PrepareDictionaryBatches()
-    {
-        int arrowDictionaryIdCounter = 0;
-        auto prepareDictionaryBatch = [&] (
-            int columnIndex,
-            IUnversionedColumnarRowBatch::TDictionaryId ytDictionaryId,
-            const TBatchColumn* dictionaryColumn) {
-            // The id is consumed before the reuse check so that skipped columns still occupy a slot.
-            int arrowDictionaryId = arrowDictionaryIdCounter++;
-            const auto& typedColumn = TypedColumns_[columnIndex];
-            // ArrowDictionaryIds_ remembers, per column index, the YT dictionary id sent last.
-            auto previousYTDictionaryId = ArrowDictionaryIds_[columnIndex];
-            if (ytDictionaryId == previousYTDictionaryId) {
-                YT_LOG_DEBUG("Reusing previous dictionary (ColumnId: %v, YTDictionaryId: %v, ArrowDictionaryId: %v)",
-                    typedColumn.Column->Id,
-                    ytDictionaryId,
-                    arrowDictionaryId);
-            } else {
-                YT_LOG_DEBUG("Sending new dictionary (ColumnId: %v, YTDictionaryId: %v, ArrowDictionaryId: %v)",
-                    typedColumn.Column->Id,
-                    ytDictionaryId,
-                    arrowDictionaryId);
-                // The dictionary values are serialized with the type of the column that refers to them.
-                PrepareDictionaryBatch(
-                    TTypedBatchColumn{dictionaryColumn, typedColumn.Type},
-                    arrowDictionaryId);
-                ArrowDictionaryIds_[columnIndex] = ytDictionaryId;
-            }
-        };
-
-        for (int columnIndex = 0; columnIndex < std::ssize(TypedColumns_); ++columnIndex) {
-            const auto& typedColumn = TypedColumns_[columnIndex];
-            if (typedColumn.Column->Dictionary) {
-                // Plain dictionary encoding: the dictionary hangs directly off the column.
-                YT_LOG_DEBUG("Adding dictionary batch for dictionary-encoded column (ColumnId: %v)",
-                    typedColumn.Column->Id);
-                prepareDictionaryBatch(
-                    columnIndex,
-                    typedColumn.Column->Dictionary->DictionaryId,
-                    typedColumn.Column->Dictionary->ValueColumn);
-            } else if (IsRleButNotDictionaryEncodedStringLikeColumn(*typedColumn.Column)) {
-                // The RLE value column itself is sent as the dictionary; a freshly generated id
-                // never equals the stored one, so this dictionary is sent on every call.
-                YT_LOG_DEBUG("Adding dictionary batch for RLE but not dictionary-encoded string-like column (ColumnId: %v)",
-                    typedColumn.Column->Id);
-                prepareDictionaryBatch(
-                    columnIndex,
-                    IUnversionedColumnarRowBatch::GenerateDictionaryId(), // any unique one will do
-                    typedColumn.Column->Rle->ValueColumn);
-            } else if (IsRleAndDictionaryEncodedColumn(*typedColumn.Column)) {
-                // RLE over a dictionary-encoded value column: the dictionary sits one level deeper.
-                YT_LOG_DEBUG("Adding dictionary batch for RLE and dictionary-encoded column (ColumnId: %v)",
-                    typedColumn.Column->Id);
-                prepareDictionaryBatch(
-                    columnIndex,
-                    typedColumn.Column->Rle->ValueColumn->Dictionary->DictionaryId,
-                    typedColumn.Column->Rle->ValueColumn->Dictionary->ValueColumn);
-            }
-        }
-    }
-
-    // Serializes one dictionary value column as a DictionaryBatch message tagged with
-    // arrowDictionaryId and registers it together with its body writer.
-    void PrepareDictionaryBatch(
-        const TTypedBatchColumn& typedColumn,
-        int arrowDictionaryId)
-    {
-        namespace NFlatbuf = org::apache::arrow::flatbuf;
-
-        flatbuffers::FlatBufferBuilder builder;
-
-        // The dictionary is encoded as a single-column record batch holding all of its values.
-        auto [batchOffset, batchBodySize, batchBodyWriter] = SerializeRecordBatch(
-            &builder,
-            typedColumn.Column->ValueCount,
-            MakeRange({typedColumn}));
-
-        auto dictionaryOffset = NFlatbuf::CreateDictionaryBatch(
-            builder,
-            arrowDictionaryId,
-            batchOffset);
-
-        builder.Finish(NFlatbuf::CreateMessage(
-            builder,
-            NFlatbuf::MetadataVersion_V4,
-            NFlatbuf::MessageHeader_DictionaryBatch,
-            dictionaryOffset.Union(),
-            batchBodySize));
-
-        RegisterMessage(
-            NFlatbuf::MessageHeader_DictionaryBatch,
-            std::move(builder),
-            batchBodySize,
-            std::move(batchBodyWriter));
-    }
-
-    // Serializes all typed columns (NumberOfRows_ rows) as a RecordBatch message and
-    // registers it together with its body writer.
-    void PrepareRecordBatch()
-    {
-        namespace NFlatbuf = org::apache::arrow::flatbuf;
-
-        flatbuffers::FlatBufferBuilder builder;
-
-        auto [batchOffset, batchBodySize, batchBodyWriter] = SerializeRecordBatch(
-            &builder,
-            NumberOfRows_,
-            TypedColumns_);
-
-        builder.Finish(NFlatbuf::CreateMessage(
-            builder,
-            NFlatbuf::MetadataVersion_V4,
-            NFlatbuf::MessageHeader_RecordBatch,
-            batchOffset.Union(),
-            batchBodySize));
-
-        RegisterMessage(
-            NFlatbuf::MessageHeader_RecordBatch,
-            std::move(builder),
-            batchBodySize,
-            std::move(batchBodyWriter));
-    }
-
-    // Returns the number of bytes the registered messages occupy on the wire.
-    i64 GetPayloadSize() const
-    {
-        // Every message, with or without a flatbuf builder, starts with two ui32 words:
-        // the continuation indicator and the metadata size.
-        constexpr i64 PrefixSize = 2 * sizeof(ui32);
-
-        i64 total = 0;
-        for (const auto& message : Messages_) {
-            total += PrefixSize;
-            if (!message.FlatbufBuilder) {
-                continue;
-            }
-            // Metadata and body are both counted rounded up to ArrowAlignment.
-            total += AlignUp<i64>(message.FlatbufBuilder->GetSize(), ArrowAlignment);
-            total += AlignUp<i64>(message.BodySize, ArrowAlignment);
-        }
-        return total;
-    }
-
-    // Writes every registered message to `output`.
-    //
-    // Each message is laid out as: ui32 continuation indicator (0xFFFFFFFF), ui32 metadata size,
-    // the flatbuf metadata padded to ArrowAlignment, then the body. A message without a flatbuf
-    // builder is written as the end-of-stream marker, i.e. a zero metadata size.
-    void WritePayload(TBlobOutput* output)
-    {
-        YT_LOG_DEBUG("Started writing payload");
-        for (const auto& message : Messages_) {
-            // Continuation indicator
-            const ui32 continuationIndicator = 0xFFFFFFFF;
-            output->Write(&continuationIndicator, sizeof(ui32));
-
-            if (!message.FlatbufBuilder) {
-                // EOS marker
-                const ui32 zero = 0;
-                output->Write(&zero, sizeof(ui32));
-                continue;
-            }
-
-            const i64 metadataSize = message.FlatbufBuilder->GetSize();
-            const i64 alignedMetadataSize = AlignUp<i64>(metadataSize, ArrowAlignment);
-            auto metadataPtr = message.FlatbufBuilder->GetBufferPointer();
-
-            // The size prefix announces the aligned length, so the metadata must be padded up
-            // to it; otherwise the reader would consume body bytes as metadata and the stream
-            // would disagree with GetPayloadSize.
-            const ui32 metadataSizePrefix = static_cast<ui32>(alignedMetadataSize);
-            output->Write(&metadataSizePrefix, sizeof(ui32));
-            output->Write(metadataPtr, metadataSize);
-
-            // At most ArrowAlignment - 1 bytes; nothing is written when the size is already aligned.
-            const char zeroByte = 0;
-            for (i64 written = metadataSize; written < alignedMetadataSize; ++written) {
-                output->Write(&zeroByte, sizeof(zeroByte));
-            }
-
-            // Body
-            if (message.BodyWriter) {
-                TString current;
-                current.resize(message.BodySize);
-                // Double copying.
-                message.BodyWriter(TMutableRef::FromString(current));
-                output->Write(current.data(), message.BodySize);
-            } else {
-                YT_VERIFY(message.BodySize == 0);
-            }
-        }
-
-        YT_LOG_DEBUG("Finished writing payload");
-    }
-};
-
-////////////////////////////////////////////////////////////////////////////////
-
-// Constructs a TArrowWriter from the given arguments and returns it through the
-// generic schemaless format writer interface.
-ISchemalessFormatWriterPtr CreateWriterForArrow(
-    NTableClient::TNameTablePtr nameTable,
-    const std::vector<NTableClient::TTableSchemaPtr>& schemas,
-    NConcurrency::IAsyncOutputStreamPtr output,
-    bool enableContextSaving,
-    TControlAttributesConfigPtr controlAttributesConfig,
-    int keyColumnCount)
-{
-    return New<TArrowWriter>(
-        std::move(nameTable),
-        schemas,
-        std::move(output),
-        enableContextSaving,
-        std::move(controlAttributesConfig),
-        keyColumnCount);
-}
-
-////////////////////////////////////////////////////////////////////////////////
-
-} // namespace NYT::NFormats

+ 0 - 26
yt/yt/client/formats/arrow_writer.h

@@ -1,26 +0,0 @@
-#pragma once
-
-#include "public.h"
-
-#include <yt/yt/client/table_client/public.h>
-
-#include <yt/yt/core/concurrency/public.h>
-
-#include <yt/yt/core/ytree/public.h>
-
-
-namespace NYT::NFormats {
-
-////////////////////////////////////////////////////////////////////////////////
-
-// Creates the schemaless format writer for the Arrow format.
-// `schemas` holds table schemas (NOTE(review): presumably one per input table — confirm);
-// the produced data is written to `output`.
-ISchemalessFormatWriterPtr CreateWriterForArrow(
-    NTableClient::TNameTablePtr nameTable,
-    const std::vector<NTableClient::TTableSchemaPtr>& schemas,
-    NConcurrency::IAsyncOutputStreamPtr output,
-    bool enableContextSaving,
-    TControlAttributesConfigPtr controlAttributesConfig,
-    int keyColumnCount);
-
-////////////////////////////////////////////////////////////////////////////////
-
-} // namespace NYT::NFormats

+ 0 - 3
yt/yt/client/formats/ya.make

@@ -3,7 +3,6 @@ LIBRARY()
 INCLUDE(${ARCADIA_ROOT}/yt/ya_cpp.make.inc)
 
 SRCS(
-    arrow_writer.cpp
     config.cpp
     dsv_parser.cpp
     dsv_writer.cpp
@@ -38,9 +37,7 @@ SRCS(
 
 PEERDIR(
     yt/yt/client
-    yt/yt/client/arrow/fbs
     yt/yt/library/skiff_ext
-    yt/yt/library/column_converters
     yt/yt_proto/yt/formats
     library/cpp/string_utils/base64
 )

+ 0 - 942
yt/yt/client/unittests/arrow_writer_ut.cpp

@@ -1,942 +0,0 @@
-#include <yt/yt/core/test_framework/framework.h>
-
-#include <yt/yt/client/formats/arrow_writer.h>
-#include <yt/yt/client/formats/config.h>
-#include <yt/yt/client/formats/format.h>
-
-#include <yt/yt/client/table_client/helpers.h>
-#include <yt/yt/client/table_client/name_table.h>
-#include <yt/yt/client/table_client/unversioned_row.h>
-#include <yt/yt/client/table_client/validate_logical_type.h>
-
-#include <yt/yt/ytlib/chunk_client/chunk_reader.h>
-#include <yt/yt/ytlib/chunk_client/chunk_reader_options.h>
-#include <yt/yt/ytlib/chunk_client/chunk_reader_statistics.h>
-#include <yt/yt/ytlib/chunk_client/memory_reader.h>
-#include <yt/yt/ytlib/chunk_client/memory_writer.h>
-
-#include <yt/yt/ytlib/table_client/cached_versioned_chunk_meta.h>
-#include <yt/yt/ytlib/table_client/chunk_state.h>
-#include <yt/yt/ytlib/table_client/config.h>
-#include <yt/yt/ytlib/table_client/schemaless_chunk_writer.h>
-#include <yt/yt/ytlib/table_client/schemaless_multi_chunk_reader.h>
-
-#include <yt/yt/library/named_value/named_value.h>
-
-#include <util/stream/null.h>
-#include <util/string/hex.h>
-
-#include <contrib/libs/apache/arrow/cpp/src/arrow/api.h>
-#include <contrib/libs/apache/arrow/cpp/src/arrow/io/api.h>
-#include <contrib/libs/apache/arrow/cpp/src/arrow/io/memory.h>
-#include <contrib/libs/apache/arrow/cpp/src/arrow/ipc/api.h>
-
-#include <stdlib.h>
-
-namespace NYT::NTableClient {
-
-namespace {
-
-using namespace NChunkClient;
-using namespace NFormats;
-using namespace NNamedValue;
-using namespace NTableClient;
-
-////////////////////////////////////////////////////////////////////////////////
-
-// Converts `rows` into a columnar batch by writing them into an in-memory chunk
-// (OptimizeFor = Scan, 256-byte blocks) and reading that chunk back with Columnar = true.
-// Throws if closing the chunk writer fails.
-IUnversionedRowBatchPtr MakeColumnarRowBatch(
-    TRange<NTableClient::TUnversionedRow> rows,
-    TTableSchemaPtr Schema_)
-{
-    auto memoryWriter = New<TMemoryWriter>();
-
-    auto config = New<TChunkWriterConfig>();
-    config->Postprocess();
-    config->BlockSize = 256;
-    config->Postprocess();
-
-    auto options = New<TChunkWriterOptions>();
-    options->OptimizeFor = EOptimizeFor::Scan;
-    options->Postprocess();
-
-    auto chunkWriter = CreateSchemalessChunkWriter(
-        config,
-        options,
-        Schema_,
-        /*nameTable*/ nullptr,
-        memoryWriter,
-        /*dataSink*/ std::nullopt);
-
-    chunkWriter->Write(rows);
-    // A failed close must fail the caller instead of being silently ignored; reading
-    // the chunk meta and blocks of a writer that did not close cleanly is meaningless.
-    chunkWriter->Close()
-        .Get()
-        .ThrowOnError();
-
-    auto memoryReader = CreateMemoryReader(
-        memoryWriter->GetChunkMeta(),
-        memoryWriter->GetBlocks());
-
-    NChunkClient::NProto::TChunkSpec chunkSpec;
-    ToProto(chunkSpec.mutable_chunk_id(), NullChunkId);
-    chunkSpec.set_table_row_index(42);
-
-    auto chunkMeta = New<TColumnarChunkMeta>(*memoryWriter->GetChunkMeta());
-
-    auto chunkState = New<TChunkState>(TChunkState{
-            .BlockCache = GetNullBlockCache(),
-            .ChunkSpec = chunkSpec,
-            .TableSchema = Schema_,
-    });
-
-    auto chunkReader = CreateSchemalessRangeChunkReader(
-        chunkState,
-        chunkMeta,
-        TChunkReaderConfig::GetDefault(),
-        TChunkReaderOptions::GetDefault(),
-        memoryReader,
-        TNameTable::FromSchema(*Schema_),
-        /* chunkReadOptions */ {},
-        /* sortColumns */ {},
-        /* omittedInaccessibleColumns */ {},
-        TColumnFilter(),
-        TReadRange());
-
-    // The row limit exceeds the number of written rows, so a single read can return all of them.
-    TRowBatchReadOptions readOptions{
-        .MaxRowsPerRead = static_cast<i64>(rows.size()) + 10,
-        .Columnar = true};
-    return ReadRowBatch(chunkReader, readOptions);
-}
-
-////////////////////////////////////////////////////////////////////////////////
-
-// Creates an Arrow writer over `outputStream` with the table, row, range and tablet
-// index control attributes all disabled.
-ISchemalessFormatWriterPtr CreateArrowWriter(TNameTablePtr nameTable,
-    IOutputStream* outputStream,
-    const std::vector<NTableClient::TTableSchemaPtr>& schemas)
-{
-    auto config = NYT::New<TControlAttributesConfig>();
-    config->EnableTableIndex = false;
-    config->EnableRowIndex = false;
-    config->EnableRangeIndex = false;
-    config->EnableTabletIndex = false;
-
-    auto asyncOutput = NConcurrency::CreateAsyncAdapter(outputStream);
-    return CreateWriterForArrow(
-        nameTable,
-        schemas,
-        asyncOutput,
-        /*enableContextSaving*/ false,
-        config,
-        /*keyColumnCount*/ 0);
-}
-
-// Creates an Arrow writer over `outputStream` with the table, row, range and tablet
-// index control attributes all enabled.
-ISchemalessFormatWriterPtr CreateArrowWriterWithSystemColumns(TNameTablePtr nameTable,
-    IOutputStream* outputStream,
-    const std::vector<NTableClient::TTableSchemaPtr>& schemas)
-{
-    auto config = NYT::New<TControlAttributesConfig>();
-    config->EnableTableIndex = true;
-    config->EnableRowIndex = true;
-    config->EnableRangeIndex = true;
-    config->EnableTabletIndex = true;
-
-    auto asyncOutput = NConcurrency::CreateAsyncAdapter(outputStream);
-    return CreateWriterForArrow(
-        nameTable,
-        schemas,
-        asyncOutput,
-        /*enableContextSaving*/ false,
-        config,
-        /*keyColumnCount*/ 0);
-}
-
-////////////////////////////////////////////////////////////////////////////////
-
-// Opens the bytes accumulated in `outputStream` as an Arrow IPC stream and returns
-// its first record batch. Aborts if the stream cannot be opened or read.
-std::shared_ptr<arrow::RecordBatch> MakeBatch(const TStringStream& outputStream)
-{
-    const auto* bytes = reinterpret_cast<const uint8_t*>(outputStream.Data());
-    arrow::Buffer buffer(bytes, outputStream.Size());
-    arrow::io::BufferReader bufferReader(buffer);
-
-    std::shared_ptr<arrow::ipc::RecordBatchStreamReader> streamReader =
-        arrow::ipc::RecordBatchStreamReader::Open(&bufferReader).ValueOrDie();
-
-    return streamReader->Next().ValueOrDie();
-}
-
-// Reads `batchNumb` record batches from the bytes accumulated in `outputStream`.
-// Whenever the current IPC stream yields no more batches, a new stream reader is opened
-// on the same buffer reader and reading continues from there.
-std::vector<std::shared_ptr<arrow::RecordBatch>> MakeAllBatch(const TStringStream& outputStream, int batchNumb)
-{
-    const auto* bytes = reinterpret_cast<const uint8_t*>(outputStream.Data());
-    arrow::Buffer buffer(bytes, outputStream.Size());
-    arrow::io::BufferReader bufferReader(buffer);
-
-    std::shared_ptr<arrow::ipc::RecordBatchStreamReader> streamReader =
-        arrow::ipc::RecordBatchStreamReader::Open(&bufferReader).ValueOrDie();
-
-    std::vector<std::shared_ptr<arrow::RecordBatch>> collected;
-    while (std::ssize(collected) < batchNumb) {
-        auto nextBatch = streamReader->Next().ValueOrDie();
-        if (nextBatch != nullptr) {
-            collected.push_back(nextBatch);
-            continue;
-        }
-        // A null batch marks the end of the current stream; switch to the next one.
-        streamReader = arrow::ipc::RecordBatchStreamReader::Open(&bufferReader).ValueOrDie();
-    }
-    return collected;
-}
-
-////////////////////////////////////////////////////////////////////////////////
-
-// Copies the raw values of an arrow::Int64Array into a vector; verifies the array type.
-std::vector<int64_t> ReadInterger64Array(const std::shared_ptr<arrow::Array>& array)
-{
-    const auto typedArray = std::dynamic_pointer_cast<arrow::Int64Array>(array);
-    YT_VERIFY(typedArray);
-    const int64_t* begin = typedArray->raw_values();
-    return std::vector<int64_t>(begin, begin + array->length());
-}
-
-// Copies the raw values of an arrow::UInt32Array into a vector; verifies the array type.
-std::vector<uint32_t> ReadInterger32Array(const std::shared_ptr<arrow::Array>& array)
-{
-    const auto typedArray = std::dynamic_pointer_cast<arrow::UInt32Array>(array);
-    YT_VERIFY(typedArray);
-    const uint32_t* begin = typedArray->raw_values();
-    return std::vector<uint32_t>(begin, begin + array->length());
-}
-
-// Extracts every element of an arrow::BinaryArray as a std::string; verifies the array type.
-std::vector<std::string> ReadStringArray(const std::shared_ptr<arrow::Array>& array)
-{
-    const auto length = array->length();
-    const auto binaryArray = std::dynamic_pointer_cast<arrow::BinaryArray>(array);
-    YT_VERIFY(binaryArray);
-
-    std::vector<std::string> strings;
-    strings.reserve(length);
-    for (int index = 0; index < length; ++index) {
-        strings.push_back(binaryArray->GetString(index));
-    }
-    return strings;
-}
-
-// Extracts every element of an arrow::BooleanArray; verifies the array type.
-std::vector<bool> ReadBoolArray(const std::shared_ptr<arrow::Array>& array)
-{
-    const auto length = array->length();
-    const auto booleanArray = std::dynamic_pointer_cast<arrow::BooleanArray>(array);
-    YT_VERIFY(booleanArray);
-
-    std::vector<bool> flags;
-    flags.reserve(length);
-    for (int index = 0; index < length; ++index) {
-        flags.push_back(booleanArray->Value(index));
-    }
-    return flags;
-}
-
-// Copies the raw values of an arrow::DoubleArray into a vector; verifies the array type.
-std::vector<double> ReadDoubleArray(const std::shared_ptr<arrow::Array>& array)
-{
-    const auto typedArray = std::dynamic_pointer_cast<arrow::DoubleArray>(array);
-    YT_VERIFY(typedArray);
-    const double* begin = typedArray->raw_values();
-    return std::vector<double>(begin, begin + array->length());
-}
-
-// Decodes an arrow::DictionaryArray of strings: every ui32 index is replaced by the
-// dictionary value it refers to. Verifies the array type.
-std::vector<std::string> ReadStringArrayFromDict(const std::shared_ptr<arrow::Array>& array)
-{
-    const auto dictionaryArray = std::dynamic_pointer_cast<arrow::DictionaryArray>(array);
-    YT_VERIFY(dictionaryArray);
-
-    const auto indices = ReadInterger32Array(dictionaryArray->indices());
-    const auto dictionaryValues = ReadStringArray(dictionaryArray->dictionary());
-
-    std::vector<std::string> decoded;
-    decoded.reserve(indices.size());
-    for (const auto index : indices) {
-        decoded.push_back(dictionaryValues[index]);
-    }
-    return decoded;
-}
-
-// Reads a string column that is either a plain binary array or a dictionary array;
-// aborts on any other array type.
-std::vector<std::string> ReadAnyStringArray(const std::shared_ptr<arrow::Array>& array)
-{
-    if (std::dynamic_pointer_cast<arrow::BinaryArray>(array)) {
-        return ReadStringArray(array);
-    }
-    if (std::dynamic_pointer_cast<arrow::DictionaryArray>(array)) {
-        return ReadStringArrayFromDict(array);
-    }
-    YT_ABORT();
-}
-
-// Tells whether `array` is an arrow::DictionaryArray.
-bool IsDictColumn(const std::shared_ptr<arrow::Array>& array)
-{
-    const auto dictionaryArray = std::dynamic_pointer_cast<arrow::DictionaryArray>(array);
-    return static_cast<bool>(dictionaryArray);
-}
-
-////////////////////////////////////////////////////////////////////////////////
-
-// Column-wise test data: one vector element per row.
-using ColumnInteger = std::vector<int64_t>;
-using ColumnString = std::vector<std::string>;
-using ColumnBool = std::vector<bool>;
-using ColumnDouble = std::vector<double>;
-
-// Same, with std::nullopt standing for a missing value.
-using ColumnStringWithNulls = std::vector<std::optional<std::string>>;
-using ColumnBoolWithNulls = std::vector<std::optional<bool>>;
-using ColumnDoubleWithNulls = std::vector<std::optional<double>>;
-
-// Bundles the rows produced by the MakeUnversioned*Rows helpers with the objects
-// they were built from.
-struct TOwnerRows
-{
-    // Row handles obtained via Get() from the corresponding entries of OwningRows.
-    std::vector<TUnversionedRow> Rows;
-    // Builders the rows were finished from.
-    std::vector<TUnversionedOwningRowBuilder> Builders;
-    // Name table in which the column names were registered.
-    TNameTablePtr NameTable;
-    // Owning rows backing Rows.
-    std::vector<TUnversionedOwningRow> OwningRows;
-};
-
-////////////////////////////////////////////////////////////////////////////////
-
-// Builds unversioned rows from column-wise Int64 data.
-//
-// `column[i]` holds the values of the column named `columnNames[i]`; the number of rows is
-// taken from the first column. Names are registered in a fresh name table, which is returned
-// together with the rows, their builders and the owning rows backing them.
-TOwnerRows MakeUnversionedIntegerRows(
-    const std::vector<ColumnInteger>& column,
-    const std::vector<std::string>& columnNames)
-{
-    YT_VERIFY(column.size() > 0);
-    // Every column is looked up in columnNames by index, so a name must exist for each.
-    YT_VERIFY(columnNames.size() >= column.size());
-
-    auto nameTable = New<TNameTable>();
-
-    std::vector<TUnversionedOwningRowBuilder> rowsBuilders(column[0].size());
-
-    for (int colIdx = 0; colIdx < std::ssize(column); colIdx++) {
-        // There is one builder per row of the first column; a longer column would index past them.
-        YT_VERIFY(column[colIdx].size() <= rowsBuilders.size());
-        auto columnId = nameTable->RegisterName(columnNames[colIdx]);
-        for (int rowIndex = 0; rowIndex < std::ssize(column[colIdx]); rowIndex++) {
-            rowsBuilders[rowIndex].AddValue(MakeUnversionedInt64Value(column[colIdx][rowIndex], columnId));
-        }
-    }
-    std::vector<TUnversionedRow> rows;
-    std::vector<TUnversionedOwningRow> owningRows;
-    for (int rowIndex = 0; rowIndex < std::ssize(rowsBuilders); rowIndex++) {
-        owningRows.push_back(rowsBuilders[rowIndex].FinishRow());
-        rows.push_back(owningRows.back().Get());
-    }
-    return {std::move(rows), std::move(rowsBuilders), std::move(nameTable), std::move(owningRows)};
-}
-
-// Builds unversioned rows from column-wise string data.
-//
-// `column[i]` holds the values of the column named `columnNames[i]`; the number of rows is
-// taken from the first column. Names are registered in a fresh name table, which is returned
-// together with the rows, their builders and the owning rows backing them.
-TOwnerRows MakeUnversionedStringRows(
-    const std::vector<ColumnString>& column,
-    const std::vector<std::string>& columnNames)
-{
-    YT_VERIFY(column.size() > 0);
-    // Every column is looked up in columnNames by index, so a name must exist for each.
-    YT_VERIFY(columnNames.size() >= column.size());
-    std::vector<TString> strings;
-
-    auto nameTable = New<TNameTable>();
-
-    std::vector<TUnversionedOwningRowBuilder> rowsBuilders(column[0].size());
-
-    for (int colIdx = 0; colIdx < std::ssize(column); colIdx++) {
-        // There is one builder per row of the first column; a longer column would index past them.
-        YT_VERIFY(column[colIdx].size() <= rowsBuilders.size());
-        auto columnId = nameTable->RegisterName(columnNames[colIdx]);
-        for (int rowIndex = 0; rowIndex < std::ssize(column[colIdx]); rowIndex++) {
-            strings.push_back(TString(column[colIdx][rowIndex]));
-            rowsBuilders[rowIndex].AddValue(MakeUnversionedStringValue(strings.back(), columnId));
-        }
-    }
-    std::vector<TUnversionedRow> rows;
-    std::vector<TUnversionedOwningRow> owningRows;
-    for (int rowIndex = 0; rowIndex < std::ssize(rowsBuilders); rowIndex++) {
-        owningRows.push_back(rowsBuilders[rowIndex].FinishRow());
-        rows.push_back(owningRows.back().Get());
-    }
-    return {std::move(rows), std::move(rowsBuilders), std::move(nameTable), std::move(owningRows)};
-}
-
-// Returns a string of `stringSize` characters drawn with rand() from 'a'..'z'.
-std::string MakeRandomString(size_t stringSize)
-{
-    // Keep the offset inside the alphabet; a larger modulus runs past 'z' into punctuation.
-    constexpr int AlphabetSize = 'z' - 'a' + 1;
-
-    std::string randomString;
-    randomString.reserve(stringSize);
-    for (size_t i = 0; i < stringSize; i++) {
-        randomString += static_cast<char>('a' + rand() % AlphabetSize);
-    }
-    return randomString;
-}
-
-////////////////////////////////////////////////////////////////////////////////
-
-// Checks that `batch` has exactly the columns listed in `columnNames`, in that order.
-void CheckColumnNames(
-    std::shared_ptr<arrow::RecordBatch> batch,
-    const std::vector<std::string>& columnNames)
-{
-    // Fatal on purpose: with fewer columns than names, the loop below would ask the
-    // batch for the name of a column it does not have.
-    ASSERT_EQ(batch->num_columns(), std::ssize(columnNames));
-    for (size_t i = 0; i < columnNames.size(); i++) {
-        EXPECT_EQ(batch->column_name(i), columnNames[i]);
-    }
-}
-
-////////////////////////////////////////////////////////////////////////////////
-
-// Round-trips a single Int64 column through the row-based Write path.
-TEST(Simple, JustWork)
-{
-    std::vector<TTableSchemaPtr> tableSchemas;
-    std::vector<std::string> columnNames = {"integer"};
-
-    tableSchemas.push_back(New<TTableSchema>(std::vector{
-                TColumnSchema(TString(columnNames[0]), EValueType::Int64),
-    }));
-
-    TStringStream outputStream;
-
-    ColumnInteger column = {42, 179179};
-
-    auto rows = MakeUnversionedIntegerRows({column}, columnNames);
-
-    auto writer = CreateArrowWriter(rows.NameTable, &outputStream, tableSchemas);
-
-    EXPECT_TRUE(writer->Write(rows.Rows));
-
-    writer->Close()
-        .Get()
-        .ThrowOnError();
-
-    // Decode the produced stream with the reference Arrow reader and compare.
-    auto batch = MakeBatch(outputStream);
-    CheckColumnNames(batch, columnNames);
-    EXPECT_EQ(ReadInterger64Array(batch->column(0)), column);
-}
-
-// Same round trip as a plain Int64 column, but with all index control attributes enabled:
-// the batch is expected to carry the four system columns after the data column.
-TEST(Simple, WorkWithSystemColumns)
-{
-    const std::vector<std::string> columnNames = {"integer"};
-    const ColumnInteger expectedValues = {42, 179179};
-
-    std::vector<TTableSchemaPtr> tableSchemas = {
-        New<TTableSchema>(std::vector{
-            TColumnSchema(TString(columnNames[0]), EValueType::Int64),
-        }),
-    };
-
-    auto ownerRows = MakeUnversionedIntegerRows({expectedValues}, columnNames);
-
-    TStringStream outputStream;
-    auto arrowWriter = CreateArrowWriterWithSystemColumns(ownerRows.NameTable, &outputStream, tableSchemas);
-
-    EXPECT_TRUE(arrowWriter->Write(ownerRows.Rows));
-
-    arrowWriter->Close()
-        .Get()
-        .ThrowOnError();
-
-    auto recordBatch = MakeBatch(outputStream);
-    CheckColumnNames(recordBatch, {"integer", "$row_index", "$range_index", "$table_index", "$tablet_index"});
-    EXPECT_EQ(ReadInterger64Array(recordBatch->column(0)), expectedValues);
-}
-
-// Round-trips a single Int64 column through WriteBatch with a columnar row batch.
-TEST(Simple, ColumnarBatch)
-{
-    const std::vector<std::string> columnNames = {"integer"};
-    const ColumnInteger expectedValues = {42, 179179};
-
-    std::vector<TTableSchemaPtr> tableSchemas = {
-        New<TTableSchema>(std::vector{
-            TColumnSchema(TString(columnNames[0]), EValueType::Int64),
-        }),
-    };
-
-    auto ownerRows = MakeUnversionedIntegerRows({expectedValues}, columnNames);
-
-    TStringStream outputStream;
-    auto arrowWriter = CreateArrowWriter(ownerRows.NameTable, &outputStream, tableSchemas);
-
-    // Re-encode the rows as a columnar batch before handing them to the writer.
-    auto columnarBatch = MakeColumnarRowBatch(ownerRows.Rows, tableSchemas[0]);
-    EXPECT_TRUE(arrowWriter->WriteBatch(columnarBatch));
-
-    arrowWriter->Close()
-        .Get()
-        .ThrowOnError();
-
-    auto recordBatch = MakeBatch(outputStream);
-    CheckColumnNames(recordBatch, columnNames);
-    EXPECT_EQ(ReadInterger64Array(recordBatch->column(0)), expectedValues);
-}
-
-// Round-trips a single Int64 column through WriteBatch with a row-based batch.
-TEST(Simple, RowBatch)
-{
-    const std::vector<std::string> columnNames = {"integer"};
-    const ColumnInteger expectedValues = {42, 179179};
-
-    std::vector<TTableSchemaPtr> tableSchemas = {
-        New<TTableSchema>(std::vector{
-            TColumnSchema(TString(columnNames[0]), EValueType::Int64),
-        }),
-    };
-
-    auto ownerRows = MakeUnversionedIntegerRows({expectedValues}, columnNames);
-
-    TStringStream outputStream;
-    auto arrowWriter = CreateArrowWriter(ownerRows.NameTable, &outputStream, tableSchemas);
-
-    // The row handles are moved into the batch; ownerRows.OwningRows stays alive in this scope.
-    auto rowBatch = CreateBatchFromUnversionedRows(MakeSharedRange(std::move(ownerRows.Rows)));
-    EXPECT_TRUE(arrowWriter->WriteBatch(rowBatch));
-
-    arrowWriter->Close()
-        .Get()
-        .ThrowOnError();
-
-    auto recordBatch = MakeBatch(outputStream);
-    CheckColumnNames(recordBatch, columnNames);
-    EXPECT_EQ(ReadInterger64Array(recordBatch->column(0)), expectedValues);
-}
-
-// Writes an Int64 column next to a column of type Null, with a null integer in the first
-// row, and checks that the integer of the second row survives the round trip.
-TEST(Simple, Null)
-{
-    const std::vector<std::string> columnNames = {"integer"};
-    std::vector<TTableSchemaPtr> tableSchemas = {
-        New<TTableSchema>(std::vector{
-            TColumnSchema(TString(columnNames[0]), EValueType::Int64),
-            TColumnSchema(TString("null"), EValueType::Null),
-        }),
-    };
-
-    TStringStream outputStream;
-    auto nameTable = New<TNameTable>();
-    const auto integerColumnId = nameTable->RegisterName(columnNames[0]);
-    const auto nullColumnId = nameTable->RegisterName("null");
-
-    TUnversionedRowBuilder firstRowBuilder;
-    firstRowBuilder.AddValue(MakeUnversionedNullValue(integerColumnId));
-    firstRowBuilder.AddValue(MakeUnversionedNullValue(nullColumnId));
-
-    TUnversionedRowBuilder secondRowBuilder;
-    secondRowBuilder.AddValue(MakeUnversionedInt64Value(3, integerColumnId));
-    secondRowBuilder.AddValue(MakeUnversionedNullValue(nullColumnId));
-
-    std::vector<TUnversionedRow> rows = {firstRowBuilder.GetRow(), secondRowBuilder.GetRow()};
-
-    auto arrowWriter = CreateArrowWriter(nameTable, &outputStream, tableSchemas);
-
-    EXPECT_TRUE(arrowWriter->Write(rows));
-
-    arrowWriter->Close()
-        .Get()
-        .ThrowOnError();
-
-    auto recordBatch = MakeBatch(outputStream);
-    CheckColumnNames(recordBatch, columnNames);
-    EXPECT_EQ(ReadInterger64Array(recordBatch->column(0))[1], 3);
-}
-
-// Round-trips a single String column with two short values.
-TEST(Simple, String)
-{
-    const std::vector<std::string> columnNames = {"string"};
-    const ColumnString expectedValues = {"cat", "mouse"};
-
-    std::vector<TTableSchemaPtr> tableSchemas = {
-        New<TTableSchema>(std::vector{
-            TColumnSchema(TString(columnNames[0]), EValueType::String),
-        }),
-    };
-
-    auto ownerRows = MakeUnversionedStringRows({expectedValues}, columnNames);
-
-    TStringStream outputStream;
-    auto arrowWriter = CreateArrowWriter(ownerRows.NameTable, &outputStream, tableSchemas);
-
-    EXPECT_TRUE(arrowWriter->Write(ownerRows.Rows));
-
-    arrowWriter->Close()
-        .Get()
-        .ThrowOnError();
-
-    auto recordBatch = MakeBatch(outputStream);
-
-    CheckColumnNames(recordBatch, columnNames);
-    EXPECT_EQ(ReadAnyStringArray(recordBatch->column(0)), expectedValues);
-}
-
-// Writes two 20-character strings repeated alternately and expects the column to
-// come back as an Arrow dictionary array.
-TEST(Simple, DictionaryString)
-{
-    const std::vector<std::string> columnNames = {"string"};
-    std::vector<TTableSchemaPtr> tableSchemas = {
-        New<TTableSchema>(std::vector{
-            TColumnSchema(TString(columnNames[0]), EValueType::String),
-        }),
-    };
-
-    const std::string longString(20, 'a');
-    const std::string longString2(20, 'b');
-
-    auto ownerRows = MakeUnversionedStringRows({{longString, longString2, longString, longString2}}, columnNames);
-
-    TStringStream outputStream;
-    auto arrowWriter = CreateArrowWriter(ownerRows.NameTable, &outputStream, tableSchemas);
-
-    EXPECT_TRUE(arrowWriter->Write(ownerRows.Rows));
-
-    arrowWriter->Close()
-        .Get()
-        .ThrowOnError();
-
-    auto recordBatch = MakeBatch(outputStream);
-
-    CheckColumnNames(recordBatch, columnNames);
-    EXPECT_EQ(ReadAnyStringArray(recordBatch->column(0))[0], longString);
-    EXPECT_TRUE(IsDictColumn(recordBatch->column(0)));
-}
-
-// Writes two batches into the same writer, one with repeated long strings and one with
-// distinct short strings, and checks that both decode to the original values.
-TEST(Simple, DictionaryAndDirectStrings)
-{
-    const std::vector<std::string> columnNames = {"string"};
-    std::vector<TTableSchemaPtr> tableSchemas = {
-        New<TTableSchema>(std::vector{
-            TColumnSchema(TString(columnNames[0]), EValueType::String),
-        }),
-    };
-
-    const std::string longString(20, 'a');
-    const std::string longString2(20, 'b');
-    const ColumnString firstColumn = {longString, longString2, longString, longString2};
-    const ColumnString secondColumn = {"cat", "dog", "mouse", "table"};
-
-    auto dictRows = MakeUnversionedStringRows({firstColumn}, columnNames);
-    auto directRows = MakeUnversionedStringRows({secondColumn}, columnNames);
-
-    TStringStream outputStream;
-    auto arrowWriter = CreateArrowWriter(dictRows.NameTable, &outputStream, tableSchemas);
-
-    // The first batch is meant to be encoded as a dictionary.
-    EXPECT_TRUE(arrowWriter->Write(dictRows.Rows));
-
-    // The second batch is meant to be encoded directly.
-    EXPECT_TRUE(arrowWriter->Write(directRows.Rows));
-
-    arrowWriter->Close()
-        .Get()
-        .ThrowOnError();
-
-    auto recordBatches = MakeAllBatch(outputStream, 2);
-
-    CheckColumnNames(recordBatches[0], columnNames);
-    CheckColumnNames(recordBatches[1], columnNames);
-
-    EXPECT_EQ(ReadAnyStringArray(recordBatches[0]->column(0)), firstColumn);
-    EXPECT_EQ(ReadAnyStringArray(recordBatches[1]->column(0)), secondColumn);
-}
-
-// Round-trips a 100x100 table of rand()-generated Int64 values in a single batch.
-TEST(StressOneBatch, Integer)
-{
-    // Constants.
-    constexpr size_t ColumnCount = 100;
-    constexpr size_t RowCount = 100;
-
-    std::vector<std::string> columnNames;
-    std::vector<ColumnInteger> expectedColumns(ColumnCount);
-    std::vector<TColumnSchema> columnSchemas;
-
-    for (size_t columnIndex = 0; columnIndex < ColumnCount; ++columnIndex) {
-        columnNames.push_back("integer" + std::to_string(columnIndex));
-        columnSchemas.push_back(TColumnSchema(TString(columnNames.back()), EValueType::Int64));
-
-        auto& values = expectedColumns[columnIndex];
-        for (size_t rowIndex = 0; rowIndex < RowCount; ++rowIndex) {
-            values.push_back(rand());
-        }
-    }
-
-    std::vector<TTableSchemaPtr> tableSchemas;
-    tableSchemas.push_back(New<TTableSchema>(columnSchemas));
-
-    auto ownerRows = MakeUnversionedIntegerRows(expectedColumns, columnNames);
-
-    TStringStream outputStream;
-    auto arrowWriter = CreateArrowWriter(ownerRows.NameTable, &outputStream, tableSchemas);
-
-    EXPECT_TRUE(arrowWriter->Write(ownerRows.Rows));
-
-    arrowWriter->Close()
-        .Get()
-        .ThrowOnError();
-
-    auto recordBatch = MakeBatch(outputStream);
-
-    CheckColumnNames(recordBatch, columnNames);
-
-    for (size_t columnIndex = 0; columnIndex < ColumnCount; ++columnIndex) {
-        EXPECT_EQ(ReadInterger64Array(recordBatch->column(columnIndex)), expectedColumns[columnIndex]);
-    }
-}
-
-// Round-trips a 10x10 table of random 10-character strings in a single batch.
-TEST(StressOneBatch, String)
-{
-    constexpr size_t ColumnCount = 10;
-    constexpr size_t RowCount = 10;
-    constexpr size_t StringSize = 10;
-
-    std::vector<std::string> columnNames;
-    std::vector<ColumnString> expectedColumns(ColumnCount);
-    std::vector<TColumnSchema> columnSchemas;
-
-    for (size_t columnIndex = 0; columnIndex < ColumnCount; ++columnIndex) {
-        columnNames.push_back("string" + std::to_string(columnIndex));
-        columnSchemas.push_back(TColumnSchema(TString(columnNames.back()), EValueType::String));
-
-        auto& values = expectedColumns[columnIndex];
-        for (size_t rowIndex = 0; rowIndex < RowCount; ++rowIndex) {
-            values.push_back(MakeRandomString(StringSize));
-        }
-    }
-
-    std::vector<TTableSchemaPtr> tableSchemas;
-    tableSchemas.push_back(New<TTableSchema>(columnSchemas));
-
-    auto ownerRows = MakeUnversionedStringRows(expectedColumns, columnNames);
-
-    TStringStream outputStream;
-    auto arrowWriter = CreateArrowWriter(ownerRows.NameTable, &outputStream, tableSchemas);
-
-    EXPECT_TRUE(arrowWriter->Write(ownerRows.Rows));
-
-    arrowWriter->Close()
-        .Get()
-        .ThrowOnError();
-
-    auto recordBatch = MakeBatch(outputStream);
-
-    CheckColumnNames(recordBatch, columnNames);
-
-    // The columns may come back either direct or dictionary-encoded; the reader handles both.
-    for (size_t columnIndex = 0; columnIndex < ColumnCount; ++columnIndex) {
-        EXPECT_EQ(ReadAnyStringArray(recordBatch->column(columnIndex)), expectedColumns[columnIndex]);
-    }
-}
-
-TEST(StressOneBatch, MixTypes)
-{
-    // Constants.
-    const size_t rowsCount = 10;
-    const size_t stringSize = 10;
-
-    std::vector<TTableSchemaPtr> tableSchemas;
-    tableSchemas.push_back(New<TTableSchema>(std::vector{
-                TColumnSchema("bool", EValueType::Boolean),
-                TColumnSchema("double", EValueType::Double),
-                TColumnSchema("any", EValueType::Any)}));
-
-    TStringStream outputStream;
-
-    auto nameTable = New<TNameTable>();
-    std::vector<TUnversionedOwningRowBuilder> rowsBuilders(rowsCount);
-
-    std::vector<std::string> columnNames;
-
-    std::vector<bool> boolColumn;
-    std::vector<double> doubleColumn;
-    std::vector<std::string> anyColumn;
-    std::vector<TUnversionedRow> rows;
-
-    // Fill bool column.
-    std::string ColumnName = "bool";
-    auto boolId = nameTable->RegisterName(ColumnName);
-    columnNames.push_back(ColumnName);
-    for (size_t rowIndex = 0; rowIndex < rowsCount; rowIndex++) {
-        boolColumn.push_back((rand() % 2) == 0);
-
-        rowsBuilders[rowIndex].AddValue(MakeUnversionedBooleanValue(boolColumn[rowIndex], boolId));
-    }
-
-    // Fill double column.
-    ColumnName = "double";
-    auto columnId = nameTable->RegisterName(ColumnName);
-    columnNames.push_back(ColumnName);
-    for (size_t rowIndex = 0; rowIndex < rowsCount; rowIndex++) {
-        doubleColumn.push_back((double)(rand() % 100) / 10.0);
-        rowsBuilders[rowIndex].AddValue(MakeUnversionedDoubleValue(doubleColumn[rowIndex], columnId));
-    }
-
-    // Fill any column.
-    ColumnName = "any";
-    auto anyId = nameTable->RegisterName(ColumnName);
-    columnNames.push_back(ColumnName);
-    for (size_t rowIndex = 0; rowIndex < rowsCount; rowIndex++) {
-        std::string randomString = MakeRandomString(stringSize);
-
-        anyColumn.push_back(randomString);
-
-        rowsBuilders[rowIndex].AddValue(MakeUnversionedAnyValue(randomString, anyId));
-    }
-
-    std::vector<TUnversionedOwningRow> owningRows;
-    for (size_t rowIndex = 0; rowIndex < rowsCount; rowIndex++) {
-        owningRows.push_back(rowsBuilders[rowIndex].FinishRow());
-        rows.push_back(owningRows.back().Get());
-    }
-
-    auto writer = CreateArrowWriter(nameTable, &outputStream, tableSchemas);
-
-    EXPECT_TRUE(writer->Write(rows));
-
-    writer->Close()
-        .Get()
-        .ThrowOnError();
-
-
-    auto batch = MakeBatch(outputStream);
-
-    CheckColumnNames(batch, columnNames);
-
-    EXPECT_EQ(ReadBoolArray(batch->column(0)), boolColumn);
-    EXPECT_EQ(ReadDoubleArray(batch->column(1)), doubleColumn);
-    EXPECT_EQ(ReadAnyStringArray(batch->column(2)), anyColumn);
-}
-
-TEST(StressMultiBatch, Integer)
-{
-    // Constants.
-    const size_t columnsCount = 10;
-    const size_t rowsCount = 10;
-    const size_t numbOfBatch = 10;
-
-    std::vector<std::string> columnNames;
-    std::vector<TTableSchemaPtr> tableSchemas;
-    std::vector<TColumnSchema> schemas_;
-
-    for (size_t columnIdx = 0; columnIdx < columnsCount; columnIdx++) {
-        std::string ColumnName = "integer" + std::to_string(columnIdx);
-        columnNames.push_back(ColumnName);
-        schemas_.push_back(TColumnSchema(TString(columnNames[columnIdx]), EValueType::Int64));
-    }
-    tableSchemas.push_back(New<TTableSchema>(schemas_));
-
-    TStringStream outputStream;
-    std::vector<std::vector<ColumnInteger>> columnsElements(numbOfBatch, std::vector<ColumnInteger>(columnsCount));
-
-    auto nameTable = New<TNameTable>();
-    for (size_t columnIndex = 0; columnIndex < columnsCount; columnIndex++) {
-        std::string ColumnName = "integer" + std::to_string(columnIndex);
-        nameTable->RegisterName(ColumnName);
-    }
-    auto writer = CreateArrowWriter(nameTable, &outputStream, tableSchemas);
-
-
-    for (size_t batchIndex = 0; batchIndex < numbOfBatch; batchIndex++) {
-
-        for (size_t columnIndex = 0; columnIndex < columnsCount; columnIndex++) {
-            std::string ColumnName = "integer" + std::to_string(columnIndex);
-            for (size_t rowIndex = 0; rowIndex < rowsCount; rowIndex++) {
-                columnsElements[batchIndex][columnIndex].push_back(rand());
-            }
-        }
-
-        auto rows = MakeUnversionedIntegerRows(columnsElements[batchIndex], columnNames);
-        EXPECT_TRUE(writer->Write(rows.Rows));
-    }
-
-    writer->Close()
-        .Get()
-        .ThrowOnError();
-
-
-    auto batches = MakeAllBatch(outputStream, numbOfBatch);
-
-    size_t batchIndex = 0;
-    for (auto& batch : batches) {
-        for (size_t columnIndex = 0; columnIndex < columnsCount; columnIndex++) {
-            CheckColumnNames(batch, columnNames);
-            EXPECT_EQ(ReadInterger64Array(batch->column(columnIndex)), columnsElements[batchIndex][columnIndex]);
-        }
-        batchIndex++;
-    }
-}
-
-TEST(StressMultiBatch, MixTypes)
-{
-    // Constants.
-    const size_t rowsCount = 10;
-    const size_t numbOfBatch = 10;
-    const size_t stringSize = 10;
-
-    std::vector<TTableSchemaPtr> tableSchemas;
-    tableSchemas.push_back(New<TTableSchema>(std::vector{
-                TColumnSchema("bool", EValueType::Boolean),
-                TColumnSchema("double", EValueType::Double),
-                TColumnSchema("any", EValueType::Any)}));
-
-    TStringStream outputStream;
-
-    auto nameTable = New<TNameTable>();
-
-    std::vector<std::string> columnNames = {"bool", "double", "any"};
-    auto boolId = nameTable->RegisterName(columnNames[0]);
-    auto doubleId = nameTable->RegisterName(columnNames[1]);
-    auto anyId = nameTable->RegisterName(columnNames[2]);
-
-    std::vector<ColumnBoolWithNulls> boolColumns(numbOfBatch);
-    std::vector<ColumnDoubleWithNulls> doubleColumns(numbOfBatch);
-    std::vector<ColumnStringWithNulls> anyColumns(numbOfBatch);
-
-    auto writer = CreateArrowWriter(nameTable, &outputStream, tableSchemas);
-
-    std::vector<TUnversionedOwningRow> owningRows;
-
-    for (size_t batchIndex = 0; batchIndex < numbOfBatch; batchIndex++) {
-        std::vector<TUnversionedOwningRowBuilder> rowsBuilders(rowsCount);
-        std::vector<TUnversionedRow> rows;
-
-        for (size_t rowIndex = 0; rowIndex < rowsCount; rowIndex++) {
-            if (rand() % 2 == 0) {
-                boolColumns[batchIndex].push_back(std::nullopt);
-                doubleColumns[batchIndex].push_back(std::nullopt);
-                anyColumns[batchIndex].push_back(std::nullopt);
-                rowsBuilders[rowIndex].AddValue(MakeUnversionedNullValue(boolId));
-                rowsBuilders[rowIndex].AddValue(MakeUnversionedNullValue(doubleId));
-                rowsBuilders[rowIndex].AddValue(MakeUnversionedNullValue(anyId));
-            } else {
-                boolColumns[batchIndex].push_back((rand() % 2) == 0);
-                rowsBuilders[rowIndex].AddValue(MakeUnversionedBooleanValue(*boolColumns[batchIndex][rowIndex], boolId));
-
-                doubleColumns[batchIndex].push_back((double)(rand() % 100) / 10.0);
-                rowsBuilders[rowIndex].AddValue(MakeUnversionedDoubleValue(*doubleColumns[batchIndex][rowIndex], doubleId));
-
-                std::string randomString = MakeRandomString(stringSize);
-                anyColumns[batchIndex].push_back(randomString);
-                rowsBuilders[rowIndex].AddValue(MakeUnversionedAnyValue(randomString, anyId));
-            }
-            owningRows.push_back(rowsBuilders[rowIndex].FinishRow());
-            rows.push_back(owningRows.back().Get());
-        }
-
-        EXPECT_TRUE(writer->Write(rows));
-    }
-
-    writer->Close()
-        .Get()
-        .ThrowOnError();
-
-    auto batches = MakeAllBatch(outputStream, numbOfBatch);
-    size_t batchIndex = 0;
-    for (auto& batch : batches) {
-        CheckColumnNames(batch, columnNames);
-
-        auto boolAr = ReadBoolArray(batch->column(0));
-        auto doubleAr = ReadDoubleArray(batch->column(1));
-        auto anyAr = ReadAnyStringArray(batch->column(2));
-
-        for (size_t rowIndex = 0; rowIndex < rowsCount; rowIndex++) {
-            if (boolColumns[batchIndex][rowIndex] == std::nullopt) {
-                EXPECT_TRUE(batch->column(0)->IsNull(rowIndex));
-                EXPECT_TRUE(batch->column(1)->IsNull(rowIndex));
-                EXPECT_TRUE(batch->column(2)->IsNull(rowIndex));
-            } else {
-                EXPECT_EQ(boolAr[rowIndex], *boolColumns[batchIndex][rowIndex]);
-                EXPECT_EQ(doubleAr[rowIndex], *doubleColumns[batchIndex][rowIndex]);
-                EXPECT_EQ(anyAr[rowIndex], *anyColumns[batchIndex][rowIndex]);
-            }
-        }
-
-        batchIndex++;
-    }
-}
-
-} // namespace
-} // namespace NYT::NTableClient

+ 0 - 12
yt/yt/client/unittests/ya.make

@@ -68,20 +68,8 @@ PEERDIR(
     yt/yt/library/named_value
 
     yt/yt_proto/yt/formats
-
-    contrib/libs/apache/arrow
 )
 
-IF (NOT OPENSOURCE)
-    SRCS(
-        arrow_writer_ut.cpp
-    )
-
-    PEERDIR(
-        yt/yt/ytlib
-    )
-ENDIF()
-
 RESOURCE(
     ${ARCADIA_ROOT}/library/cpp/type_info/ut/test-data/good-types.txt /types/good
     ${ARCADIA_ROOT}/library/cpp/type_info/ut/test-data/bad-types.txt /types/bad

+ 0 - 100
yt/yt/library/column_converters/boolean_column_converter.cpp

@@ -1,100 +0,0 @@
-#include "boolean_column_converter.h"
-
-#include "helpers.h"
-
-#include <yt/yt/client/table_client/schema.h>
-#include <yt/yt/client/table_client/unversioned_row.h>
-
-namespace NYT::NColumnConverters {
-
-////////////////////////////////////////////////////////////////////////////////
-
-namespace {
-
-void FillColumnarBooleanValues(
-    TBatchColumn* column,
-    i64 startIndex,
-    i64 valueCount,
-    TRef bitmap)
-{
-    column->StartIndex = startIndex;
-    column->ValueCount = valueCount;
-
-    auto& values = column->Values.emplace();
-    values.BitWidth = 1;
-    values.Data = bitmap;
-}
-
-////////////////////////////////////////////////////////////////////////////////
-
-class TBooleanColumnConverter
-    : public IColumnConverter
-{
-public:
-    TBooleanColumnConverter(int columnIndex, const NTableClient::TColumnSchema& columnSchema)
-        : ColumnIndex_(columnIndex)
-        , ColumnSchema_(columnSchema)
-    { }
-
-    TConvertedColumn Convert(const std::vector<TUnversionedRowValues>& rowsValues) override
-    {
-        Reset();
-        AddValues(rowsValues);
-
-        auto column = std::make_shared<TBatchColumn>();
-        auto nullBitmapRef = NullBitmap_.Flush<TConverterTag>();
-        auto valuesRef = Values_.Flush<TConverterTag>();
-
-        FillColumnarBooleanValues(column.get(), 0, rowsValues.size(), valuesRef);
-        FillColumnarNullBitmap(column.get(), 0, rowsValues.size(), nullBitmapRef);
-
-        column->Type = ColumnSchema_.LogicalType();
-        column->Id = ColumnIndex_;
-
-        TOwningColumn owner = {
-            .Column = std::move(column),
-            .NullBitmap = std::move(nullBitmapRef),
-            .ValueBuffer = std::move(valuesRef),
-        };
-
-        return {{owner}, owner.Column.get()};
-    }
-
-
-private:
-    const int ColumnIndex_;
-    const NTableClient::TColumnSchema ColumnSchema_;
-
-    TBitmapOutput Values_;
-    TBitmapOutput NullBitmap_;
-
-    void Reset()
-    {
-        Values_.Flush<TConverterTag>();
-        NullBitmap_.Flush<TConverterTag>();
-    }
-
-    void AddValues(const std::vector<TUnversionedRowValues>& rowsValues)
-    {
-        for (auto rowValues : rowsValues) {
-            auto value = rowValues[ColumnIndex_];
-            bool isNull = value == nullptr || value->Type == NTableClient::EValueType::Null;
-            bool data = isNull ? false : value->Data.Boolean;
-            NullBitmap_.Append(isNull);
-            Values_.Append(data);
-        }
-    }
-};
-
-} // namespace
-
-////////////////////////////////////////////////////////////////////////////////
-
-IColumnConverterPtr CreateBooleanColumnConverter(int columnIndex, const NTableClient::TColumnSchema& columnSchema)
-{
-    return std::make_unique<TBooleanColumnConverter>(columnIndex, columnSchema);
-}
-
-////////////////////////////////////////////////////////////////////////////////
-
-} // namespace NYT::NColumnConverters

+ 0 - 15
yt/yt/library/column_converters/boolean_column_converter.h

@@ -1,15 +0,0 @@
-#pragma once
-
-#include "column_converter.h"
-
-#include <yt/yt/client/table_client/public.h>
-
-namespace NYT::NColumnConverters {
-
-////////////////////////////////////////////////////////////////////////////////
-
-IColumnConverterPtr CreateBooleanColumnConverter(int columnIndex, const NTableClient::TColumnSchema& columnSchema);
-
-////////////////////////////////////////////////////////////////////////////////
-
-} // namespace NYT::NColumnConverters

+ 0 - 91
yt/yt/library/column_converters/column_converter.cpp

@@ -1,91 +0,0 @@
-#include "column_converter.h"
-
-#include "boolean_column_converter.h"
-#include "floating_point_column_converter.h"
-#include "integer_column_converter.h"
-#include "null_column_converter.h"
-#include "string_column_converter.h"
-
-#include <yt/yt/client/table_client/row_base.h>
-#include <yt/yt/client/table_client/schema.h>
-#include <yt/yt/client/table_client/unversioned_row.h>
-
-namespace NYT::NColumnConverters {
-
-using namespace NTableClient;
-
-////////////////////////////////////////////////////////////////////////////////
-
-IColumnConverterPtr CreateColumnConvert(
-    const NTableClient::TColumnSchema& columnSchema,
-    int columnIndex)
-{
-    switch (columnSchema.GetWireType()) {
-        case EValueType::Int64:
-            return CreateInt64ColumnConverter(columnIndex, columnSchema);
-
-        case EValueType::Uint64:
-            return CreateUint64ColumnConverter(columnIndex, columnSchema);
-
-        case EValueType::Double:
-            switch (columnSchema.CastToV1Type()) {
-                case NTableClient::ESimpleLogicalValueType::Float:
-                    return CreateFloatingPoint32ColumnConverter(columnIndex, columnSchema);
-                default:
-                    return CreateFloatingPoint64ColumnConverter(columnIndex, columnSchema);
-            }
-
-        case EValueType::String:
-            return CreateStringConverter(columnIndex, columnSchema);
-
-        case EValueType::Boolean:
-            return CreateBooleanColumnConverter(columnIndex, columnSchema);
-
-        case EValueType::Any:
-            return CreateAnyConverter(columnIndex, columnSchema);
-
-        case EValueType::Composite:
-            return CreateCompositeConverter(columnIndex, columnSchema);
-
-        case EValueType::Null:
-            return CreateNullConverter(columnIndex);
-
-        case EValueType::Min:
-        case EValueType::TheBottom:
-        case EValueType::Max:
-            break;
-    }
-    ThrowUnexpectedValueType(columnSchema.GetWireType());
-}
-
-////////////////////////////////////////////////////////////////////////////////
-
-
-TConvertedColumnRange ConvertRowsToColumns(
-    TRange<TUnversionedRow> rows,
-    const std::vector<TColumnSchema>& columnSchema)
-{
-    TConvertedColumnRange convertedColumnsRange;
-    std::vector<TUnversionedRowValues> rowsValues;
-    rowsValues.reserve(rows.size());
-
-    for (const auto& row : rows) {
-        TUnversionedRowValues rowValues;
-        rowValues.resize(columnSchema.size(), nullptr);
-        for (const auto* item = row.Begin(); item != row.End(); ++item) {
-            rowValues[item->Id] = item;
-        }
-        rowsValues.push_back(std::move(rowValues));
-    }
-
-    for (int columnId = 0; columnId < std::ssize(columnSchema); columnId++) {
-        auto converter = CreateColumnConvert(columnSchema[columnId], columnId);
-        auto columns = converter->Convert(rowsValues);
-        convertedColumnsRange.push_back(columns);
-    }
-    return convertedColumnsRange;
-}
-
-////////////////////////////////////////////////////////////////////////////////
-
-} // namespace NYT::NColumnConverters

+ 0 - 54
yt/yt/library/column_converters/column_converter.h

@@ -1,54 +0,0 @@
-#pragma once
-
-#include <yt/yt/client/table_client/row_batch.h>
-
-#include <yt/yt/core/misc/bitmap.h>
-
-#include <library/cpp/yt/memory/ref.h>
-
-namespace NYT::NColumnConverters {
-
-////////////////////////////////////////////////////////////////////////////////
-
-using TBatchColumn = NTableClient::IUnversionedColumnarRowBatch::TColumn;
-using TBatchColumnPtr = std::shared_ptr<TBatchColumn>;
-using TUnversionedRowValues = std::vector<const NTableClient::TUnversionedValue*>;
-
-////////////////////////////////////////////////////////////////////////////////
-
-struct TOwningColumn
-{
-    TBatchColumnPtr Column;
-    TSharedRef NullBitmap;
-    TSharedRef ValueBuffer;
-    TSharedRef StringBuffer;
-};
-
-struct TConvertedColumn
-{
-    std::vector<TOwningColumn> Columns;
-    TBatchColumn* RootColumn;
-};
-
-using TConvertedColumnRange = std::vector<TConvertedColumn>;
-
-////////////////////////////////////////////////////////////////////////////////
-
-struct IColumnConverter
-    : private TNonCopyable
-{
-    virtual ~IColumnConverter() = default;
-    virtual TConvertedColumn Convert(const std::vector<TUnversionedRowValues>& rowsValues) = 0;
-};
-
-using IColumnConverterPtr = std::unique_ptr<IColumnConverter>;
-
-////////////////////////////////////////////////////////////////////////////////
-
-TConvertedColumnRange ConvertRowsToColumns(
-    TRange<NTableClient::TUnversionedRow> rows,
-    const std::vector<NTableClient::TColumnSchema>& columnSchema);
-
-////////////////////////////////////////////////////////////////////////////////
-
-} // namespace NYT::NColumnConverters

Some files were not shown because too many files changed in this diff