Просмотр исходного кода

KIKIMR-19522 BTreeIndex Charge Keys (#1102)

kungurtsev 1 год назад
Родитель
Сommit
6e34ae9e55

+ 179 - 82
ydb/core/tablet_flat/flat_part_charge_btree_index.h

@@ -11,56 +11,110 @@ class TChargeBTreeIndex : public ICharge {
     using TBtreeIndexMeta = NPage::TBtreeIndexMeta;
     using TRecIdx = NPage::TRecIdx;
     using TGroupId = NPage::TGroupId;
+    using TChild = TBtreeIndexNode::TChild;
+
+    // TODO: store PageId only instead of TChild?
+    struct TNodeState : TBtreeIndexNode, TChild {
+        TRowId BeginRowId;
+        TRowId EndRowId;
+
+        TNodeState(TSharedData data, TChild meta, TRowId beginRowId, TRowId endRowId)
+            : TBtreeIndexNode(data) 
+            , TChild(meta)
+            , BeginRowId(beginRowId)
+            , EndRowId(endRowId)
+        {
+        }
+    };
 
 public:
     TChargeBTreeIndex(IPages *env, const TPart &part, TTagsRef tags, bool includeHistory = false)
         : Part(&part)
+        , Scheme(*Part->Scheme)
         , Env(env) {
-        Y_UNUSED(env);
         Y_UNUSED(part);
         Y_UNUSED(tags);
         Y_UNUSED(includeHistory);
     }
 
 public:
-    TResult Do(const TCells key1, const TCells key2, TRowId row1, TRowId row2, 
+    TResult Do(TCells key1, TCells key2, TRowId row1, TRowId row2, 
             const TKeyCellDefaults &keyDefaults, ui64 itemsLimit, ui64 bytesLimit) const noexcept override {
         bool ready = true;
 
-        Y_UNUSED(key1);
-        Y_UNUSED(key2);
-        Y_UNUSED(keyDefaults);
         Y_UNUSED(itemsLimit);
         Y_UNUSED(bytesLimit);
 
         const auto& meta = Part->IndexPages.BTreeGroups[0];
 
+        if (meta.LevelCount == 0) {
+            ready &= HasDataPage(meta.PageId, { });
+            return { ready, true };
+        }
+
         if (Y_UNLIKELY(row1 >= meta.RowCount)) {
             return { true, true }; // already out of bounds, nothing to precharge
         }
         if (Y_UNLIKELY(row1 > row2)) {
             row2 = row1; // will not go further than row1
         }
+        TRowId sliceRow2 = row2;
+        if (Y_UNLIKELY(key1 && key2 && Compare(key1, key2, keyDefaults) > 0)) {
+            key2 = key1; // will not go further than key1
+        }
 
-        TVector<TBtreeIndexNode> level, nextLevel(Reserve(2));
-        for (ui32 height = 0; height < meta.LevelCount && ready; height++) {
-            if (height == 0) {
-                ready &= TryLoadNode(meta.PageId, nextLevel);
-            } else {
-                for (ui32 i : xrange<ui32>(level.size())) {
-                    TRecIdx from = 0, to = level[i].GetKeysCount();
-                    if (i == 0) {
-                        from = level[i].Seek(row1);
-                    }
-                    if (i + 1 == level.size() && row2 < meta.RowCount) {
-                        to = level[i].Seek(row2);
-                    }
-                    for (TRecIdx j : xrange(from, to + 1)) {
-                        ready &= TryLoadNode(level[i].GetShortChild(j).PageId, nextLevel);
+        TVector<TNodeState> level(Reserve(3)), nextLevel(Reserve(3));
+        TPageId key1PageId = key1 ? meta.PageId : Max<TPageId>();
+        TPageId key2PageId = key2 ? meta.PageId : Max<TPageId>();
+
+        const auto iterateLevel = [&](const auto& tryLoadNext) {
+            for (ui32 i : xrange<ui32>(level.size())) {
+                if (level[i].PageId == key1PageId) {
+                    TRecIdx pos = level[i].Seek(ESeek::Lower, key1, Scheme.Groups[0].ColsKeyIdx, &keyDefaults);
+                    key1PageId = level[i].GetShortChild(pos).PageId;
+                    if (pos) {
+                        // move row1 to the first key >= key1
+                        row1 = Max(row1, level[i].GetShortChild(pos - 1).RowCount);
                     }
                 }
+                if (level[i].PageId == key2PageId) {
+                    TRecIdx pos = level[i].Seek(ESeek::Lower, key2, Scheme.Groups[0].ColsKeyIdx, &keyDefaults);
+                    key2PageId = level[i].GetShortChild(pos).PageId;
+                    // move row2 to the first key > key2
+                    row2 = Min(row2, level[i].GetShortChild(pos).RowCount);
+                    // always charge row2, no matter what keys are
+                    row2 = Max(row2, row1);
+                }
+                
+                if (level[i].EndRowId <= row1 || level[i].BeginRowId > row2) {
+                    continue;
+                }
+
+                TRecIdx from = 0, to = level[i].GetKeysCount();
+                if (level[i].BeginRowId < row1) {
+                    from = level[i].Seek(row1);
+                }
+                if (level[i].EndRowId > row2 + 1) {
+                    to = level[i].Seek(row2);
+                }
+                for (TRecIdx j : xrange(from, to + 1)) {
+                    ready &= tryLoadNext(level[i], j);
+                }
             }
+        };
 
+        const auto tryLoadNode = [&](TNodeState& current, TRecIdx pos) -> bool {
+            return TryLoadNode(current, pos, nextLevel);
+        };
+
+        const auto hasDataPage = [&](TNodeState& current, TRecIdx pos) -> bool {
+            return HasDataPage(current.GetShortChild(pos).PageId, { });
+        };
+
+        ready &= TryLoadRoot(meta, level);
+
+        for (ui32 height = 1; height < meta.LevelCount && ready; height++) {
+            iterateLevel(tryLoadNode);
             level.swap(nextLevel);
             nextLevel.clear();
         }
@@ -70,66 +124,89 @@ public:
             return {false, false};
         }
 
-        if (meta.LevelCount == 0) {
-            ready &= HasDataPage(meta.PageId, { });
-        } else {
-            for (ui32 i : xrange<ui32>(level.size())) {
-                TRecIdx from = 0, to = level[i].GetKeysCount();
-                if (i == 0) {
-                    from = level[i].Seek(row1);
-                }
-                if (i + 1 == level.size() && row2 < meta.RowCount) {
-                    to = level[i].Seek(row2);
-                }
-                for (TRecIdx j : xrange(from, to + 1)) {
-                    ready &= HasDataPage(level[i].GetShortChild(j).PageId, { });
-                }
-            }
-        }
+        iterateLevel(hasDataPage);
 
-        // TODO: overshot for keys search
-        return {ready, false};
+        return {ready, row2 == sliceRow2};
     }
 
-    TResult DoReverse(const TCells key1, const TCells key2, TRowId row1, TRowId row2, 
+    TResult DoReverse(TCells key1, TCells key2, TRowId row1, TRowId row2, 
             const TKeyCellDefaults &keyDefaults, ui64 itemsLimit, ui64 bytesLimit) const noexcept override {
         bool ready = true;
         
-        Y_UNUSED(key1);
-        Y_UNUSED(key2);
-        Y_UNUSED(keyDefaults);
         Y_UNUSED(itemsLimit);
         Y_UNUSED(bytesLimit);
 
         const auto& meta = Part->IndexPages.BTreeGroups[0];
 
+        if (meta.LevelCount == 0) {
+            ready &= HasDataPage(meta.PageId, { });
+            return { ready, true };
+        }
+
         if (Y_UNLIKELY(row1 >= meta.RowCount)) {
             row1 = meta.RowCount - 1; // start from the last row
         }
-        if (Y_UNLIKELY(row1 < row2)) {
+        if (Y_UNLIKELY(row2 > row1)) {
             row2 = row1; // will not go further than row1
         }
+        TRowId sliceRow2 = row2;
+        if (Y_UNLIKELY(key1 && key2 && Compare(key2, key1, keyDefaults) > 0)) {
+            key2 = key1; // will not go further than key1
+        }
 
         // level contains nodes in reverse order
-        TVector<TBtreeIndexNode> level, nextLevel(Reserve(2));
-        for (ui32 height = 0; height < meta.LevelCount && ready; height++) {
-            if (height == 0) {
-                ready &= TryLoadNode(meta.PageId, nextLevel);
-            } else {
-                for (ui32 i : xrange<ui32>(level.size())) {
-                    TRecIdx from = level[i].GetKeysCount(), to = 0;
-                    if (i == 0) {
-                        from = level[i].Seek(row1);
-                    }
-                    if (i + 1 == level.size() && row2 < meta.RowCount) {
-                        to = level[i].Seek(row2);
-                    }
-                    for (TRecIdx j = from + 1; j > to; j--) {
-                        ready &= TryLoadNode(level[i].GetShortChild(j - 1).PageId, nextLevel);
+        TVector<TNodeState> level(Reserve(3)), nextLevel(Reserve(3));
+        TPageId key1PageId = key1 ? meta.PageId : Max<TPageId>();
+        TPageId key2PageId = key2 ? meta.PageId : Max<TPageId>();
+
+        const auto iterateLevel = [&](const auto& tryLoadNext) {
+            for (ui32 i : xrange<ui32>(level.size())) {
+                if (level[i].PageId == key1PageId) {
+                    TRecIdx pos = level[i].SeekReverse(ESeek::Lower, key1, Scheme.Groups[0].ColsKeyIdx, &keyDefaults);
+                    key1PageId = level[i].GetShortChild(pos).PageId;
+                    // move row1 to the first key <= key1
+                    row1 = Min(row1, level[i].GetShortChild(pos).RowCount - 1);
+                }
+                if (level[i].PageId == key2PageId) {
+                    TRecIdx pos = level[i].SeekReverse(ESeek::Lower, key2, Scheme.Groups[0].ColsKeyIdx, &keyDefaults);
+                    key2PageId = level[i].GetShortChild(pos).PageId;
+                    // move row2 to the first key > key2
+                    if (pos) {
+                        row2 = Max(row2, level[i].GetShortChild(pos - 1).RowCount - 1);
+                        // always charge row1, no matter what keys are
+                        row2 = Min(row2, row1);
                     }
                 }
+                
+                if (level[i].EndRowId <= row2 || level[i].BeginRowId > row1) {
+                    continue;
+                }
+
+                TRecIdx from = level[i].GetKeysCount(), to = 0;
+                if (level[i].EndRowId > row1 + 1) {
+                    from = level[i].Seek(row1);
+                }
+                if (level[i].BeginRowId < row2) {
+                    to = level[i].Seek(row2);
+                }
+                for (TRecIdx j = from + 1; j > to; j--) {
+                    ready &= tryLoadNext(level[i], j - 1);
+                }
             }
+        };
+
+        const auto tryLoadNode = [&](TNodeState& current, TRecIdx pos) -> bool {
+            return TryLoadNode(current, pos, nextLevel);
+        };
+
+        const auto hasDataPage = [&](TNodeState& current, TRecIdx pos) -> bool {
+            return HasDataPage(current.GetShortChild(pos).PageId, { });
+        };
+
+        ready &= TryLoadRoot(meta, level);
 
+        for (ui32 height = 1; height < meta.LevelCount && ready; height++) {
+            iterateLevel(tryLoadNode);
             level.swap(nextLevel);
             nextLevel.clear();
         }
@@ -139,25 +216,9 @@ public:
             return {false, false};
         }
 
-        if (meta.LevelCount == 0) {
-            ready &= HasDataPage(meta.PageId, { });
-        } else {
-            for (ui32 i : xrange<ui32>(level.size())) {
-                TRecIdx from = level[i].GetKeysCount(), to = 0;
-                if (i == 0) {
-                    from = level[i].Seek(row1);
-                }
-                if (i + 1 == level.size() && row2 < meta.RowCount) {
-                    to = level[i].Seek(row2);
-                }
-                for (TRecIdx j = from + 1; j > to; j--) {
-                    ready &= HasDataPage(level[i].GetShortChild(j - 1).PageId, { });
-                }
-            }
-        }
+        iterateLevel(hasDataPage);
 
-        // TODO: overshot for keys search
-        return {ready, false};
+        return {ready, row2 == sliceRow2};
     }
 
 private:
@@ -165,17 +226,53 @@ private:
         return Env->TryGetPage(Part, pageId, groupId);
     }
 
-    bool TryLoadNode(TPageId pageId, TVector<TBtreeIndexNode>& level) const noexcept {
-        auto page = Env->TryGetPage(Part, pageId);
-        if (page) {
-            level.emplace_back(*page);
-            return true;
+    bool TryLoadRoot(const TBtreeIndexMeta& meta, TVector<TNodeState>& level) const noexcept {
+        auto page = Env->TryGetPage(Part, meta.PageId);
+        if (!page) {
+            return false;
         }
-        return false;
+
+        level.emplace_back(*page, meta, 0, meta.RowCount);
+        return true;
+    }
+
+    bool TryLoadNode(TNodeState& current, TRecIdx pos, TVector<TNodeState>& level) const noexcept {
+        Y_ABORT_UNLESS(pos < current.GetChildrenCount(), "Should point to some child");
+
+        auto child = current.GetChild(pos);
+
+        auto page = Env->TryGetPage(Part, child.PageId);
+        if (!page) {
+            return false;
+        }
+
+        TRowId beginRowId = pos ? current.GetChild(pos - 1).RowCount : current.BeginRowId;
+        TRowId endRowId = child.RowCount;
+
+        level.emplace_back(*page, child, beginRowId, endRowId);
+        return true;
+    }
+
+    int Compare(TCells left, TCells right, const TKeyCellDefaults &keyDefaults) const noexcept
+    {
+        Y_DEBUG_ABORT_UNLESS(left, "Empty keys should be handled separately");
+        Y_DEBUG_ABORT_UNLESS(right, "Empty keys should be handled separately");
+
+        for (TPos it = 0; it < Min(left.size(), right.size(), keyDefaults.Size()); it++) {
+            if (int cmp = CompareTypedCells(left[it], right[it], keyDefaults.Types[it])) {
+                return cmp;
+            }
+        }
+
+        return left.size() == right.size() 
+            ? 0
+            // Missing point cells are filled with a virtual +inf
+            : (left.size() > right.size() ? -1 : 1);
     }
 
 private:
     const TPart* const Part;
+    const TPartScheme &Scheme;
     IPages* const Env;
 };
 

+ 19 - 27
ydb/core/tablet_flat/flat_part_charge_range.cpp

@@ -7,20 +7,14 @@ bool ChargeRange(IPages *env, const TCells key1, const TCells key2,
             const TRun &run, const TKeyCellDefaults &keyDefaults, TTagsRef tags,
             ui64 items, ui64 bytes, bool includeHistory) noexcept
 {
-    if (run.size() == 1) {
-        auto pos = run.begin();
-        TRowId row1 = pos->Slice.BeginRowId();
-        TRowId row2 = pos->Slice.EndRowId() - 1;
-        return CreateCharge(env, *pos->Part, tags, includeHistory)->Do(key1, key2, row1, row2, keyDefaults, items, bytes).Ready;
-    }
-
     bool ready = true;
     auto pos = run.LowerBound(key1);
 
     if (pos == run.end())
         return true;
 
-    bool fromStart = TSlice::CompareSearchKeyFirstKey(key1, pos->Slice, keyDefaults) <= 0;
+    // key1 <= FirstKey
+    bool chargeFromSliceFirstRow = TSlice::CompareSearchKeyFirstKey(key1, pos->Slice, keyDefaults) <= 0;
 
     while (pos != run.end()) {
         TRowId row1 = pos->Slice.BeginRowId();
@@ -29,30 +23,32 @@ bool ChargeRange(IPages *env, const TCells key1, const TCells key2,
         const int cmp = TSlice::CompareLastKeySearchKey(pos->Slice, key2, keyDefaults);
 
         TArrayRef<const TCell> key1r;
-        if (!fromStart) {
+        if (!chargeFromSliceFirstRow) {
             key1r = key1;
         }
         TArrayRef<const TCell> key2r;
-        if (cmp > 0 /* slice->LastKey > key2 */) {
+        if (cmp > 0) {
+            // key2 < LastKey
             key2r = key2;
         }
 
         auto r = CreateCharge(env, *pos->Part, tags, includeHistory)->Do(key1r, key2r, row1, row2, keyDefaults, items, bytes);
         ready &= r.Ready;
 
-        if (cmp >= 0 /* slice->LastKey >= key2 */) {
+        if (cmp >= 0) {
+            // key2 <= LastKey
             if (r.Overshot && ++pos != run.end()) {
-                // Unfortunately key > key2 might be at the start of the next slice
+                // Unfortunately first key > key2 might be at the start of the next slice
                 TRowId firstRow = pos->Slice.BeginRowId();
                 // Precharge the first row on the next slice
-                CreateCharge(env, *pos->Part, tags, includeHistory)->Do(firstRow, firstRow, keyDefaults, items, bytes);
+                ready &= CreateCharge(env, *pos->Part, tags, includeHistory)->Do(firstRow, firstRow, keyDefaults, items, bytes);
             }
 
             break;
         }
 
         // Will consume this slice before encountering key2
-        fromStart = true;
+        chargeFromSliceFirstRow = true;
         ++pos;
     }
 
@@ -63,20 +59,14 @@ bool ChargeRangeReverse(IPages *env, const TCells key1, const TCells key2,
             const TRun &run, const TKeyCellDefaults &keyDefaults, TTagsRef tags,
             ui64 items, ui64 bytes, bool includeHistory) noexcept
 {
-    if (run.size() == 1) {
-        auto pos = run.begin();
-        TRowId row1 = pos->Slice.EndRowId() - 1;
-        TRowId row2 = pos->Slice.BeginRowId();
-        return CreateCharge(env, *pos->Part, tags, includeHistory)->DoReverse(key1, key2, row1, row2, keyDefaults, items, bytes).Ready;
-    }
-
     bool ready = true;
     auto pos = run.LowerBoundReverse(key1);
 
     if (pos == run.end())
         return true;
 
-    bool fromEnd = TSlice::CompareLastKeySearchKey(pos->Slice, key1, keyDefaults) <= 0;
+    // LastKey <= key1
+    bool chargeFromSliceLastRow = TSlice::CompareLastKeySearchKey(pos->Slice, key1, keyDefaults) <= 0;
 
     for (;;) {
         TRowId row1 = pos->Slice.EndRowId() - 1;
@@ -86,11 +76,12 @@ bool ChargeRangeReverse(IPages *env, const TCells key1, const TCells key2,
         const int cmp = key2 ? TSlice::CompareSearchKeyFirstKey(key2, pos->Slice, keyDefaults) : -1;
 
         TArrayRef<const TCell> key1r;
-        if (!fromEnd) {
+        if (!chargeFromSliceLastRow) {
             key1r = key1;
         }
         TArrayRef<const TCell> key2r;
-        if (cmp > 0 /* key2 > slice->FirstKey */) {
+        if (cmp > 0) {
+            // FirstKey < key2
             key2r = key2;
         }
 
@@ -102,19 +93,20 @@ bool ChargeRangeReverse(IPages *env, const TCells key1, const TCells key2,
         }
 
         if (cmp >= 0 /* key2 >= slice->FirstKey */) {
+            // FirstKey <= key2
             if (r.Overshot) {
                 --pos;
-                // Unfortunately key <= key2 might be at the end of the previous slice
+                // Unfortunately first key <= key2 might be at the end of the previous slice
                 TRowId lastRow = pos->Slice.EndRowId() - 1;
                 // Precharge the last row on the previous slice
-                CreateCharge(env, *pos->Part, tags, includeHistory)->DoReverse(lastRow, lastRow, keyDefaults, items, bytes);
+                ready &= CreateCharge(env, *pos->Part, tags, includeHistory)->DoReverse(lastRow, lastRow, keyDefaults, items, bytes);
             }
 
             break;
         }
 
         // Will consume this slice before encountering key2
-        fromEnd = true;
+        chargeFromSliceLastRow = true;
         --pos;
     }
 

+ 18 - 12
ydb/core/tablet_flat/flat_part_dump.cpp

@@ -122,7 +122,7 @@ namespace {
         Out
             << " + Index{" << (ui16)label.Type << " rev "
             << label.Format << ", " << label.Size << "b}"
-            << " " << index->Count << " rec" << Endl
+            << " " << index->Count + (index.GetLastKeyRecord() ? 1 : 0) << " rec" << Endl
             << " |  Page     Row    Bytes  (";
 
         for (auto off : xrange(part.Scheme->Groups[0].KeyTypes.size())) {
@@ -133,18 +133,8 @@ namespace {
 
         Out << ")" << Endl;
 
-        for (auto iter = index->Begin(); iter; iter++) {
+        auto printIndexKey = [&](const NPage::TIndex::TRecord* record) {
             key.clear();
-
-            if (depth < 2 && iter.Off() >= 10) {
-                Out
-                    << " | -- skipped " << index->Count - iter.Off()
-                    << " entries, depth level " << depth << Endl;
-
-                break;
-            }
-
-            auto record = iter.GetRecord();
             for (const auto &info: part.Scheme->Groups[0].ColsKeyIdx)
                 key.push_back(record->Cell(info));
 
@@ -161,6 +151,22 @@ namespace {
             Key(key, *part.Scheme);
 
             Out << Endl;
+        };
+
+        for (auto iter = index->Begin(); iter; iter++) {
+            if (depth < 2 && iter.Off() >= 10) {
+                Out
+                    << " | -- skipped " << index->Count - iter.Off()
+                    << " entries, depth level " << depth << Endl;
+
+                break;
+            }
+
+            printIndexKey(iter.GetRecord());
+        }
+
+        if (index.GetLastKeyRecord()) {
+            printIndexKey(index.GetLastKeyRecord());
         }
     }
 

+ 15 - 1
ydb/core/tablet_flat/test/libs/table/test_part.h

@@ -176,7 +176,7 @@ namespace NTest {
             return index.GetLastRecord();
         }
 
-        inline const TPartIndexIt::TRecord * GetRecord(const TPartStore& part, TPageId pageIndex) {
+        inline const TPartIndexIt::TRecord * GetRecord(const TPartStore& part, ui32 pageIndex) {
             TTestEnv env;
             TPartIndexIt index(&part, &env, { });
 
@@ -187,6 +187,20 @@ namespace NTest {
 
             return index.GetRecord();
         }
+
+        inline TPageId GetFirstPageId(const TPartStore& part) {
+            TTestEnv env;
+            TPartIndexIt index(&part, &env, { });
+            index.Seek(0);
+            return index.GetPageId();
+        }
+
+        inline TPageId GetLastPageId(const TPartStore& part) {
+            TTestEnv env;
+            TPartIndexIt index(&part, &env, { });
+            index.Seek(index.GetEndRowId() - 1);
+            return index.GetPageId();
+        }
     }
 
 }}}

+ 102 - 30
ydb/core/tablet_flat/ut/ut_btree_index_iter_charge.cpp

@@ -2,6 +2,7 @@
 #include "flat_part_btree_index_iter.h"
 #include "flat_part_charge.h"
 #include "flat_part_charge_btree_index.h"
+#include "flat_part_charge_range.h"
 #include "flat_part_iter_multi.h"
 #include "test/libs/table/test_writer.h"
 #include <ydb/core/tablet_flat/test/libs/rows/layout.h>
@@ -38,7 +39,7 @@ namespace {
         TMap<TGroupId, TSet<TPageId>> Touched;
     };
 
-    void AssertLoadTheSame(const TPartStore& part, const TTouchEnv& bTree, const TTouchEnv& flat, const TString& message) {
+    void AssertLoadedTheSame(const TPartStore& part, const TTouchEnv& bTree, const TTouchEnv& flat, const TString& message, bool allowFirstLastPageDifference = false) {
         TSet<TGroupId> groupIds;
         for (const auto &c : {bTree.Loaded, flat.Loaded}) {
             for (const auto &g : c) {
@@ -59,7 +60,17 @@ namespace {
                 }
             }
 
-            UNIT_ASSERT_VALUES_EQUAL_C(flatDataPages, bTreeDataPages, message);
+            // Note: it's possible that B-Tree index touches extra first / last page because it doesn't have boundary keys
+            // this should be resolved using slices (see ChargeRange)
+            if (allowFirstLastPageDifference) {
+                for (auto additionalPageId : {IndexTools::GetFirstPageId(part), IndexTools::GetLastPageId(part)}) {
+                    if (bTreeDataPages.contains(additionalPageId)) {
+                        flatDataPages.insert(additionalPageId);
+                    }
+                }
+            } else {
+                UNIT_ASSERT_VALUES_EQUAL_C(flatDataPages, bTreeDataPages, message);
+            }
         }
     }
 
@@ -111,7 +122,7 @@ namespace {
                 }
                 return TSerializedCellVec(key);
             };
-            auto add = [&](TRowId pageIndex1 /*inclusive*/, TRowId pageIndex2 /*exclusive*/) {
+            auto add = [&](ui32 pageIndex1 /*inclusive*/, ui32 pageIndex2 /*exclusive*/) {
                 TSlice slice;
                 slice.FirstInclusive = true;
                 slice.FirstRowId = pageIndex1 * 2;
@@ -366,12 +377,12 @@ Y_UNIT_TEST_SUITE(TChargeBTreeIndex) {
         Y_UNREACHABLE();
     }
 
-    bool DoChargeKeys(ICharge& charge, TTouchEnv& env, const TCells key1, const TCells key2, ui64 itemsLimit, ui64 bytesLimit,
+    bool DoChargeKeys(const TPartStore& part, ICharge& charge, TTouchEnv& env, const TCells key1, const TCells key2, ui64 itemsLimit, ui64 bytesLimit,
             bool reverse, const TKeyCellDefaults &keyDefaults, const TString& message, ui32 failsAllowed = 10) {
         while (true) {
             auto result = reverse
-                ? charge.DoReverse(key1, key2, 0, Max<TRowId>(), keyDefaults, itemsLimit, bytesLimit)
-                : charge.Do(key1, key2, 0, Max<TRowId>(), keyDefaults, itemsLimit, bytesLimit);
+                ? charge.DoReverse(key1, key2, part.Stat.Rows - 1, 0, keyDefaults, itemsLimit, bytesLimit)
+                : charge.Do(key1, key2, 0, part.Stat.Rows - 1, keyDefaults, itemsLimit, bytesLimit);
             if (result.Ready) {
                 return result.Overshot;
             }
@@ -382,8 +393,8 @@ Y_UNIT_TEST_SUITE(TChargeBTreeIndex) {
     }
 
     void CheckChargeRowId(const TPartStore& part, TTagsRef tags, const TKeyCellDefaults *keyDefaults, bool reverse) {
-        for (TRowId rowId1 : xrange(part.Stat.Rows + 1)) {
-            for (TRowId rowId2 : xrange(part.Stat.Rows + 1)) {
+        for (TRowId rowId1 : xrange(part.Stat.Rows)) {
+            for (TRowId rowId2 : xrange(part.Stat.Rows)) {
                 TTouchEnv bTreeEnv, flatEnv;
                 TChargeBTreeIndex bTree(&bTreeEnv, part, tags, true);
                 TCharge flat(&flatEnv, part, tags, true);
@@ -391,7 +402,7 @@ Y_UNIT_TEST_SUITE(TChargeBTreeIndex) {
                 TString message = TStringBuilder() << (reverse ? "ChargeRowIdReverse " : "ChargeRowId ") << rowId1 << " " << rowId2;
                 DoChargeRowId(bTree, bTreeEnv, rowId1, rowId2, 0, 0, reverse, *keyDefaults, message);
                 DoChargeRowId(flat, flatEnv, rowId1, rowId2, 0, 0, reverse, *keyDefaults, message);
-                AssertLoadTheSame(part, bTreeEnv, flatEnv, message);
+                AssertLoadedTheSame(part, bTreeEnv, flatEnv, message);
             }
         }
     }
@@ -418,15 +429,11 @@ Y_UNIT_TEST_SUITE(TChargeBTreeIndex) {
                         }
                         message << ")";
 
-                        bool bTreeOvershot = DoChargeKeys(bTree, bTreeEnv, key1, key2, 0, 0, reverse, *keyDefaults, message);
-                        bool flatOvershot = DoChargeKeys(flat, flatEnv, key1, key2, 0, 0, reverse, *keyDefaults, message);
+                        bool bTreeOvershot = DoChargeKeys(part, bTree, bTreeEnv, key1, key2, 0, 0, reverse, *keyDefaults, message);
+                        bool flatOvershot = DoChargeKeys(part, flat, flatEnv, key1, key2, 0, 0, reverse, *keyDefaults, message);
                         
-                        // TODO
-                        // UNIT_ASSERT_VALUES_EQUAL_C(bTreeOvershot, flatOvershot, message);
-                        Y_UNUSED(bTreeOvershot);
-                        Y_UNUSED(flatOvershot);
-
-                        AssertLoadTheSame(part, bTreeEnv, flatEnv, message);
+                        UNIT_ASSERT_VALUES_EQUAL_C(bTreeOvershot, flatOvershot, message);
+                        AssertLoadedTheSame(part, bTreeEnv, flatEnv, message, true);
                     }
                 }
             }
@@ -444,10 +451,8 @@ Y_UNIT_TEST_SUITE(TChargeBTreeIndex) {
 
         CheckChargeRowId(part, tags, eggs.Scheme->Keys.Get(), false);
         CheckChargeRowId(part, tags, eggs.Scheme->Keys.Get(), true);
-        // TODO: isn't working yet
-        // CheckChargeKeys(part, tags, eggs.Scheme->Keys.Get(), false);
-        // CheckChargeKeys(part, tags, eggs.Scheme->Keys.Get(), true);
-        // TODO: mixed
+        CheckChargeKeys(part, tags, eggs.Scheme->Keys.Get(), false);
+        CheckChargeKeys(part, tags, eggs.Scheme->Keys.Get(), true);
     }
 
     Y_UNIT_TEST(NoNodes) {
@@ -470,7 +475,7 @@ Y_UNIT_TEST_SUITE(TPartBtreeIndexIteration) {
         UNIT_ASSERT_VALUES_EQUAL_C(bTree.GetRowId(), flat.GetRowId(), message);
     }
 
-    EReady SeekKey(TRunIt& iter, TTouchEnv& env, ESeek seek, bool reverse, TCells key, const TString& message, ui32 failsAllowed = 10) {
+    EReady Seek(TRunIt& iter, TTouchEnv& env, ESeek seek, bool reverse, TCells key, const TString& message, ui32 failsAllowed = 10) {
         return Retry([&]() {
             return reverse ? iter.SeekReverse(key, seek) : iter.Seek(key, seek);
         }, env, message, failsAllowed);
@@ -482,7 +487,22 @@ Y_UNIT_TEST_SUITE(TPartBtreeIndexIteration) {
         }, env, message, failsAllowed);
     }
 
-    void CheckIterateKey(const TPartEggs& eggs) {
+    void Charge(const TRun &run, const TVector<TTag> tags, TTouchEnv& env, const TCells key1, const TCells key2, ui64 itemsLimit, ui64 bytesLimit,
+            bool reverse, const TKeyCellDefaults &keyDefaults, const TString& message, ui32 failsAllowed = 10) {
+        while (true) {
+            auto result = reverse
+                ? ChargeRangeReverse(&env, key1, key2, run, keyDefaults, tags, itemsLimit, bytesLimit, true)
+                : ChargeRange(&env, key1, key2, run, keyDefaults, tags, itemsLimit, bytesLimit, true);
+            if (result) {
+                return;
+            }
+            env.LoadTouched(true);
+            UNIT_ASSERT_C(failsAllowed--, "Too many fails " + message);
+        }
+        Y_UNREACHABLE();
+    }
+
+    void CheckIterate(const TPartEggs& eggs) {
         const auto part = *eggs.Lone();
 
         TRun btreeRun(*eggs.Scheme->Keys), flatRun(*eggs.Scheme->Keys);
@@ -512,18 +532,18 @@ Y_UNIT_TEST_SUITE(TPartBtreeIndexIteration) {
                         TRunIt bTree(btreeRun, tags, eggs.Scheme->Keys, &bTreeEnv);
 
                         {
-                            TStringBuilder message = TStringBuilder() << (reverse ?  "SeekKeyReverse" : "SeekKey") << "(" << seek << ") ";
+                            TStringBuilder message = TStringBuilder() << (reverse ?  "IterateReverse" : "Iterate") << "(" << seek << ") ";
                             for (auto c : key) {
                                 message << c.AsValue<ui32>() << " ";
                             }
-                            EReady bTreeReady = SeekKey(bTree, bTreeEnv, seek, reverse, key, message);
-                            EReady flatReady = SeekKey(flat, flatEnv, seek, reverse, key, message);
+                            EReady bTreeReady = Seek(bTree, bTreeEnv, seek, reverse, key, message);
+                            EReady flatReady = Seek(flat, flatEnv, seek, reverse, key, message);
                             AssertEqual(bTree, bTreeReady, flat, flatReady, message);
-                            AssertLoadTheSame(part, bTreeEnv, flatEnv, message);
+                            AssertLoadedTheSame(part, bTreeEnv, flatEnv, message);
                         }
 
                         for (ui32 steps = 1; steps <= 10; steps++) {
-                            TStringBuilder message = TStringBuilder() << (reverse ?  "SeekKeyReverse" : "SeekKey") << "(" << seek << ") ";
+                            TStringBuilder message = TStringBuilder() << (reverse ?  "IterateReverse" : "Iterate") << "(" << seek << ") ";
                             for (auto c : key) {
                                 message << c.AsValue<ui32>() << " ";
                             }
@@ -531,7 +551,58 @@ Y_UNIT_TEST_SUITE(TPartBtreeIndexIteration) {
                             EReady bTreeReady = Next(bTree, bTreeEnv, reverse, message);
                             EReady flatReady = Next(flat, flatEnv, reverse, message);
                             AssertEqual(bTree, bTreeReady, flat, flatReady, message);
-                            AssertLoadTheSame(part, bTreeEnv, flatEnv, message);
+                            AssertLoadedTheSame(part, bTreeEnv, flatEnv, message);
+                        }
+                    }
+                }
+            }
+        }
+    }
+
+    void CheckCharge(const TPartEggs& eggs) {
+        const auto part = *eggs.Lone();
+
+        TRun btreeRun(*eggs.Scheme->Keys), flatRun(*eggs.Scheme->Keys);
+        auto flatPart = part.CloneWithEpoch(part.Epoch);
+        for (auto& slice : *part.Slices) {
+            btreeRun.Insert(eggs.Lone(), slice);
+            auto pages = (TVector<TBtreeIndexMeta>*)&flatPart->IndexPages.BTreeGroups;
+            pages->clear();
+            pages = (TVector<TBtreeIndexMeta>*)&flatPart->IndexPages.BTreeHistoric;
+            pages->clear();
+            flatRun.Insert(flatPart, slice);
+        }
+
+        auto tags = TVector<TTag>();
+        for (auto c : eggs.Scheme->Cols) {
+            tags.push_back(c.Tag);
+        }
+
+        for (bool reverse : {false, true}) {
+            for (ui32 firstCellKey1 : xrange<ui32>(0, part.Stat.Rows / 7 + 1)) {
+                for (ui32 secondCellKey1 : xrange<ui32>(0, 14)) {
+                    for (ui32 firstCellKey2 : xrange<ui32>(0, part.Stat.Rows / 7 + 1)) {
+                        for (ui32 secondCellKey2 : xrange<ui32>(0, 14)) {
+                            TVector<TCell> key1 = MakeKey(firstCellKey1, secondCellKey1);
+                            TVector<TCell> key2 = MakeKey(firstCellKey2, secondCellKey2);
+
+                            TTouchEnv bTreeEnv, flatEnv;
+                            
+                            TStringBuilder message = TStringBuilder() << (reverse ? "ChargeReverse " : "Charge ") << "(";
+                            for (auto c : key1) {
+                                message << c.AsValue<ui32>() << " ";
+                            }
+                            message << ") (";
+                            for (auto c : key2) {
+                                message << c.AsValue<ui32>() << " ";
+                            }
+                            message << ")";
+
+                            // TODO: limits
+                            Charge(btreeRun, tags, bTreeEnv, key1, key2, 0, 0, reverse, *eggs.Scheme->Keys, message);
+                            Charge(flatRun, tags, flatEnv, key1, key2, 0, 0, reverse, *eggs.Scheme->Keys, message);
+
+                            AssertLoadedTheSame(part, bTreeEnv, flatEnv, message);
                         }
                     }
                 }
@@ -543,7 +614,8 @@ Y_UNIT_TEST_SUITE(TPartBtreeIndexIteration) {
         TPartEggs eggs = MakePart(slices, levels);
         const auto part = *eggs.Lone();
 
-        CheckIterateKey(eggs);
+        CheckIterate(eggs);
+        CheckCharge(eggs);
     }
 
     Y_UNIT_TEST(NoNodes_SingleSlice) {

+ 19 - 12
ydb/core/tablet_flat/ut/ut_charge.cpp

@@ -516,16 +516,18 @@ Y_UNIT_TEST_SUITE(Charge) {
                 {TGroupId{2}, {}}
             });
 
+            // key 0 transforms into row id 0 because it's before the slice first key 1
             me.To(101).CheckByKeys(0, 9, 0, TMap<TGroupId, TArr>{
                 {TGroupId{0}, {0, 1, 2, 3_I}},
-                {TGroupId{1}, {0_g, 1, 2, 3_g}},
-                {TGroupId{2}, {0_g, 1_g, 2_g, 3, 4, 5, 6_g}}
+                {TGroupId{1}, {0, 1, 2, 3_g}},
+                {TGroupId{2}, {0, 1, 2, 3, 4, 5, 6_g}}
             });
 
+            // key 1 also transforms into row id 0 because it's before the slice first key 1
             me.To(102).CheckByKeys(1, 9, 0, TMap<TGroupId, TArr>{
                 {TGroupId{0}, {0, 1, 2, 3_I}},
-                {TGroupId{1}, {0_g, 1, 2, 3_g}},
-                {TGroupId{2}, {0_g, 1_g, 2_g, 3, 4, 5, 6_g}}
+                {TGroupId{1}, {0, 1, 2, 3_g}},
+                {TGroupId{2}, {0, 1, 2, 3, 4, 5, 6_g}}
             });
 
             me.To(103).CheckByKeys(2, 9, 0, TMap<TGroupId, TArr>{
@@ -688,10 +690,11 @@ Y_UNIT_TEST_SUITE(Charge) {
                 {TGroupId{2}, {}}
             });
 
+            // key 35 transforms into row id 26 because it's after the slice last key 35
             me.To(301).CheckByKeys(27, 35, 0, TMap<TGroupId, TArr>{
                 {TGroupId{0}, {6, 7, 8}},
-                {TGroupId{1}, {10, 11, 12_g, 13_g}},
-                {TGroupId{2}, {20_g, 21, 22, 23, 24_g, 25_g, 26_g}}
+                {TGroupId{1}, {10, 11, 12, 13}},
+                {TGroupId{2}, {20_g, 21, 22, 23, 24, 25, 26}}
             });
 
             me.To(302).CheckByKeys(27, 34, 0, TMap<TGroupId, TArr>{
@@ -801,9 +804,11 @@ Y_UNIT_TEST_SUITE(Charge) {
 
         /*_ 1xx: custom spanned loads scenarios */
 
-        me.To(101).CheckByKeys(0, 35, 8 /* rows */, { 0, 1, 2, 3_I });
-        me.To(102).CheckByKeys(0, 35, 11 /* rows */, { 0, 1, 2, 3, 4_I });
-        me.To(103).CheckByKeys(0, 35, 14 /* rows */, { 0, 1, 2, 3, 4, 5_I });
+        // key 0 transforms into row id 0 because it's before the slice first key 1
+        me.To(101).CheckByKeys(0, 35, 8 /* rows */, { 0, 1, 2 });
+        me.To(102).CheckByKeys(0, 35, 11 /* rows */, { 0, 1, 2, 3 });
+        me.To(103).CheckByKeys(0, 35, 14 /* rows */, { 0, 1, 2, 3, 4 });
+
         me.To(104).CheckByKeys(3, 35, 5 /* rows */, { 0, 1, 2 });
         me.To(105).CheckByKeys(3, 35, 6 /* rows */, { 0, 1, 2, 3_I });
         me.To(106).CheckByKeys(4, 35, 6 /* rows */, { 0, 1, 2, 3 });
@@ -815,7 +820,7 @@ Y_UNIT_TEST_SUITE(Charge) {
 
         /*_ 2xx: one row charge limit on two page */
 
-        for (const ui16 page : xrange(4)) {
+        for (const ui16 page : xrange(1, 5)) {
             const TArr span1{ page, operator""_I(page + 1) };
             const TArr span2{ page, page + 1 };
 
@@ -860,9 +865,10 @@ Y_UNIT_TEST_SUITE(Charge) {
             {TGroupId{2}, {11_g, 10_g, 9_g, 8, 7, 6, 5, 4, 3, 2_g}}
         });
 
+        // key 1 transforms into row id 0 because it's before the slice first key 1
         me.To(104).CheckByKeysReverse(15, 1, 0, TMap<TGroupId, TArr>{
             {TGroupId{0}, {3, 2, 1, 0}},
-            {TGroupId{2}, {11_g, 10_g, 9_g, 8, 7, 6, 5, 4, 3, 2_g, 1_g, 0_g}}
+            {TGroupId{2}, {11_g, 10_g, 9_g, 8, 7, 6, 5, 4, 3, 2, 1, 0}}
         });
 
         me.To(105).CheckByKeysReverse(15, 0, 0, TMap<TGroupId, TArr>{
@@ -890,9 +896,10 @@ Y_UNIT_TEST_SUITE(Charge) {
             {TGroupId{2}, {26_g, 25_g, 24_g}}
         });
 
+        // key 35 transforms into row id 26 because it's after the slice last key 35
         me.To(110).CheckByKeysReverse(35, 32, 0, TMap<TGroupId, TArr>{
             {TGroupId{0}, {8, 7, 6_I}},
-            {TGroupId{2}, {26_g, 25_g, 24_g}}
+            {TGroupId{2}, {26, 25, 24}}
         });
 
         me.To(111).CheckByKeysReverse(4, 1, 0, TMap<TGroupId, TArr>{

+ 16 - 5
ydb/core/tablet_flat/ut/ut_part.cpp

@@ -952,7 +952,7 @@ Y_UNIT_TEST_SUITE(TPart) {
             return TSerializedCellVec(key);
         };
 
-        auto slices = MakeIntrusive<TSlices>();
+        TSlices slices;
         for (size_t rowId = 0; rowId < fullRows.size();) {
             TSlice slice;
             slice.FirstInclusive = true;
@@ -963,16 +963,25 @@ Y_UNIT_TEST_SUITE(TPart) {
             slice.LastRowId = rowId + RandomNumber<ui32>(2) + 1;
             if (slice.LastRowId < fullRows.size())
                 slice.LastKey = getKey(IndexTools::GetRecord(*cutPartTmp, slice.LastRowId));
-            slices->push_back(slice);
+            slices.push_back(slice);
             rowId = slice.LastRowId;
         }
 
         Cerr << "======= SLICES =======" << Endl;
-        slices->Describe(Cerr);
+        slices.Describe(Cerr);
         Cerr << Endl;
 
-        TCheckIt cutWrap(cutCook.Finish(), { new TTouchEnv() }, slices), fullWrap(fullCook.Finish(), { new TTouchEnv() });
-        TCheckReverseIt cutWrapR(cutCookR.Finish(), { new TTouchEnv() }, slices), fullWrapR(fullCookR.Finish(), { new TTouchEnv() });
+        auto cutEggs = cutCook.Finish(), cutEggsR = cutCookR.Finish();
+        for (auto& eggs : {cutEggs, cutEggsR}) {
+            auto partSlices = (TSlices*)eggs.Lone()->Slices.Get();
+            partSlices->clear();
+            for (auto s : slices) {
+                partSlices->push_back(s);
+            }
+        }
+
+        TCheckIt cutWrap(cutEggs, { new TTouchEnv() }), fullWrap(fullCook.Finish(), { new TTouchEnv() });
+        TCheckReverseIt cutWrapR(cutEggsR, { new TTouchEnv() }), fullWrapR(fullCookR.Finish(), { new TTouchEnv() });
 
         auto cutPart = (*cutWrap).Eggs.Lone();
         auto fullPart = (*fullWrap).Eggs.Lone();
@@ -984,6 +993,8 @@ Y_UNIT_TEST_SUITE(TPart) {
         Cerr << DumpPart(*fullPart, 2) << Endl;
 
         UNIT_ASSERT_GT(fullPart->IndexesRawSize, cutPart->IndexesRawSize);
+        UNIT_ASSERT_GT(cutPart->Slices->size(), 1);
+        UNIT_ASSERT_VALUES_EQUAL(fullPart->Slices->size(), 1);
         
         for (auto r : fullRows) {
             cutWrap.Has(*TSchemedCookRow(*lay).Col(r.first, r.second));