Browse Source

BTreeIndex Stats Visit top nodes only (#2177)

kungurtsev 1 year ago
parent
commit
f554ad1181

+ 1 - 0
.gitignore

@@ -74,3 +74,4 @@ util/all_thread.cpp
 util/all_util.cpp
 util/charset/all_charset.cpp
 
+list_result.log

+ 86 - 91
ydb/core/tablet_flat/benchmark/b_part.cpp

@@ -9,14 +9,19 @@
 #include <ydb/core/tablet_flat/test/libs/table/test_make.h>
 #include <ydb/core/tablet_flat/test/libs/table/test_mixer.h>
 #include "ydb/core/tablet_flat/flat_part_btree_index_iter.h"
+#include "ydb/core/tablet_flat/flat_stat_table.h"
 #include "ydb/core/tablet_flat/test/libs/table/wrap_iter.h"
+#include "ydb/core/tx/datashard/datashard.h"
 #include <ydb/core/tablet_flat/test/libs/table/test_writer.h>
 #include <ydb/core/tablet_flat/test/libs/table/test_envs.h>
 #include <ydb/core/tablet_flat/test/libs/table/wrap_part.h>
 #include <ydb/core/tablet_flat/test/libs/table/test_steps.h>
 
-namespace NKikimr {
-namespace NTable {
+#ifndef BENCHMARK_MAKE_LARGE_PART
+#define BENCHMARK_MAKE_LARGE_PART 0
+#endif
+
+namespace NKikimr::NTable {
 
 namespace {
     using namespace NTest;
@@ -26,14 +31,10 @@ namespace {
 
     NPage::TConf PageConf(size_t groups, bool writeBTreeIndex) noexcept
     {
-        NPage::TConf conf{ true, 1024 };
+        NPage::TConf conf;
 
         conf.Groups.resize(groups);
-        for (size_t group : xrange(groups)) {
-            conf.Group(group).PageSize = 1024;
-            conf.Group(group).BTreeIndexNodeTargetSize = 1024;
-        }
-
+        
         conf.WriteBTreeIndex = writeBTreeIndex;
 
         conf.SliceSize = conf.Group(0).PageSize * 4;
@@ -41,66 +42,28 @@ namespace {
         return conf;
     }
 
-    struct TPartEggsFixture : public benchmark::Fixture {
-        using TGroupId = NPage::TGroupId;
-
-        void SetUp(const ::benchmark::State& state) 
-        {
-            const bool groups = state.range(1);
-
-            TLayoutCook lay;
-
-            lay
-                .Col(0, 0,  NScheme::NTypeIds::Uint32)
-                .Col(0, 1,  NScheme::NTypeIds::Uint32)
-                .Col(0, 2,  NScheme::NTypeIds::Uint32)
-                .Col(0, 3,  NScheme::NTypeIds::Uint32)
-                .Col(groups ? 1 : 0, 4,  NScheme::NTypeIds::Uint32)
-                .Key({0, 1, 2});
-
-            TPartCook cook(lay, PageConf(groups ? 2 : 1, true));
-            
-            for (ui32 i = 0; (groups ? cook.GetDataBytes(0) + cook.GetDataBytes(1) : cook.GetDataBytes(0)) < 100ull*1024*1024; i++) {
-                cook.Add(*TSchemedCookRow(*lay).Col(i / 10000, i / 100 % 100, i % 100, i, i));
-            }
-
-            Eggs = cook.Finish();
-
-            const auto part = Eggs.Lone();
-
-            Cerr << "DataBytes = " << part->Stat.Bytes << " DataPages = " << IndexTools::CountMainPages(*part) << Endl;
-            Cerr << "FlatIndexBytes = " << part->GetPageSize(part->IndexPages.Groups[groups ? 1 : 0], {}) << " BTreeIndexBytes = " << part->IndexPages.BTreeGroups[groups ? 1 : 0].IndexSize << Endl;
-            Cerr << "Levels = " << part->IndexPages.BTreeGroups[groups ? 1 : 0].LevelCount << Endl;
-
-            // 100 MB
-            UNIT_ASSERT_GE(part->Stat.Bytes, 100ull*1024*1024);
-            UNIT_ASSERT_LE(part->Stat.Bytes, 100ull*1024*1024 + 10ull*1024*1024);
-
-            GroupId = TGroupId(groups ? 1 : 0);
-        }
-
-        TPartEggs Eggs;
-        TTestEnv Env;
-        TGroupId GroupId;
-    };
-
-    struct TPartSubsetFixture : public benchmark::Fixture {
+    struct TPartFixture : public benchmark::Fixture {
         using TGroupId = NPage::TGroupId;
 
-        void SetUp(const ::benchmark::State& state) 
+        void SetUp(::benchmark::State& state) 
         {
             const bool useBTree = state.range(0);
             const bool groups = state.range(1);
             const bool history = state.range(2);
 
-            Mass = new NTest::TMass(new NTest::TModelStd(groups), history ? 1000000 : 300000);
+            ui64 rows = history ? 300000 : 1000000;
+            if (BENCHMARK_MAKE_LARGE_PART) {
+                rows *= 10;
+            }
+            Mass = new NTest::TMass(new NTest::TModelStd(groups), rows);
             Subset = TMake(*Mass, PageConf(Mass->Model->Scheme->Families.size(), useBTree)).Mixed(0, 1, TMixerOne{ }, history ? 0.7 : 0);
             
-            for (const auto& part : Subset->Flatten) {
-                Cerr << "DataBytes = " << part->Stat.Bytes << " DataPages = " << IndexTools::CountMainPages(*part) << Endl;
-                Cerr << "FlatIndexBytes = " << part->GetPageSize(part->IndexPages.Groups[groups ? 1 : 0], {}) << " BTreeIndexBytes = " << (useBTree ? part->IndexPages.BTreeGroups[groups ? 1 : 0].IndexSize : 0) << Endl;
+            for (const auto& part : Subset->Flatten) { // single part
+                state.counters["DataBytes"] = part->Stat.Bytes;
+                state.counters["DataPages"] = IndexTools::CountMainPages(*part);
+                state.counters["IndexBytes"] = part->IndexesRawSize;
                 if (useBTree) {
-                    Cerr << "Levels = " << part->IndexPages.BTreeGroups[groups ? 1 : 0].LevelCount << Endl;
+                    state.counters["Levels{0}"] = part->IndexPages.BTreeGroups[0].LevelCount;
                 }
             }
 
@@ -111,6 +74,9 @@ namespace {
                 Checker = new TCheckIt(*Subset, {new TTestEnv()});
                 CheckerReverse = new TCheckReverseIt(*Subset, {new TTestEnv()});
             }
+
+            GroupId = TGroupId(groups, history);
+            Part = Subset->Flatten[0].Part.Get();
         }
 
         TMersenne<ui64> Rnd;
@@ -119,87 +85,99 @@ namespace {
         TAutoPtr<TCheckIt> Checker;
         TAutoPtr<TCheckReverseIt> CheckerReverse;
         TTestEnv Env;
+        TGroupId GroupId;
+        TPart const* Part;
     };
 }
 
-BENCHMARK_DEFINE_F(TPartEggsFixture, SeekRowId)(benchmark::State& state) {
+BENCHMARK_DEFINE_F(TPartFixture, SeekRowId)(benchmark::State& state) {
     const bool useBTree = state.range(0);
 
     for (auto _ : state) {
         THolder<IIndexIter> iter;
 
         if (useBTree) {
-            iter = MakeHolder<TPartBtreeIndexIt>(Eggs.Lone().Get(), &Env, GroupId);
+            iter = MakeHolder<TPartBtreeIndexIt>(Part, &Env, GroupId);
         } else {
-            iter = MakeHolder<TPartIndexIt>(Eggs.Lone().Get(), &Env, GroupId);
+            iter = MakeHolder<TPartIndexIt>(Part, &Env, GroupId);
         }
 
-        iter->Seek(RandomNumber<ui32>(Eggs.Lone()->Stat.Rows));    
+        iter->Seek(RandomNumber<ui32>(Part->Stat.Rows));    
     }
 }
 
-BENCHMARK_DEFINE_F(TPartEggsFixture, Next)(benchmark::State& state) {
+BENCHMARK_DEFINE_F(TPartFixture, Next)(benchmark::State& state) {
     const bool useBTree = state.range(0);
 
     THolder<IIndexIter> iter;
 
     if (useBTree) {
-        iter = MakeHolder<TPartBtreeIndexIt>(Eggs.Lone().Get(), &Env, GroupId);
+        iter = MakeHolder<TPartBtreeIndexIt>(Part, &Env, GroupId);
     } else {
-        iter = MakeHolder<TPartIndexIt>(Eggs.Lone().Get(), &Env, GroupId);
+        iter = MakeHolder<TPartIndexIt>(Part, &Env, GroupId);
     }
 
-    iter->Seek(RandomNumber<ui32>(Eggs.Lone()->Stat.Rows));
+    iter->Seek(RandomNumber<ui32>(Part->Stat.Rows));
 
     for (auto _ : state) {
         if (!iter->IsValid()) {
-            iter->Seek(RandomNumber<ui32>(Eggs.Lone()->Stat.Rows));
+            iter->Seek(RandomNumber<ui32>(Part->Stat.Rows));
         }
         iter->Next();
     }
 }
 
-BENCHMARK_DEFINE_F(TPartEggsFixture, Prev)(benchmark::State& state) {
+BENCHMARK_DEFINE_F(TPartFixture, Prev)(benchmark::State& state) {
     const bool useBTree = state.range(0);
 
     THolder<IIndexIter> iter;
 
     if (useBTree) {
-        iter = MakeHolder<TPartBtreeIndexIt>(Eggs.Lone().Get(), &Env, GroupId);
+        iter = MakeHolder<TPartBtreeIndexIt>(Part, &Env, GroupId);
     } else {
-        iter = MakeHolder<TPartIndexIt>(Eggs.Lone().Get(), &Env, GroupId);
+        iter = MakeHolder<TPartIndexIt>(Part, &Env, GroupId);
     }
 
-    iter->Seek(RandomNumber<ui32>(Eggs.Lone()->Stat.Rows));
+    iter->Seek(RandomNumber<ui32>(Part->Stat.Rows));
 
     for (auto _ : state) {
         if (!iter->IsValid()) {
-            iter->Seek(RandomNumber<ui32>(Eggs.Lone()->Stat.Rows));
+            iter->Seek(RandomNumber<ui32>(Part->Stat.Rows));
         }
         iter->Prev();
     }
 }
 
-BENCHMARK_DEFINE_F(TPartEggsFixture, SeekKey)(benchmark::State& state) {
+// Benchmarks a single key Seek on a freshly created index iterator.
+// range(0) selects the B-tree iterator over the flat one, range(3) is the ESeek mode.
+// Iterator construction is inside the timed region; picking a random saved row and
+// building its key is excluded via PauseTiming/ResumeTiming.
+BENCHMARK_DEFINE_F(TPartFixture, SeekKey)(benchmark::State& state) {
     const bool useBTree = state.range(0);
-    const ESeek seek = ESeek(state.range(2));
+    const ESeek seek = ESeek(state.range(3));
+
+    TRowTool rowTool(*Subset->Scheme);
 
     for (auto _ : state) {
         THolder<IIndexIter> iter;
 
         if (useBTree) {
-            iter = MakeHolder<TPartBtreeIndexIt>(Eggs.Lone().Get(), &Env, GroupId);
+            iter = MakeHolder<TPartBtreeIndexIt>(Part, &Env, GroupId);
         } else {
-            iter = MakeHolder<TPartIndexIt>(Eggs.Lone().Get(), &Env, GroupId);
+            iter = MakeHolder<TPartIndexIt>(Part, &Env, GroupId);
         }
 
-        ui32 rowId = RandomNumber<ui32>(Eggs.Lone()->Stat.Rows);
-        TVector<TCell> key{TCell::Make(rowId / 10000), TCell::Make(rowId / 100 % 100), TCell::Make(rowId % 100)};
-        iter->Seek(seek, key, Eggs.Scheme->Keys.Get());
+        // key preparation is not part of the measured work
+        state.PauseTiming();
+        auto& row = *Mass->Saved.Any(Rnd);
+        auto key_ = rowTool.LookupKey(row);
+        const TCelled key(key_, *Subset->Scheme->Keys, false);
+        state.ResumeTiming();
+
+        iter->Seek(seek, key, Subset->Scheme->Keys.Get());
     }
 }
 
-BENCHMARK_DEFINE_F(TPartSubsetFixture, DoReads)(benchmark::State& state) {
+BENCHMARK_DEFINE_F(TPartFixture, DoReads)(benchmark::State& state) {
     const bool reverse = state.range(3);
     const ESeek seek = static_cast<ESeek>(state.range(4));
     const ui32 items = state.range(5);
@@ -221,7 +199,7 @@ BENCHMARK_DEFINE_F(TPartSubsetFixture, DoReads)(benchmark::State& state) {
     }
 }
 
-BENCHMARK_DEFINE_F(TPartSubsetFixture, DoCharge)(benchmark::State& state) {
+BENCHMARK_DEFINE_F(TPartFixture, DoCharge)(benchmark::State& state) {
     const bool reverse = state.range(3);
     const ui32 items = state.range(4);
 
@@ -245,32 +223,43 @@ BENCHMARK_DEFINE_F(TPartSubsetFixture, DoCharge)(benchmark::State& state) {
     }
 }
 
-BENCHMARK_REGISTER_F(TPartEggsFixture, SeekRowId)
+BENCHMARK_DEFINE_F(TPartFixture, BuildStats)(benchmark::State& state) { // times one BuildStats call over the fixture's Subset per iteration
+    for (auto _ : state) {
+        TStats stats; // fresh output object for every iteration
+        BuildStats(*Subset, stats, NDataShard::gDbStatsRowCountResolution, NDataShard::gDbStatsDataSizeResolution, &Env); // NOTE(review): the bool result is not checked here
+    }
+}
+
+BENCHMARK_REGISTER_F(TPartFixture, SeekRowId)
     ->ArgsProduct({
         /* b-tree */ {0, 1},
-        /* groups: */ {0, 1}})
+        /* groups: */ {0, 1},
+        /* history: */ {0}})
     ->Unit(benchmark::kMicrosecond);
 
-BENCHMARK_REGISTER_F(TPartEggsFixture, Next)
+BENCHMARK_REGISTER_F(TPartFixture, Next)
     ->ArgsProduct({
         /* b-tree */ {0, 1},
-        /* groups: */ {0, 1}})
+        /* groups: */ {0, 1},
+        /* history: */ {0}})
     ->Unit(benchmark::kMicrosecond);
 
-BENCHMARK_REGISTER_F(TPartEggsFixture, Prev)
+BENCHMARK_REGISTER_F(TPartFixture, Prev)
     ->ArgsProduct({
         /* b-tree */ {0, 1},
-        /* groups: */ {0, 1}})
+        /* groups: */ {0, 1},
+        /* history: */ {0}})
     ->Unit(benchmark::kMicrosecond);
 
-BENCHMARK_REGISTER_F(TPartEggsFixture, SeekKey)
+BENCHMARK_REGISTER_F(TPartFixture, SeekKey)
     ->ArgsProduct({
         /* b-tree */ {0, 1},
         /* groups: */ {0, 1},
+        /* history: */ {0},
         /* ESeek: */ {1}})
     ->Unit(benchmark::kMicrosecond);
 
-BENCHMARK_REGISTER_F(TPartSubsetFixture, DoReads)
+BENCHMARK_REGISTER_F(TPartFixture, DoReads)
     ->ArgsProduct({
         /* b-tree */ {0, 1},
         /* groups: */ {1},
@@ -280,7 +269,7 @@ BENCHMARK_REGISTER_F(TPartSubsetFixture, DoReads)
         /* items */ {1, 50, 1000}})
     ->Unit(benchmark::kMicrosecond);
 
-BENCHMARK_REGISTER_F(TPartSubsetFixture, DoCharge)
+BENCHMARK_REGISTER_F(TPartFixture, DoCharge)
     ->ArgsProduct({
         /* b-tree */ {0, 1},
         /* groups: */ {1},
@@ -289,5 +278,11 @@ BENCHMARK_REGISTER_F(TPartSubsetFixture, DoCharge)
         /* items */ {1, 50, 1000}})
     ->Unit(benchmark::kMicrosecond);
 
-}
+BENCHMARK_REGISTER_F(TPartFixture, BuildStats)
+    ->ArgsProduct({
+        /* b-tree */ {0, 1},
+        /* groups: */ {0, 1},
+        /* history: */ {0, 1}})
+    ->Unit(benchmark::kMicrosecond);
+
 }

+ 8 - 1
ydb/core/tablet_flat/benchmark/ya.make

@@ -2,7 +2,14 @@ G_BENCHMARK()
 
 TAG(ya:fat)
 SIZE(LARGE)
-TIMEOUT(1200)
+TIMEOUT(600)
+
+IF (BENCHMARK_MAKE_LARGE_PART)
+    CFLAGS(
+        -DBENCHMARK_MAKE_LARGE_PART=1
+    )
+    TIMEOUT(1200)
+ENDIF()
 
 SRCS(
     b_part.cpp

+ 30 - 5
ydb/core/tablet_flat/flat_stat_part.h

@@ -21,7 +21,8 @@ class TStatsScreenedPartIterator {
 
 public:
     TStatsScreenedPartIterator(TPartView partView, IPages* env, TIntrusiveConstPtr<TKeyCellDefaults> keyDefaults, 
-            TIntrusiveConstPtr<TFrames> small, TIntrusiveConstPtr<TFrames> large)
+            TIntrusiveConstPtr<TFrames> small, TIntrusiveConstPtr<TFrames> large,
+            ui64 rowCountResolution, ui64 dataSizeResolution)
         : Part(std::move(partView.Part))
         , KeyDefaults(std::move(keyDefaults))
         , Groups(::Reserve(Part->GroupsCount))
@@ -31,11 +32,35 @@ public:
         , Large(std::move(large))
         , CurrentHole(TScreen::Iter(Screen, CurrentHoleIdx, 0, 1))
     {
-        for (ui32 groupIndex : xrange(Part->GroupsCount)) {
-            Groups.push_back(CreateStatsPartGroupIterator(Part.Get(), env, TGroupId(groupIndex)));
+        TVector<TRowId> splitPoints;
+        if (Screen) {
+            splitPoints.reserve(Screen->Size() * 2);
+            for (auto hole : *Screen) {
+                for (auto splitPoint : {hole.Begin, hole.End}) {
+                    Y_DEBUG_ABORT_UNLESS(splitPoints.empty() || splitPoints.back() <= splitPoint);
+                    if (0 < splitPoint && splitPoint < Part->Stat.Rows - 1 && (splitPoints.empty() || splitPoints.back() < splitPoint)) {
+                        splitPoints.push_back(splitPoint);
+                    }
+                }
+            }
         }
-        for (ui32 groupIndex : xrange(Part->HistoricGroupsCount)) {
-            HistoricGroups.push_back(CreateStatsPartGroupIterator(Part.Get(), env, TGroupId(groupIndex, true)));
+
+        for (bool historic : {false, true}) {
+            for (ui32 groupIndex : xrange(historic ? Part->HistoricGroupsCount : Part->GroupsCount)) {
+                ui64 groupRowCountResolution, groupDataSizeResolution;
+                if (groupIndex == 0 && Part->GroupsCount > 1) {
+                    // make steps as small as possible because they will affect groups resolution
+                    groupRowCountResolution = groupDataSizeResolution = 0;
+                } else {
+                    groupRowCountResolution = rowCountResolution;
+                    groupDataSizeResolution = dataSizeResolution;
+                }
+
+                (historic ? HistoricGroups : Groups).push_back(
+                    CreateStatsPartGroupIterator(Part.Get(), env, TGroupId(groupIndex, historic), 
+                        groupRowCountResolution, groupDataSizeResolution, 
+                        historic || groupRowCountResolution == 0 ? TVector<TRowId>() : splitPoints));
+            }
         }
     }
 

+ 41 - 14
ydb/core/tablet_flat/flat_stat_part_group_btree_index.h

@@ -22,20 +22,23 @@ class TStatsPartGroupBtreeIndexIterator : public IStatsPartGroupIterator {
         TRowId BeginRowId;
         TRowId EndRowId;
         TCellsIterable BeginKey;
-        ui64 DataSize;
+        ui64 BeginDataSize;
+        ui64 EndDataSize;
 
-        TNodeState(TPageId pageId, TRowId beginRowId, TRowId endRowId, TCellsIterable beginKey, ui64 dataSize)
+        TNodeState(TPageId pageId, TRowId beginRowId, TRowId endRowId, TCellsIterable beginKey, ui64 beginDataSize, ui64 endDataSize)
             : PageId(pageId)
             , BeginRowId(beginRowId)
             , EndRowId(endRowId)
             , BeginKey(beginKey)
-            , DataSize(dataSize)
+            , BeginDataSize(beginDataSize)
+            , EndDataSize(endDataSize)
         {
         }
     };
 
 public:
-    TStatsPartGroupBtreeIndexIterator(const TPart* part, IPages* env, TGroupId groupId)
+    TStatsPartGroupBtreeIndexIterator(const TPart* part, IPages* env, TGroupId groupId,
+            ui64 rowCountResolution, ui64 dataSizeResolution, const TVector<TRowId>& splitPoints)
         : Part(part)
         , Env(env)
         , GroupId(groupId)
@@ -43,7 +46,11 @@ public:
         , Meta(groupId.IsHistoric() ? part->IndexPages.BTreeHistoric[groupId.Index] : part->IndexPages.BTreeGroups[groupId.Index])
         , GroupChannel(Part->GetGroupChannel(GroupId))
         , NodeIndex(0)
+        , RowCountResolution(rowCountResolution)
+        , DataSizeResolution(dataSizeResolution)
+        , SplitPoints(splitPoints) // make copy for Start
     {
+        Y_DEBUG_ABORT_UNLESS(std::is_sorted(SplitPoints.begin(), SplitPoints.end()));
     }
     
     EReady Start() override {
@@ -51,14 +58,28 @@ public:
 
         bool ready = true;
         TVector<TNodeState> nextNodes;
-        Nodes.emplace_back(Meta.PageId, 0, GetEndRowId(), EmptyKey, Meta.DataSize);
+        Nodes.emplace_back(Meta.PageId, 0, GetEndRowId(), EmptyKey, 0, Meta.DataSize);
 
         for (ui32 height = 0; height < Meta.LevelCount; height++) {
+            bool hasChanges = false;
+            size_t splitPointIndex = 0;
+
             for (auto &nodeState : Nodes) {
+                while (splitPointIndex < SplitPoints.size() && SplitPoints[splitPointIndex] < nodeState.BeginRowId) {
+                    splitPointIndex++;
+                }
+                if (splitPointIndex < SplitPoints.size() && SplitPoints[splitPointIndex] < nodeState.EndRowId) {
+                    // split node and go deeper
+                } else if (nodeState.EndRowId - nodeState.BeginRowId <= RowCountResolution
+                        && nodeState.EndDataSize - nodeState.BeginDataSize <= DataSizeResolution) {
+                    nextNodes.push_back(nodeState); // lift current node on the next level as-is
+                    continue; // don't go deeper
+                }
+
                 auto page = Env->TryGetPage(Part, nodeState.PageId);
                 if (!page) {
                     ready = false;
-                    continue;
+                    continue; // continue requesting other nodes
                 }
                 TBtreeIndexNode node(*page);
 
@@ -68,14 +89,20 @@ public:
                     TRowId beginRowId = pos ? node.GetShortChild(pos - 1).RowCount : nodeState.BeginRowId;
                     TRowId endRowId = child.RowCount;
                     TCellsIterable beginKey = pos ? node.GetKeyCellsIterable(pos - 1, GroupInfo.ColsKeyIdx) : nodeState.BeginKey;
-                    ui64 dataSize = child.DataSize;
+                    ui64 beginDataSize = pos ? node.GetShortChild(pos - 1).DataSize : nodeState.BeginDataSize;
+                    ui64 endDataSize = child.DataSize;
 
-                    nextNodes.emplace_back(child.PageId, beginRowId, endRowId, beginKey, dataSize);
+                    nextNodes.emplace_back(child.PageId, beginRowId, endRowId, beginKey, beginDataSize, endDataSize);
+                    hasChanges = true;
                 }
             }
 
             Nodes.swap(nextNodes);
             nextNodes.clear();
+
+            if (!hasChanges) {
+                break; // don't go deeper
+            }
         }
 
         if (!ready) {
@@ -90,7 +117,7 @@ public:
         Y_ABORT_UNLESS(IsValid());
 
         NodeIndex++;
-        
+
         Y_DEBUG_ABORT_UNLESS(NodeIndex == Nodes.size() || Nodes[NodeIndex - 1].EndRowId == Nodes[NodeIndex].BeginRowId);
 
         return DataOrGone();
@@ -98,11 +125,8 @@ public:
 
     void AddLastDeltaDataSize(TChanneledDataSize& dataSize) override { // adds the data size span of Nodes[NodeIndex - 1] to dataSize under this group's channel
         Y_DEBUG_ABORT_UNLESS(NodeIndex); // NodeIndex - 1 is used as an index below, so it must be non-zero
-        ui64 delta = Nodes[NodeIndex - 1].DataSize;
-        if (NodeIndex > 1) {
-            Y_DEBUG_ABORT_UNLESS(delta >= Nodes[NodeIndex - 2].DataSize);
-            delta -= Nodes[NodeIndex - 2].DataSize;
-        }
+        Y_DEBUG_ABORT_UNLESS(Nodes[NodeIndex - 1].EndDataSize >= Nodes[NodeIndex - 1].BeginDataSize); // guards the unsigned subtraction below
+        ui64 delta = Nodes[NodeIndex - 1].EndDataSize - Nodes[NodeIndex - 1].BeginDataSize; // each node carries its own begin/end sizes, so the previous node is no longer consulted
         ui8 channel = Part->GetGroupChannel(GroupId); // NOTE(review): the constructor stores the same call in GroupChannel; presumably it could be reused here - confirm
         dataSize.Add(delta, channel);
     }
@@ -151,6 +175,9 @@ private:
     ui8 GroupChannel;
     ui32 NodeIndex;
     TVector<TNodeState> Nodes;
+    ui64 RowCountResolution;
+    ui64 DataSizeResolution;
+    TVector<TRowId> SplitPoints;
 };
 
 }

+ 3 - 2
ydb/core/tablet_flat/flat_stat_part_group_iter_create.cpp

@@ -4,10 +4,11 @@
 
 namespace NKikimr::NTable {
 
-THolder<IStatsPartGroupIterator> CreateStatsPartGroupIterator(const TPart* part, IPages* env, NPage::TGroupId groupId)
+THolder<IStatsPartGroupIterator> CreateStatsPartGroupIterator(const TPart* part, IPages* env, NPage::TGroupId groupId, 
+    ui64 rowCountResolution, ui64 dataSizeResolution, const TVector<TRowId>& splitPoints)
 {
     if (groupId.Index < (groupId.IsHistoric() ? part->IndexPages.BTreeHistoric : part->IndexPages.BTreeGroups).size()) {
-        return MakeHolder<TStatsPartGroupBtreeIndexIterator>(part, env, groupId);
+        return MakeHolder<TStatsPartGroupBtreeIndexIterator>(part, env, groupId, rowCountResolution, dataSizeResolution, splitPoints);
     } else {
         return MakeHolder<TPartIndexIt>(part, env, groupId);
     }

+ 2 - 1
ydb/core/tablet_flat/flat_stat_part_group_iter_iface.h

@@ -40,6 +40,7 @@ struct IStatsPartGroupIterator {
     virtual ~IStatsPartGroupIterator() = default;
 };
 
-THolder<IStatsPartGroupIterator> CreateStatsPartGroupIterator(const TPart* part, IPages* env, NPage::TGroupId groupId);
+THolder<IStatsPartGroupIterator> CreateStatsPartGroupIterator(const TPart* part, IPages* env, NPage::TGroupId groupId, 
+    ui64 rowCountResolution, ui64 dataSizeResolution, const TVector<TRowId>& splitPoints);
     
 }

+ 10 - 1
ydb/core/tablet_flat/flat_stat_table.cpp

@@ -12,11 +12,20 @@ bool BuildStats(const TSubset& subset, TStats& stats, ui64 rowCountResolution, u
     TDataStats iteratorStats = { };
     TStatsIterator statsIterator(subset.Scheme->Keys);
 
+    THashSet<ui64> epochs;
+    for (const auto& part : subset.Flatten) {
+        epochs.insert(part->Epoch.ToCounter());
+    }
+    // share the requested resolution between distinct epochs and divide by 4 more: rowCountResolution = 300 with 3 epochs lets each iterator step up to 25 rows
+    ui64 iterRowCountResolution = rowCountResolution / Max<ui64>(1, epochs.size()) / 4; // explicit ui64: the 1lu literal matches size_t only on LP64 targets
+    ui64 iterDataSizeResolution = dataSizeResolution / Max<ui64>(1, epochs.size()) / 4;
+
     // Make index iterators for all parts
     bool started = true;
     for (const auto& part : subset.Flatten) {
         stats.IndexSize.Add(part->IndexesRawSize, part->Label.Channel());
-        TAutoPtr<TStatsScreenedPartIterator> iter = new TStatsScreenedPartIterator(part, env, subset.Scheme->Keys, part->Small, part->Large);
+        TAutoPtr<TStatsScreenedPartIterator> iter = new TStatsScreenedPartIterator(part, env, subset.Scheme->Keys, part->Small, part->Large, 
+            iterRowCountResolution, iterDataSizeResolution);
         auto ready = iter->Start();
         if (ready == EReady::Page) {
             started = false;

+ 4 - 4
ydb/core/tablet_flat/flat_table_part_ut.cpp

@@ -78,7 +78,7 @@ Y_UNIT_TEST_SUITE(TLegacy) {
             TDataStats stats = { };
             TTestEnv env;
             // TScreenedPartIndexIterator without screen previously was TPartIndexIterator
-            TStatsScreenedPartIterator idxIter(TPartView{part, nullptr, nullptr}, &env, scheme->Keys, nullptr, nullptr);
+            TStatsScreenedPartIterator idxIter(TPartView{part, nullptr, nullptr}, &env, scheme->Keys, nullptr, nullptr, 0, 0);
             sizes.clear();
 
             UNIT_ASSERT_VALUES_EQUAL(idxIter.Start(), EReady::Data);
@@ -147,7 +147,7 @@ Y_UNIT_TEST_SUITE(TLegacy) {
                             TIntrusiveConstPtr<TRowScheme> scheme, TIntrusiveConstPtr<NPage::TFrames> frames) -> std::pair<ui64, ui64> {
             TDataStats stats = { };
             TTestEnv env;
-            TStatsScreenedPartIterator idxIter(TPartView{part, screen, nullptr}, &env, scheme->Keys, std::move(frames), nullptr);
+            TStatsScreenedPartIterator idxIter(TPartView{part, screen, nullptr}, &env, scheme->Keys, std::move(frames), nullptr, 0, 0);
 
             UNIT_ASSERT_VALUES_EQUAL(idxIter.Start(), EReady::Data);
             while (idxIter.IsValid()) {
@@ -308,8 +308,8 @@ Y_UNIT_TEST_SUITE(TLegacy) {
         TTestEnv env;
         TStatsIterator stIter(lay2.RowScheme()->Keys);
         {
-            auto it1 = MakeHolder<TStatsScreenedPartIterator>(TPartView{eggs2.At(0), screen2, nullptr}, &env, lay2.RowScheme()->Keys, nullptr, nullptr);
-            auto it2 = MakeHolder<TStatsScreenedPartIterator>(TPartView{eggs1.At(0), screen1, nullptr}, &env, lay2.RowScheme()->Keys, nullptr, nullptr);
+            auto it1 = MakeHolder<TStatsScreenedPartIterator>(TPartView{eggs2.At(0), screen2, nullptr}, &env, lay2.RowScheme()->Keys, nullptr, nullptr, 0, 0);
+            auto it2 = MakeHolder<TStatsScreenedPartIterator>(TPartView{eggs1.At(0), screen1, nullptr}, &env, lay2.RowScheme()->Keys, nullptr, nullptr, 0, 0);
             UNIT_ASSERT_VALUES_EQUAL(it1->Start(), EReady::Data);
             UNIT_ASSERT_VALUES_EQUAL(it2->Start(), EReady::Data);
             stIter.Add(std::move(it1));

+ 61 - 11
ydb/core/tablet_flat/ut/ut_stat.cpp

@@ -23,7 +23,7 @@ namespace {
         TMap<TGroupId, TSet<TPageId>> Touched;
     };
 
-    NPage::TConf PageConf(size_t groups, bool writeBTreeIndex) noexcept
+    NPage::TConf PageConf(size_t groups, bool writeBTreeIndex, bool lowResolution = false) noexcept
     {
         NPage::TConf conf{ true, 2 * 1024 };
 
@@ -31,6 +31,10 @@ namespace {
         for (size_t group : xrange(groups)) {
             conf.Group(group).IndexMin = 1024; /* Should cover index buffer grow code */
             conf.Group(group).BTreeIndexNodeTargetSize = 512; /* Should cover up/down moves */
+            if (lowResolution) {
+                // make more levels
+                conf.Group(group).BTreeIndexNodeKeysMin = conf.Group(group).BTreeIndexNodeKeysMax = 2;
+            }
         }
         conf.SmallEdge = 19;  /* Packed to page collection large cell values */
         conf.LargeEdge = 29;  /* Large values placed to single blobs */
@@ -57,14 +61,25 @@ namespace {
         }
     }
 
-    template<typename TEnv>
-    void Check(const TSubset& subset, ui64 expectedRows, ui64 expectedData, ui64 expectedIndex) {
+    void Check(const TSubset& subset, THistogram histogram, ui64 resolution) { // asserts that neighbouring histogram buckets are spaced by roughly `resolution`
+        ui64 additionalErrorRate = 1;
+        if (subset.Flatten.size() > 1 && subset.Flatten[0]->GroupsCount > 1) {
+            additionalErrorRate = 2; // upper bound is doubled for several parts when the first part has several groups
+        }
+        for (ui32 i = 1; i < histogram.size(); i++) {
+            auto delta = histogram[i].Value - histogram[i - 1].Value; // step between two adjacent buckets
+            UNIT_ASSERT_GE_C(delta, resolution, "Delta = " << delta << " Resolution = " << resolution); // lower bound: at least `resolution`
+            UNIT_ASSERT_LE_C(delta, resolution * additionalErrorRate * 3 / 2, "Delta = " << delta << " Resolution = " << resolution); // upper bound: 1.5 * resolution, scaled by additionalErrorRate
+        }
+    }
+
+    void Check(const TSubset& subset, ui64 expectedRows, ui64 expectedData, ui64 expectedIndex, ui64 rowCountResolution = 531, ui64 dataSizeResolution = 53105) {
         TStats stats;
-        TEnv env;
+        TTouchEnv env;
 
         const ui32 attempts = 10;
         for (ui32 attempt : xrange(attempts)) {
-            if (NTable::BuildStats(subset, stats, 531, 53105, &env)) {
+            if (NTable::BuildStats(subset, stats, rowCountResolution, dataSizeResolution, &env)) {
                 break;
             }
             UNIT_ASSERT_C(attempt + 1 < attempts, "Too many attempts");
@@ -77,14 +92,10 @@ namespace {
 
         Cerr << "RowCountHistogram:" << Endl;
         Dump(subset, stats.RowCountHistogram);
+        Check(subset, stats.RowCountHistogram, rowCountResolution);
         Cerr << "DataSizeHistogram:" << Endl;
         Dump(subset, stats.DataSizeHistogram);
-    }
-    
-    
-    void Check(const TSubset& subset, ui64 expectedRows, ui64 expectedData, ui64 expectedIndex) {
-        Check<TTestEnv>(subset, expectedRows, expectedData, expectedIndex);
-        Check<TTouchEnv>(subset, expectedRows, expectedData, expectedIndex);
+        Check(subset, stats.DataSizeHistogram, dataSizeResolution);
     }
 }
 
@@ -246,6 +257,45 @@ Y_UNIT_TEST_SUITE(BuildStats) {
         auto subset = TMake(Mass1, PageConf(Mass1.Model->Scheme->Families.size(), false)).Mixed(0, 4, mixer, 0.3);
         Check(*subset, 24000, 4054290, 19168);
     }
+
+    Y_UNIT_TEST(Single_LowResolution_BTreeIndex)
+    {
+        auto subset = TMake(Mass0, PageConf(Mass0.Model->Scheme->Families.size(), true, true)).Mixed(0, 1, TMixerOne{ });   
+        Check(*subset, 24000, 2106439, 56610, 5310, 531050);
+    }
+
+    Y_UNIT_TEST(Single_Slices_LowResolution_BTreeIndex)
+    {
+        auto subset = TMake(Mass0, PageConf(Mass0.Model->Scheme->Families.size(), true, true)).Mixed(0, 1, TMixerOne{ }, 0, 13);   
+        subset->Flatten.begin()->Slices->Describe(Cerr); Cerr << Endl;
+        Check(*subset, 12816, 1121048, 56610, 5310, 531050);
+    }
+
+    Y_UNIT_TEST(Single_Groups_LowResolution_BTreeIndex)
+    {
+        auto subset = TMake(Mass1, PageConf(Mass1.Model->Scheme->Families.size(), true, true)).Mixed(0, 1, TMixerOne{ });   
+        Check(*subset, 24000, 2460139, 29557, 5310, 531050);
+    }
+
+    Y_UNIT_TEST(Single_Groups_Slices_LowResolution_BTreeIndex)
+    {
+        auto subset = TMake(Mass1, PageConf(Mass1.Model->Scheme->Families.size(), true, true)).Mixed(0, 1, TMixerOne{ }, 0, 13);   
+        subset->Flatten.begin()->Slices->Describe(Cerr); Cerr << Endl;
+        Check(*subset, 10440, 1060798, 29557, 5310, 531050);
+    }
+
+    Y_UNIT_TEST(Single_Groups_History_LowResolution_BTreeIndex)
+    {
+        auto subset = TMake(Mass1, PageConf(Mass1.Model->Scheme->Families.size(), true, true)).Mixed(0, 1, TMixerOne{ }, 0.3);   
+        Check(*subset, 24000, 4054050, 42292, 5310, 531050);
+    }
+
+    Y_UNIT_TEST(Single_Groups_History_Slices_LowResolution_BTreeIndex)
+    {
+        auto subset = TMake(Mass1, PageConf(Mass1.Model->Scheme->Families.size(), true, true)).Mixed(0, 1, TMixerOne{ }, 0.3, 13);   
+        subset->Flatten.begin()->Slices->Describe(Cerr); Cerr << Endl;
+        Check(*subset, 13570, 2186460 /* ~2277890 */, 42292, 5310, 531050);
+    }
 }
 
 }