// flat_stat_part.h
#pragma once

#include "flat_part_iface.h"
#include "flat_part_laid.h"
#include "flat_page_frames.h"
#include "flat_stat_part_group_iter_iface.h"

#include <library/cpp/containers/stack_vector/stack_vec.h>
#include <util/draft/holder_vector.h>

namespace NKikimr {
namespace NTable {

// Iterates over part index and calculates total row count and data size.
// This iterator skips pages that are screened. Currently the logic is simple:
// if a page start key is screened then we assume that the whole previous page is screened;
// if a page start key is not screened then the whole previous page is added to stats.
  14. class TStatsScreenedPartIterator {
  15. using TGroupId = NPage::TGroupId;
  16. using TFrames = NPage::TFrames;
  17. public:
  18. TStatsScreenedPartIterator(TPartView partView, IPages* env, TIntrusiveConstPtr<TKeyCellDefaults> keyDefaults,
  19. TIntrusiveConstPtr<TFrames> small, TIntrusiveConstPtr<TFrames> large,
  20. ui64 rowCountResolution, ui64 dataSizeResolution)
  21. : Part(std::move(partView.Part))
  22. , KeyDefaults(std::move(keyDefaults))
  23. , Groups(::Reserve(Part->GroupsCount))
  24. , HistoricGroups(::Reserve(Part->HistoricGroupsCount))
  25. , Screen(std::move(partView.Screen))
  26. , Small(std::move(small))
  27. , Large(std::move(large))
  28. , CurrentHole(TScreen::Iter(Screen, CurrentHoleIdx, 0, 1))
  29. {
  30. TVector<TRowId> splitPoints;
  31. if (Screen) {
  32. splitPoints.reserve(Screen->Size() * 2);
  33. for (auto hole : *Screen) {
  34. for (auto splitPoint : {hole.Begin, hole.End}) {
  35. Y_DEBUG_ABORT_UNLESS(splitPoints.empty() || splitPoints.back() <= splitPoint);
  36. if (0 < splitPoint && splitPoint < Part->Stat.Rows - 1 && (splitPoints.empty() || splitPoints.back() < splitPoint)) {
  37. splitPoints.push_back(splitPoint);
  38. }
  39. }
  40. }
  41. }
  42. for (bool historic : {false, true}) {
  43. for (ui32 groupIndex : xrange(historic ? Part->HistoricGroupsCount : Part->GroupsCount)) {
  44. ui64 groupRowCountResolution, groupDataSizeResolution;
  45. if (groupIndex == 0 && Part->GroupsCount > 1) {
  46. // make steps as small as possible because they will affect groups resolution
  47. groupRowCountResolution = groupDataSizeResolution = 0;
  48. } else {
  49. groupRowCountResolution = rowCountResolution;
  50. groupDataSizeResolution = dataSizeResolution;
  51. }
  52. (historic ? HistoricGroups : Groups).push_back(
  53. CreateStatsPartGroupIterator(Part.Get(), env, TGroupId(groupIndex, historic),
  54. groupRowCountResolution, groupDataSizeResolution,
  55. historic || groupRowCountResolution == 0 ? TVector<TRowId>() : splitPoints));
  56. }
  57. }
  58. }
  59. EReady Start() {
  60. auto ready = EReady::Data;
  61. for (auto& iter : Groups) {
  62. if (iter->Start() == EReady::Page) {
  63. ready = EReady::Page;
  64. }
  65. }
  66. for (auto& iter : HistoricGroups) {
  67. if (iter->Start() == EReady::Page) {
  68. ready = EReady::Page;
  69. }
  70. }
  71. if (ready != EReady::Page) {
  72. FillKey();
  73. }
  74. return ready;
  75. }
  76. bool IsValid() const {
  77. return Groups[0]->IsValid();
  78. }
  79. EReady Next(TDataStats& stats) {
  80. Y_ABORT_UNLESS(IsValid());
  81. LastRowId = Groups[0]->GetRowId();
  82. auto ready = Groups[0]->Next();
  83. if (ready == EReady::Page) {
  84. Y_DEBUG_ABORT_UNLESS(false, "Shouldn't really happen");
  85. return ready;
  86. }
  87. ui64 rowCount = CountUnscreenedRows(GetLastRowId(), GetCurrentRowId());
  88. stats.RowCount += rowCount;
  89. if (rowCount) {
  90. Groups[0]->AddLastDeltaDataSize(stats.DataSize);
  91. }
  92. TRowId nextRowId = ready == EReady::Data ? Groups[0]->GetRowId() : Max<TRowId>();
  93. for (auto groupIndex : xrange<ui32>(1, Groups.size())) {
  94. while (Groups[groupIndex]->IsValid() && Groups[groupIndex]->GetRowId() < nextRowId) {
  95. // eagerly include all data up to the next row id
  96. if (Groups[groupIndex]->Next() == EReady::Page) {
  97. Y_DEBUG_ABORT_UNLESS(false, "Shouldn't really happen");
  98. ready = EReady::Page;
  99. break;
  100. }
  101. if (rowCount) {
  102. Groups[groupIndex]->AddLastDeltaDataSize(stats.DataSize);
  103. }
  104. }
  105. }
  106. if (HistoricGroups) {
  107. Y_DEBUG_ABORT_UNLESS(Part->Scheme->HistoryGroup.ColsKeyIdx.size() == 3);
  108. while (HistoricGroups[0]->IsValid() && (!HistoricGroups[0]->GetKeyCellsCount() || HistoricGroups[0]->GetKeyCell(0).AsValue<TRowId>() < nextRowId)) {
  109. // eagerly include all history up to the next row id
  110. if (HistoricGroups[0]->Next() == EReady::Page) {
  111. Y_DEBUG_ABORT_UNLESS(false, "Shouldn't really happen");
  112. ready = EReady::Page;
  113. break;
  114. }
  115. if (rowCount) {
  116. HistoricGroups[0]->AddLastDeltaDataSize(stats.DataSize);
  117. }
  118. }
  119. TRowId nextHistoryRowId = HistoricGroups[0]->IsValid() ? HistoricGroups[0]->GetRowId() : Max<TRowId>();
  120. for (auto groupIndex : xrange<ui32>(1, Groups.size())) {
  121. while (HistoricGroups[groupIndex]->IsValid() && HistoricGroups[groupIndex]->GetRowId() < nextHistoryRowId) {
  122. // eagerly include all data up to the next row id
  123. if (HistoricGroups[groupIndex]->Next() == EReady::Page) {
  124. Y_DEBUG_ABORT_UNLESS(false, "Shouldn't really happen");
  125. ready = EReady::Page;
  126. break;
  127. }
  128. if (rowCount) {
  129. HistoricGroups[groupIndex]->AddLastDeltaDataSize(stats.DataSize);
  130. }
  131. }
  132. }
  133. }
  134. if (rowCount) {
  135. if (Small) {
  136. AddBlobsSize(stats.DataSize, Small.Get(), ELargeObj::Outer, PrevSmallPage);
  137. }
  138. if (Large) {
  139. AddBlobsSize(stats.DataSize, Large.Get(), ELargeObj::Extern, PrevLargePage);
  140. }
  141. }
  142. FillKey();
  143. return ready;
  144. }
  145. TDbTupleRef GetCurrentKey() const {
  146. Y_ABORT_UNLESS(KeyDefaults->BasicTypes().size() == CurrentKey.size());
  147. return TDbTupleRef(KeyDefaults->BasicTypes().data(), CurrentKey.data(), CurrentKey.size());
  148. }
  149. private:
  150. ui64 GetLastRowId() const {
  151. return LastRowId;
  152. }
  153. ui64 GetCurrentRowId() const {
  154. if (IsValid()) {
  155. return Groups[0]->GetRowId();
  156. }
  157. if (TRowId endRowId = Groups[0]->GetEndRowId(); endRowId != Max<TRowId>()) {
  158. // This would include the last page rows when known
  159. return endRowId;
  160. }
  161. return LastRowId;
  162. }
  163. void FillKey() {
  164. CurrentKey.clear();
  165. if (!IsValid())
  166. return;
  167. ui32 keyIdx = 0;
  168. // Add columns that are present in the part
  169. if (ui32 keyCellsCount = Groups[0]->GetKeyCellsCount()) {
  170. for (;keyIdx < keyCellsCount; ++keyIdx) {
  171. CurrentKey.push_back(Groups[0]->GetKeyCell(keyIdx));
  172. }
  173. }
  174. // Extend with default values if needed
  175. for (;keyIdx < KeyDefaults->Defs.size(); ++keyIdx) {
  176. CurrentKey.push_back(KeyDefaults->Defs[keyIdx]);
  177. }
  178. }
  179. ui64 CountUnscreenedRows(TRowId beginRowId, TRowId endRowId) noexcept {
  180. if (!Screen) {
  181. // Include all rows
  182. return endRowId - beginRowId;
  183. }
  184. TRowId rowId = beginRowId;
  185. ui64 rowCount = 0;
  186. while (rowId < endRowId) {
  187. // Skip screen holes before the current rowId
  188. while (CurrentHole.End <= rowId) {
  189. CurrentHole = TScreen::Next(Screen, CurrentHoleIdx, 1);
  190. }
  191. TRowId next;
  192. if (rowId < CurrentHole.Begin) {
  193. // Skip rows before the next begin
  194. next = Min(CurrentHole.Begin, endRowId);
  195. } else {
  196. // Include rows before the next end
  197. next = Min(CurrentHole.End, endRowId);
  198. rowCount += next - rowId;
  199. }
  200. rowId = next;
  201. }
  202. return rowCount;
  203. }
  204. void AddBlobsSize(TChanneledDataSize& stats, const TFrames* frames, ELargeObj lob, ui32 &prevPage) noexcept {
  205. const auto row = GetLastRowId();
  206. const auto end = GetCurrentRowId();
  207. prevPage = frames->Lower(row, prevPage, Max<ui32>());
  208. while (auto &rel = frames->Relation(prevPage)) {
  209. if (rel.Row < end) {
  210. auto channel = Part->GetPageChannel(lob, prevPage);
  211. stats.Add(rel.Size, channel);
  212. ++prevPage;
  213. } else if (!rel.IsHead()) {
  214. Y_ABORT("Got unaligned TFrames head record");
  215. } else {
  216. break;
  217. }
  218. }
  219. }
  220. private:
  221. TIntrusiveConstPtr<TPart> Part;
  222. TIntrusiveConstPtr<TKeyCellDefaults> KeyDefaults;
  223. TSmallVec<TCell> CurrentKey;
  224. ui64 LastRowId = 0;
  225. TVector<THolder<IStatsPartGroupIterator>> Groups;
  226. TVector<THolder<IStatsPartGroupIterator>> HistoricGroups;
  227. TIntrusiveConstPtr<TScreen> Screen;
  228. TIntrusiveConstPtr<TFrames> Small; /* Inverted index for small blobs */
  229. TIntrusiveConstPtr<TFrames> Large; /* Inverted index for large blobs */
  230. size_t CurrentHoleIdx = 0;
  231. TScreen::THole CurrentHole;
  232. ui32 PrevSmallPage = 0;
  233. ui32 PrevLargePage = 0;
  234. };
}}