Browse Source

KIKIMR-19211: buckets optimizer

ivanmorozov 1 year ago
parent
commit
cb95924ef1

+ 5 - 0
.mapping.json

@@ -5403,6 +5403,11 @@
   "ydb/core/tx/columnshard/engines/storage/optimizer/intervals/CMakeLists.linux-x86_64.txt":"",
   "ydb/core/tx/columnshard/engines/storage/optimizer/intervals/CMakeLists.txt":"",
   "ydb/core/tx/columnshard/engines/storage/optimizer/intervals/CMakeLists.windows-x86_64.txt":"",
+  "ydb/core/tx/columnshard/engines/storage/optimizer/lbuckets/CMakeLists.darwin-x86_64.txt":"",
+  "ydb/core/tx/columnshard/engines/storage/optimizer/lbuckets/CMakeLists.linux-aarch64.txt":"",
+  "ydb/core/tx/columnshard/engines/storage/optimizer/lbuckets/CMakeLists.linux-x86_64.txt":"",
+  "ydb/core/tx/columnshard/engines/storage/optimizer/lbuckets/CMakeLists.txt":"",
+  "ydb/core/tx/columnshard/engines/storage/optimizer/lbuckets/CMakeLists.windows-x86_64.txt":"",
   "ydb/core/tx/columnshard/engines/storage/optimizer/levels/CMakeLists.darwin-x86_64.txt":"",
   "ydb/core/tx/columnshard/engines/storage/optimizer/levels/CMakeLists.linux-aarch64.txt":"",
   "ydb/core/tx/columnshard/engines/storage/optimizer/levels/CMakeLists.linux-x86_64.txt":"",

+ 19 - 2
ydb/core/tx/columnshard/engines/changes/general_compaction.cpp

@@ -3,6 +3,7 @@
 #include "compaction/column_cursor.h"
 #include "compaction/merge_context.h"
 #include "compaction/merged_column.h"
+#include "counters/general.h"
 
 #include <ydb/core/tx/columnshard/columnshard_impl.h>
 #include <ydb/core/tx/columnshard/engines/portions/with_blobs.h>
@@ -18,11 +19,27 @@ TConclusionStatus TGeneralCompactColumnEngineChanges::DoConstructBlobs(TConstruc
     std::vector<TPortionInfoWithBlobs> portions = TPortionInfoWithBlobs::RestorePortions(SwitchedPortions, Blobs);
     Blobs.clear();
     std::optional<TSnapshot> maxSnapshot;
+    i64 portionsSize = 0;
+    i64 portionsCount = 0;
+    i64 insertedPortionsSize = 0;
+    i64 compactedPortionsSize = 0;
+    i64 otherPortionsSize = 0;
     for (auto&& i : SwitchedPortions) {
         if (!maxSnapshot || *maxSnapshot < i.GetMinSnapshot()) {
             maxSnapshot = i.GetMinSnapshot();
         }
+        if (i.GetMeta().GetProduced() == TPortionMeta::EProduced::INSERTED) {
+            insertedPortionsSize += i.GetBlobBytes();
+        } else if (i.GetMeta().GetProduced() == TPortionMeta::EProduced::SPLIT_COMPACTED) {
+            compactedPortionsSize += i.GetBlobBytes();
+        } else {
+            otherPortionsSize += i.GetBlobBytes();
+        }
+        portionsSize += i.GetBlobBytes();
+        ++portionsCount;
     }
+    NChanges::TGeneralCompactionCounters::OnPortionsKind(insertedPortionsSize, compactedPortionsSize, otherPortionsSize);
+    NChanges::TGeneralCompactionCounters::OnRepackPortions(portionsCount, portionsSize);
     Y_ABORT_UNLESS(maxSnapshot);
 
     static const TString portionIdFieldName = "$$__portion_id";
@@ -208,8 +225,8 @@ NColumnShard::ECumulativeCounters TGeneralCompactColumnEngineChanges::GetCounter
     return isSuccess ? NColumnShard::COUNTER_COMPACTION_SUCCESS : NColumnShard::COUNTER_COMPACTION_FAIL;
 }
 
-void TGeneralCompactColumnEngineChanges::AddCheckPoint(const NIndexedReader::TSortableBatchPosition& position, const bool include) {
-    AFL_VERIFY(CheckPoints.emplace(position, include).second);
+void TGeneralCompactColumnEngineChanges::AddCheckPoint(const NIndexedReader::TSortableBatchPosition& position, const bool include, const bool validationDuplications) {
+    AFL_VERIFY(CheckPoints.emplace(position, include).second || !validationDuplications);
 }
 
 }

+ 1 - 1
ydb/core/tx/columnshard/engines/changes/general_compaction.h

@@ -19,7 +19,7 @@ protected:
 public:
     using TBase::TBase;
 
-    void AddCheckPoint(const NIndexedReader::TSortableBatchPosition& position, const bool include = true);
+    void AddCheckPoint(const NIndexedReader::TSortableBatchPosition& position, const bool include = true, const bool validationDuplications = true);
 
     virtual TString TypeString() const override {
         return StaticTypeName();

+ 2 - 0
ydb/core/tx/columnshard/engines/storage/optimizer/CMakeLists.darwin-x86_64.txt

@@ -8,6 +8,7 @@
 
 add_subdirectory(abstract)
 add_subdirectory(intervals)
+add_subdirectory(lbuckets)
 add_subdirectory(levels)
 
 add_library(engines-storage-optimizer INTERFACE)
@@ -17,4 +18,5 @@ target_link_libraries(engines-storage-optimizer INTERFACE
   storage-optimizer-abstract
   storage-optimizer-intervals
   storage-optimizer-levels
+  storage-optimizer-lbuckets
 )

+ 2 - 0
ydb/core/tx/columnshard/engines/storage/optimizer/CMakeLists.linux-aarch64.txt

@@ -8,6 +8,7 @@
 
 add_subdirectory(abstract)
 add_subdirectory(intervals)
+add_subdirectory(lbuckets)
 add_subdirectory(levels)
 
 add_library(engines-storage-optimizer INTERFACE)
@@ -18,4 +19,5 @@ target_link_libraries(engines-storage-optimizer INTERFACE
   storage-optimizer-abstract
   storage-optimizer-intervals
   storage-optimizer-levels
+  storage-optimizer-lbuckets
 )

+ 2 - 0
ydb/core/tx/columnshard/engines/storage/optimizer/CMakeLists.linux-x86_64.txt

@@ -8,6 +8,7 @@
 
 add_subdirectory(abstract)
 add_subdirectory(intervals)
+add_subdirectory(lbuckets)
 add_subdirectory(levels)
 
 add_library(engines-storage-optimizer INTERFACE)
@@ -18,4 +19,5 @@ target_link_libraries(engines-storage-optimizer INTERFACE
   storage-optimizer-abstract
   storage-optimizer-intervals
   storage-optimizer-levels
+  storage-optimizer-lbuckets
 )

+ 2 - 0
ydb/core/tx/columnshard/engines/storage/optimizer/CMakeLists.windows-x86_64.txt

@@ -8,6 +8,7 @@
 
 add_subdirectory(abstract)
 add_subdirectory(intervals)
+add_subdirectory(lbuckets)
 add_subdirectory(levels)
 
 add_library(engines-storage-optimizer INTERFACE)
@@ -17,4 +18,5 @@ target_link_libraries(engines-storage-optimizer INTERFACE
   storage-optimizer-abstract
   storage-optimizer-intervals
   storage-optimizer-levels
+  storage-optimizer-lbuckets
 )

+ 22 - 0
ydb/core/tx/columnshard/engines/storage/optimizer/lbuckets/CMakeLists.darwin-x86_64.txt

@@ -0,0 +1,22 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+
+add_library(storage-optimizer-lbuckets)
+target_link_libraries(storage-optimizer-lbuckets PUBLIC
+  contrib-libs-cxxsupp
+  yutil
+  libs-apache-arrow
+  ydb-core-protos
+  core-formats-arrow
+  engines-changes-abstract
+)
+target_sources(storage-optimizer-lbuckets PRIVATE
+  ${CMAKE_SOURCE_DIR}/ydb/core/tx/columnshard/engines/storage/optimizer/lbuckets/optimizer.cpp
+  ${CMAKE_SOURCE_DIR}/ydb/core/tx/columnshard/engines/storage/optimizer/lbuckets/counters.cpp
+)

+ 23 - 0
ydb/core/tx/columnshard/engines/storage/optimizer/lbuckets/CMakeLists.linux-aarch64.txt

@@ -0,0 +1,23 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+
+add_library(storage-optimizer-lbuckets)
+target_link_libraries(storage-optimizer-lbuckets PUBLIC
+  contrib-libs-linux-headers
+  contrib-libs-cxxsupp
+  yutil
+  libs-apache-arrow
+  ydb-core-protos
+  core-formats-arrow
+  engines-changes-abstract
+)
+target_sources(storage-optimizer-lbuckets PRIVATE
+  ${CMAKE_SOURCE_DIR}/ydb/core/tx/columnshard/engines/storage/optimizer/lbuckets/optimizer.cpp
+  ${CMAKE_SOURCE_DIR}/ydb/core/tx/columnshard/engines/storage/optimizer/lbuckets/counters.cpp
+)

+ 23 - 0
ydb/core/tx/columnshard/engines/storage/optimizer/lbuckets/CMakeLists.linux-x86_64.txt

@@ -0,0 +1,23 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+
+add_library(storage-optimizer-lbuckets)
+target_link_libraries(storage-optimizer-lbuckets PUBLIC
+  contrib-libs-linux-headers
+  contrib-libs-cxxsupp
+  yutil
+  libs-apache-arrow
+  ydb-core-protos
+  core-formats-arrow
+  engines-changes-abstract
+)
+target_sources(storage-optimizer-lbuckets PRIVATE
+  ${CMAKE_SOURCE_DIR}/ydb/core/tx/columnshard/engines/storage/optimizer/lbuckets/optimizer.cpp
+  ${CMAKE_SOURCE_DIR}/ydb/core/tx/columnshard/engines/storage/optimizer/lbuckets/counters.cpp
+)

Some files were not shown because too many files changed in this diff