Browse Source

Supported autopartition of topics in the workload (#7420)

Nikolay Shestakov 7 months ago
parent
commit
7c90c71e72

+ 35 - 6
ydb/public/lib/ydb_cli/commands/topic_operations_scenario.cpp

@@ -91,13 +91,18 @@ void TTopicOperationsScenario::InitStatsCollector()
 void TTopicOperationsScenario::CreateTopic(const TString& database,
                                            const TString& topic,
                                            ui32 partitionCount,
-                                           ui32 consumerCount)
+                                           ui32 consumerCount,
+                                           bool autoscaling,
+                                           ui32 maxPartitionCount,
+                                           ui32 stabilizationWindowSeconds,
+                                           ui32 upUtilizationPercent,
+                                           ui32 downUtilizationPercent)
 {
     auto topicPath =
         TCommandWorkloadTopicDescribe::GenerateFullTopicName(database, topic);
 
     EnsureTopicNotExist(topicPath);
-    CreateTopic(topicPath, partitionCount, consumerCount);
+    CreateTopic(topicPath, partitionCount, consumerCount, autoscaling, maxPartitionCount, stabilizationWindowSeconds, upUtilizationPercent, downUtilizationPercent);
 }
 
 void TTopicOperationsScenario::DropTopic(const TString& database,
@@ -155,14 +160,32 @@ void TTopicOperationsScenario::EnsureTopicNotExist(const TString& topic)
 
 void TTopicOperationsScenario::CreateTopic(const TString& topic,
                                            ui32 partitionCount,
-                                           ui32 consumerCount)
+                                           ui32 consumerCount,
+                                           bool autoscaling,
+                                           ui32 maxPartitionCount,
+                                           ui32 stabilizationWindowSeconds,
+                                           ui32 upUtilizationPercent,
+                                           ui32 downUtilizationPercent)
 {
     Y_ABORT_UNLESS(Driver);
 
     NTopic::TTopicClient client(*Driver);
 
     NTopic::TCreateTopicSettings settings;
-    settings.PartitioningSettings(partitionCount, partitionCount);
+    if (autoscaling) {
+        settings.BeginConfigurePartitioningSettings()
+            .MinActivePartitions(partitionCount)
+            .MaxActivePartitions(maxPartitionCount)
+            .BeginConfigureAutoPartitioningSettings()
+                .Strategy(NTopic::EAutoPartitioningStrategy::ScaleUpAndDown)
+                .StabilizationWindow(TDuration::Seconds(stabilizationWindowSeconds))
+                .UpUtilizationPercent(upUtilizationPercent)
+                .DownUtilizationPercent(downUtilizationPercent)
+            .EndConfigureAutoPartitioningSettings()
+            .EndConfigurePartitioningSettings();
+    } else {
+        settings.PartitioningSettings(partitionCount, partitionCount);
+    }
 
     for (unsigned consumerIdx = 0; consumerIdx < consumerCount; ++consumerIdx) {
         settings
@@ -223,8 +246,12 @@ void TTopicOperationsScenario::StartConsumerThreads(std::vector<std::future<void
 void TTopicOperationsScenario::StartProducerThreads(std::vector<std::future<void>>& threads,
                                                     ui32 partitionCount,
                                                     ui32 partitionSeed,
-                                                    const std::vector<TString>& generatedMessages)
+                                                    const std::vector<TString>& generatedMessages,
+                                                    const TString& database)
 {
+    auto describeTopicResult = TCommandWorkloadTopicDescribe::DescribeTopic(database, TopicName, *Driver);
+    bool useAutoPartitioning = NYdb::NTopic::EAutoPartitioningStrategy::Disabled != describeTopicResult.GetPartitioningSettings().GetAutoPartitioningSettings().GetStrategy();
+
     auto count = std::make_shared<std::atomic_uint>();
     for (ui32 writerIdx = 0; writerIdx < ProducerThreadCount; ++writerIdx) {
         TTopicWorkloadWriterParams writerParams{
@@ -236,6 +263,7 @@ void TTopicOperationsScenario::StartProducerThreads(std::vector<std::future<void
             .ErrorFlag = ErrorFlag,
             .StartedCount = count,
             .GeneratedMessages = generatedMessages,
+            .Database = database,
             .TopicName = TopicName,
             .ByteRate = MessageRate != 0 ? MessageRate * MessageSize : ByteRate,
             .MessageSize = MessageSize,
@@ -245,7 +273,8 @@ void TTopicOperationsScenario::StartProducerThreads(std::vector<std::future<void
             .PartitionId = (partitionSeed + writerIdx) % partitionCount,
             .Direct = Direct,
             .Codec = Codec,
-            .UseTransactions = UseTransactions
+            .UseTransactions = UseTransactions,
+            .UseAutoPartitioning = useAutoPartitioning
         };
 
         threads.push_back(std::async([writerParams = std::move(writerParams)]() mutable { TTopicWorkloadWriterWorker::WriterLoop(writerParams); }));

+ 19 - 3
ydb/public/lib/ydb_cli/commands/topic_operations_scenario.h

@@ -53,6 +53,11 @@ public:
     double Percentile = 99.0;
     TString TopicName;
     ui32 TopicPartitionCount = 1;
+    bool TopicAutoscaling = false;
+    ui32 TopicMaxPartitionCount = 100;
+    ui32 StabilizationWindowSeconds = 15;
+    ui32 UpUtilizationPercent = 90;
+    ui32 DownUtilizationPercent = 30;
     ui32 ProducerThreadCount = 0;
     ui32 ConsumerThreadCount = 0;
     ui32 ConsumerCount = 0;
@@ -76,7 +81,12 @@ protected:
     void CreateTopic(const TString& database,
                      const TString& topic,
                      ui32 partitionCount,
-                     ui32 consumerCount);
+                     ui32 consumerCount,
+                     bool autoscaling = false,
+                     ui32 maxPartitionCount = 100,
+                     ui32 stabilizationWindowSeconds = 15,
+                     ui32 upUtilizationPercent = 90,
+                     ui32 downUtilizationPercent = 30);
     void DropTopic(const TString& database,
                    const TString& topic);
 
@@ -90,7 +100,8 @@ protected:
     void StartProducerThreads(std::vector<std::future<void>>& threads,
                               ui32 partitionCount,
                               ui32 partitionSeed,
-                              const std::vector<TString>& generatedMessages);
+                              const std::vector<TString>& generatedMessages,
+                              const TString& database);
     void JoinThreads(const std::vector<std::future<void>>& threads);
 
     bool AnyErrors() const;
@@ -108,7 +119,12 @@ private:
     void EnsureTopicNotExist(const TString& topic);
     void CreateTopic(const TString& topic,
                      ui32 partitionCount,
-                     ui32 consumerCount);
+                     ui32 consumerCount,
+                     bool autoscaling,
+                     ui32 maxPartitionCount,
+                     ui32 stabilizationWindowSeconds,
+                     ui32 upUtilizationPercent,
+                     ui32 downUtilizationPercent);
 
     static NTable::TSession GetSession(NTable::TTableClient& client);
 

+ 1 - 1
ydb/public/lib/ydb_cli/commands/topic_readwrite_scenario.cpp

@@ -21,7 +21,7 @@ int TTopicReadWriteScenario::DoRun(const TClientCommand::TConfig& config)
     std::vector<std::future<void>> threads;
 
     StartConsumerThreads(threads, config.Database);
-    StartProducerThreads(threads, partitionCount, partitionSeed, generatedMessages);
+    StartProducerThreads(threads, partitionCount, partitionSeed, generatedMessages, config.Database);
 
     StatsCollector->PrintWindowStatsLoop();
 

+ 17 - 1
ydb/public/lib/ydb_cli/commands/topic_workload/topic_workload_init.cpp

@@ -10,7 +10,7 @@ using namespace NYdb::NConsoleClient;
 
 int TCommandWorkloadTopicInit::TScenario::DoRun(const TConfig& config)
 {
-    CreateTopic(config.Database, TopicName, TopicPartitionCount, ConsumerCount);
+    CreateTopic(config.Database, TopicName, TopicPartitionCount, ConsumerCount, TopicAutoscaling, TopicMaxPartitionCount, StabilizationWindowSeconds, UpUtilizationPercent, DownUtilizationPercent);
 
     return EXIT_SUCCESS;
 }
@@ -39,6 +39,22 @@ void TCommandWorkloadTopicInit::Config(TConfig& config)
     config.Opts->AddLongOption('c', "consumers", "Number of consumers in the topic.")
         .DefaultValue(1)
         .StoreResult(&Scenario.ConsumerCount);
+
+
+    config.Opts->AddLongOption('a', "auto-partitioning", "Enable autopartitioning of topic.")
+        .DefaultValue(false)
+        .StoreTrue(&Scenario.TopicAutoscaling);
+    config.Opts->AddLongOption('m', "auto-partitioning-max-partitions-count", "Max number of partitions in the topic.")
+        .StoreResult(&Scenario.TopicMaxPartitionCount);
+    config.Opts->AddLongOption("auto-partitioning-stabilization-window-seconds", "Duration in seconds of high or low load before automatically scale the number of partitions")
+        .Optional()
+        .StoreResult(&Scenario.StabilizationWindowSeconds);
+    config.Opts->AddLongOption("auto-partitioning-up-utilization-percent", "The load percentage at which the number of partitions will increase")
+        .Optional()
+        .StoreResult(&Scenario.UpUtilizationPercent);
+    config.Opts->AddLongOption("auto-partitioning-down-utilization-percent", "The load percentage at which the number of partitions will decrease")
+        .Optional()
+        .StoreResult(&Scenario.DownUtilizationPercent);
 }
 
 void TCommandWorkloadTopicInit::Parse(TConfig& config)

+ 7 - 4
ydb/public/lib/ydb_cli/commands/topic_workload/topic_workload_reader.cpp

@@ -26,6 +26,7 @@ void TTopicWorkloadReader::ReaderLoop(TTopicWorkloadReaderParams& params, TInsta
 
     auto describeTopicResult = TCommandWorkloadTopicDescribe::DescribeTopic(params.Database, params.TopicName, params.Driver);
     NYdb::NTopic::TReadSessionSettings settings;
+    settings.AutoPartitioningSupport(true);
 
     if (!params.ReadWithoutConsumer) {
         auto consumerName = TCommandWorkloadTopicDescribe::GenerateConsumerName(params.ConsumerPrefix, params.ConsumerIdx);
@@ -45,8 +46,8 @@ void TTopicWorkloadReader::ReaderLoop(TTopicWorkloadReaderParams& params, TInsta
         }
         settings.WithoutConsumer().AppendTopics(topic);
     }
-    
-    
+
+
     if (params.UseTransactions) {
         txSupport.emplace(params.Driver, params.ReadOnlyTableName, params.TableName);
     }
@@ -97,8 +98,8 @@ void TTopicWorkloadReader::ReaderLoop(TTopicWorkloadReaderParams& params, TInsta
                         txSupport->AppendRow(message.GetData());
                     }
 
-                    WRITE_LOG(params.Log, ELogPriority::TLOG_DEBUG, TStringBuilder() << "Got message: " << message.GetMessageGroupId() 
-                        << " topic " << message.GetPartitionSession()->GetTopicPath() << " partition " << message.GetPartitionSession()->GetPartitionId() 
+                    WRITE_LOG(params.Log, ELogPriority::TLOG_DEBUG, TStringBuilder() << "Got message: " << message.GetMessageGroupId()
+                        << " topic " << message.GetPartitionSession()->GetTopicPath() << " partition " << message.GetPartitionSession()->GetPartitionId()
                         << " offset " << message.GetOffset() << " seqNo " << message.GetSeqNo()
                         << " createTime " << message.GetCreateTime() << " fullTimeMs " << fullTime);
                 }
@@ -126,6 +127,8 @@ void TTopicWorkloadReader::ReaderLoop(TTopicWorkloadReaderParams& params, TInsta
                 WRITE_LOG(params.Log, ELogPriority::TLOG_ERR, TStringBuilder() << "Read session closed: " << closeSessionEvent->DebugString());
                 *params.ErrorFlag = 1;
                 break;
+            } else if (auto* endPartitionStreamEvent = std::get_if<NYdb::NTopic::TReadSessionEvent::TEndPartitionSessionEvent>(&event)) {
+                endPartitionStreamEvent->Confirm();
             } else if (auto* partitionStreamStatusEvent = std::get_if<NYdb::NTopic::TReadSessionEvent::TPartitionSessionStatusEvent>(&event)) {
                 WRITE_LOG(params.Log, ELogPriority::TLOG_DEBUG, TStringBuilder() << partitionStreamStatusEvent->DebugString())
 

+ 12 - 5
ydb/public/lib/ydb_cli/commands/topic_workload/topic_workload_writer.cpp

@@ -1,4 +1,5 @@
 #include "topic_workload_writer.h"
+#include "topic_workload_describe.h"
 
 #include <util/generic/overloaded.h>
 
@@ -81,7 +82,7 @@ bool TTopicWorkloadWriterWorker::WaitForInitSeqNo()
 
 void TTopicWorkloadWriterWorker::Process() {
     Sleep(TDuration::Seconds((float)Params.WarmupSec * Params.WriterIdx / Params.ProducerThreadCount));
-    
+
     const TInstant endTime = TInstant::Now() + TDuration::Seconds(Params.TotalSec);
 
     StartTimestamp = Now();
@@ -120,13 +121,13 @@ void TTopicWorkloadWriterWorker::Process() {
                 writingAllowed &= BytesWritten < bytesMustBeWritten;
                 WRITE_LOG(Params.Log, ELogPriority::TLOG_DEBUG, TStringBuilder() << "BytesWritten " << BytesWritten << " bytesMustBeWritten " << bytesMustBeWritten << " writingAllowed " << writingAllowed);
             }
-            else 
+            else
             {
                 writingAllowed &= InflightMessages.size() <= 1_MB / Params.MessageSize;
                 WRITE_LOG(Params.Log, ELogPriority::TLOG_DEBUG, TStringBuilder() << "Inflight size " << InflightMessages.size() << " writingAllowed " << writingAllowed);
             }
 
-            if (writingAllowed) 
+            if (writingAllowed)
             {
                 TString data = GetGeneratedMessage();
 
@@ -142,7 +143,7 @@ void TTopicWorkloadWriterWorker::Process() {
                 ContinuationToken.Clear();
                 MessageId++;
             }
-            else 
+            else
                 Sleep(TDuration::MilliSeconds(1));
 
             if (events.empty())
@@ -220,11 +221,17 @@ bool TTopicWorkloadWriterWorker::ProcessSessionClosedEvent(
 
 void TTopicWorkloadWriterWorker::CreateWorker() {
     WRITE_LOG(Params.Log, ELogPriority::TLOG_INFO, TStringBuilder() << "Create writer worker for ProducerId " << Params.ProducerId << " PartitionId " << Params.PartitionId);
+
     NYdb::NTopic::TWriteSessionSettings settings;
     settings.Codec((NYdb::NTopic::ECodec)Params.Codec);
     settings.Path(Params.TopicName);
     settings.ProducerId(Params.ProducerId);
-    settings.PartitionId(Params.PartitionId);
+    if (Params.UseAutoPartitioning) {
+        settings.MessageGroupId(Params.ProducerId);
+    } else {
+        settings.PartitionId(Params.PartitionId);
+    }
+
     settings.DirectWriteToPartition(Params.Direct);
     WriteSession = NYdb::NTopic::TTopicClient(Params.Driver).CreateWriteSession(settings);
 }

+ 2 - 0
ydb/public/lib/ydb_cli/commands/topic_workload/topic_workload_writer.h

@@ -20,6 +20,7 @@ namespace NYdb {
             std::shared_ptr<std::atomic<bool>> ErrorFlag;
             std::shared_ptr<std::atomic_uint> StartedCount;
             const std::vector<TString>& GeneratedMessages;
+            TString Database;
             TString TopicName;
             size_t ByteRate;
             size_t MessageSize;
@@ -30,6 +31,7 @@ namespace NYdb {
             bool Direct;
             ui32 Codec = 0;
             bool UseTransactions = false;
+            bool UseAutoPartitioning = false;
         };
 
         class TTopicWorkloadWriterWorker {

+ 1 - 1
ydb/public/lib/ydb_cli/commands/topic_write_scenario.cpp

@@ -21,7 +21,7 @@ int TTopicWriteScenario::DoRun(const TClientCommand::TConfig& config)
     std::vector<std::future<void>> threads;
 
     StartConsumerThreads(threads, config.Database);
-    StartProducerThreads(threads, partitionCount, partitionSeed, generatedMessages);
+    StartProducerThreads(threads, partitionCount, partitionSeed, generatedMessages, config.Database);
 
     StatsCollector->PrintWindowStatsLoop();