Просмотр исходного кода

Fix wrong active and inactive partition number (#10712)

Nikolay Shestakov 4 месяцев назад
Родитель
Сommit
268516ff68

+ 19 - 8
ydb/core/persqueue/read_balancer__balancing.cpp

@@ -396,6 +396,9 @@ void TPartitionFamily::InactivatePartition(ui32 partitionId) {
 }
 
  void TPartitionFamily::ChangePartitionCounters(ssize_t active, ssize_t inactive) {
+    Y_VERIFY_DEBUG((ssize_t)ActivePartitionCount + active >= 0);
+    Y_VERIFY_DEBUG((ssize_t)InactivePartitionCount + inactive >= 0);
+
     ActivePartitionCount += active;
     InactivePartitionCount += inactive;
 
@@ -985,7 +988,7 @@ bool TConsumer::SetCommittedState(ui32 partitionId, ui32 generation, ui64 cookie
     return Partitions[partitionId].SetCommittedState(generation, cookie);
 }
 
-bool TConsumer::ProccessReadingFinished(ui32 partitionId, const TActorContext& ctx) {
+bool TConsumer::ProccessReadingFinished(ui32 partitionId, bool wasInactive, const TActorContext& ctx) {
     if (!ScalingSupport()) {
         return false;
     }
@@ -996,7 +999,9 @@ bool TConsumer::ProccessReadingFinished(ui32 partitionId, const TActorContext& c
     if (!family) {
         return false;
     }
-    family->InactivatePartition(partitionId);
+    if (!wasInactive) {
+        family->InactivatePartition(partitionId);
+    }
 
     if (!family->IsLonely() && partition.Commited) {
         if (BreakUpFamily(family, partitionId, false, ctx)) {
@@ -1065,8 +1070,13 @@ void TConsumer::StartReading(ui32 partitionId, const TActorContext& ctx) {
     }
 
     auto* partition = GetPartition(partitionId);
+    if (!partition) {
+        LOG_DEBUG_S(ctx, NKikimrServices::PERSQUEUE_READ_BALANCER,
+                GetPrefix() << "Reading of the partition " << partitionId << " was started by " << ConsumerName << ".");
+    }
 
-    if (partition && partition->StartReading()) {
+    auto wasInactive = partition->IsInactive();
+    if (partition->StartReading()) {
         LOG_DEBUG_S(ctx, NKikimrServices::PERSQUEUE_READ_BALANCER,
                 GetPrefix() << "Reading of the partition " << partitionId << " was started by " << ConsumerName << ". We stop reading from child partitions.");
 
@@ -1080,7 +1090,9 @@ void TConsumer::StartReading(ui32 partitionId, const TActorContext& ctx) {
             return;
         }
 
-        family->ActivatePartition(partitionId);
+        if (wasInactive) {
+            family->ActivatePartition(partitionId);
+        }
 
         // We releasing all children's partitions because we don't start reading the partition from EndOffset
         GetPartitionGraph().Travers(partitionId, [&](ui32 partitionId) {
@@ -1097,8 +1109,6 @@ void TConsumer::StartReading(ui32 partitionId, const TActorContext& ctx) {
             return true;
         });
     } else {
-        LOG_DEBUG_S(ctx, NKikimrServices::PERSQUEUE_READ_BALANCER,
-                GetPrefix() << "Reading of the partition " << partitionId << " was started by " << ConsumerName << ".");
     }
 }
 
@@ -1139,7 +1149,7 @@ void TConsumer::FinishReading(TEvPersQueue::TEvReadingPartitionFinishedRequest::
                     GetPrefix() << "Reading of the partition " << partitionId << " was finished by " << r.GetConsumer()
                     << ", firstMessage=" << r.GetStartedReadingFromEndOffset() << ", " << GetSdkDebugString0(r.GetScaleAwareSDK()));
 
-        if (ProccessReadingFinished(partitionId, ctx)) {
+        if (ProccessReadingFinished(partitionId, false, ctx)) {
             ScheduleBalance(ctx);
         }
     } else if (!partition.IsInactive()) {
@@ -1540,11 +1550,12 @@ bool TBalancer::SetCommittedState(const TString& consumerName, ui32 partitionId,
         return false;
     }
 
+    auto wasInactive = consumer->IsInactive(partitionId);
     if (consumer->SetCommittedState(partitionId, generation, cookie)) {
         LOG_DEBUG_S(ctx, NKikimrServices::PERSQUEUE_READ_BALANCER,
                 GetPrefix() << "The offset of the partition " << partitionId << " was commited by " << consumerName);
 
-        if (consumer->ProccessReadingFinished(partitionId, ctx)) {
+        if (consumer->ProccessReadingFinished(partitionId, wasInactive, ctx)) {
             consumer->ScheduleBalance(ctx);
         }
 

+ 1 - 1
ydb/core/persqueue/read_balancer__balancing.h

@@ -226,7 +226,7 @@ struct TConsumer {
     bool Unlock(const TActorId& sender, ui32 partitionId, const TActorContext& ctx);
 
     bool SetCommittedState(ui32 partitionId, ui32 generation, ui64 cookie);
-    bool ProccessReadingFinished(ui32 partitionId, const TActorContext& ctx);
+    bool ProccessReadingFinished(ui32 partitionId, bool wasInactive, const TActorContext& ctx);
     void StartReading(ui32 partitionId, const TActorContext& ctx);
     void FinishReading(TEvPersQueue::TEvReadingPartitionFinishedRequest::TPtr& ev, const TActorContext& ctx);
 

+ 43 - 5
ydb/core/persqueue/ut/ut_with_sdk/autoscaling_ut.cpp

@@ -937,7 +937,7 @@ Y_UNIT_TEST_SUITE(TopicAutoscaling) {
         return balancerTabletId;
     }
 
-    void SplitPartition(TTopicSdkTestSetup& setup, const TString& topicPath, ui32 partitionId) {
+    void SplitPartitionRB(TTopicSdkTestSetup& setup, const TString& topicPath, ui32 partitionId) {
         auto balancerTabletId = GetBalancerTabletId(setup, topicPath);
         auto edge = setup.GetRuntime().AllocateEdgeActor();
         setup.GetRuntime().SendToPipe(balancerTabletId, edge, new TEvPQ::TEvPartitionScaleStatusChanged(partitionId, NKikimrPQ::EScaleStatus::NEED_SPLIT));
@@ -968,8 +968,6 @@ Y_UNIT_TEST_SUITE(TopicAutoscaling) {
         auto tableClient = setup.MakeTableClient();
         auto session = tableClient.CreateSession().GetValueSync().GetSession();
 
-        setup.GetServer().AnnoyingClient->MkDir("/Root", "dir");
-
         ExecuteQuery(session, R"(
             --!syntax_v1
             CREATE TOPIC `/Root/dir/origin`
@@ -980,7 +978,7 @@ Y_UNIT_TEST_SUITE(TopicAutoscaling) {
         )");
 
         AssertPartitionCount(setup, "/Root/dir/origin", 1);
-        SplitPartition(setup, "/Root/dir/origin", 0);
+        SplitPartitionRB(setup, "/Root/dir/origin", 0);
         WaitAndAssertPartitionCount(setup, "/Root/dir/origin", 3);
     }
 
@@ -1009,10 +1007,50 @@ Y_UNIT_TEST_SUITE(TopicAutoscaling) {
         )");
 
         AssertPartitionCount(setup, "/Root/origin/feed", 1);
-        SplitPartition(setup, "/Root/origin/feed/streamImpl", 0);
+        SplitPartitionRB(setup, "/Root/origin/feed/streamImpl", 0);
         WaitAndAssertPartitionCount(setup, "/Root/origin/feed", 3);
     }
 
+    Y_UNIT_TEST(BalancingAfterSplit_sessionsWithPartition) {
+        TTopicSdkTestSetup setup = CreateSetup();
+        setup.CreateTopicWithAutoscale(TEST_TOPIC, TEST_CONSUMER, 1, 100);
+
+        TTopicClient client = setup.MakeClient();
+
+        auto writeSession = CreateWriteSession(client, "producer-1", 0);
+        UNIT_ASSERT(writeSession->Write(Msg("message_1.1", 2)));
+
+        ui64 txId = 1023;
+        SplitPartition(setup, ++txId, 0, "a");
+
+        auto readSession0 = CreateTestReadSession({ .Name="Session-0", .Setup=setup, .Sdk = SdkVersion::Topic, .ExpectedMessagesCount = 1, .AutoCommit = false, .Partitions = {0}, .AutoPartitioningSupport = true });
+
+        readSession0->WaitAndAssertPartitions({0}, "Must read partition 0");
+        readSession0->WaitAllMessages();
+
+
+        for(size_t i = 0; i < 10; ++i) {
+            auto events = readSession0->GetEndedPartitionEvents();
+            if (events.empty()) {
+                Sleep(TDuration::Seconds(1));
+                continue;
+            }
+            readSession0->Commit();
+            break;
+        }
+
+        auto readSession1 = CreateTestReadSession({ .Name="Session-1", .Setup=setup, .Sdk = SdkVersion::Topic, .AutoCommit = false, .Partitions = {1}, .AutoPartitioningSupport = true });
+        readSession1->WaitAndAssertPartitions({1}, "Must read partition 1");
+
+        auto readSession2 = CreateTestReadSession({ .Name="Session-2", .Setup=setup, .Sdk = SdkVersion::Topic, .AutoCommit = false, .Partitions = {2}, .AutoPartitioningSupport = true });
+        readSession2->WaitAndAssertPartitions({2}, "Must read partition 2");
+
+        writeSession->Close();
+        readSession0->Close();
+        readSession1->Close();
+        readSession2->Close();
+    }
+
     Y_UNIT_TEST(MidOfRange) {
         auto AsString = [](std::vector<ui16> vs) {
             TStringBuilder a;