Browse Source

use adaptive timeouts and persistent node count in hive warmup KIKIMR-20551 (#624)

vporyadke 1 year ago
parent
commit
fdedceebff

+ 25 - 7
ydb/core/mind/hive/hive_impl.cpp

@@ -177,14 +177,30 @@ void THive::DeleteTabletWithoutStorage(TLeaderTabletInfo* tablet, TSideEffects&
     sideEffects.Send(SelfId(), new TEvTabletBase::TEvDeleteTabletResult(NKikimrProto::OK, tablet->Id));
 }
 
+TInstant THive::GetAllowedBootingTime() { // Instant before which ExecuteProcessBootQueue postpones boot queue processing while WarmUp is set.
+    auto connectedNodes = TabletCounters->Simple()[NHive::COUNTER_NODES_CONNECTED].Get(); // current value of the COUNTER_NODES_CONNECTED simple counter
+    BLOG_D(connectedNodes << " nodes connected out of " << ExpectedNodes);
+    if (connectedNodes == 0) {
+        return {}; // NOTE(review): presumably the zero instant, so the caller's now < allowed check does not postpone - confirm TInstant default
+    }
+    TInstant result = LastConnect + MaxTimeBetweenConnects * std::max<i64>(static_cast<i64>(ExpectedNodes) - static_cast<i64>(connectedNodes), 1); // LastConnect plus one MaxTimeBetweenConnects per node still missing; signed math so a non-positive difference is clamped to a factor of 1
+    if (connectedNodes < ExpectedNodes) {
+        result = std::max(result, StartTime() + GetWarmUpBootWaitingPeriod()); // nodes still missing: not earlier than StartTime() + GetWarmUpBootWaitingPeriod()
+    }
+    result = std::min(result, StartTime() + GetMaxWarmUpPeriod()); // upper bound: never later than StartTime() + GetMaxWarmUpPeriod()
+    return result;
+}
+
 void THive::ExecuteProcessBootQueue(NIceDb::TNiceDb& db, TSideEffects& sideEffects) {
     TInstant now = TActivationContext::Now();
-    TInstant allowed = std::min(LastConnect + GetWarmUpBootWaitingPeriod(), StartTime() + GetMaxWarmUpPeriod());
-    if (WarmUp && now < allowed) {
-        BLOG_D("ProcessBootQueue - last connect was at " << LastConnect << "- not long enough ago");
-        ProcessBootQueueScheduled = false;
-        PostponeProcessBootQueue(allowed - now);
-        return;
+    if (WarmUp) {
+        TInstant allowed = GetAllowedBootingTime();
+        if (now < allowed) {
+            BLOG_D("ProcessBootQueue - waiting until " << allowed << " because of warmup, now: " << now);
+            ProcessBootQueueScheduled = false;
+            PostponeProcessBootQueue(allowed - now);
+            return;
+        }
     }
     BLOG_D("Handle ProcessBootQueue (size: " << BootQueue.BootQueue.size() << ")");
     THPTimer bootQueueProcessingTimer;
@@ -304,9 +320,11 @@ void THive::ProcessBootQueue() {
 }
 
 void THive::PostponeProcessBootQueue(TDuration after) { // Schedules a TEvPostponeProcessBootQueue event to fire after the given delay.
-    if (!ProcessBootQueuePostponed) {
+    TInstant postponeUntil = TActivationContext::Now() + after; // absolute deadline of this postponement request
+    if (!ProcessBootQueuePostponed || postponeUntil < ProcessBootQueuePostponedUntil) { // schedule when nothing is postponed yet, or when this request is due earlier than the recorded one
         BLOG_D("PostponeProcessBootQueue (" << after << ")");
         ProcessBootQueuePostponed = true;
+        ProcessBootQueuePostponedUntil = postponeUntil; // remember the deadline for comparison with later requests; this block does not cancel an event scheduled earlier
         Schedule(after, new TEvPrivate::TEvPostponeProcessBootQueue());
     }
 }

+ 4 - 0
ydb/core/mind/hive/hive_impl.h

@@ -362,7 +362,10 @@ protected:
     bool ProcessBootQueueScheduled = false;
     bool ProcessBootQueuePostponed = false;
     TInstant LastConnect;
+    TInstant ProcessBootQueuePostponedUntil;
+    TDuration MaxTimeBetweenConnects;
     bool WarmUp;
+    ui64 ExpectedNodes;
 
     THashMap<ui32, TEvInterconnect::TNodeInfo> NodesInfo;
     TTabletCountersBase* TabletCounters;
@@ -903,6 +906,7 @@ protected:
     void ScheduleDisconnectNode(THolder<TEvPrivate::TEvProcessDisconnectNode> event);
     void DeleteTabletWithoutStorage(TLeaderTabletInfo* tablet);
     void DeleteTabletWithoutStorage(TLeaderTabletInfo* tablet, TSideEffects& sideEffects);
+    TInstant GetAllowedBootingTime();
     void ScheduleUnlockTabletExecution(TNodeInfo& node);
     TString DebugDomainsActiveNodes() const;
     TResourceNormalizedValues GetStDevResourceValues() const;

+ 2 - 0
ydb/core/mind/hive/hive_ut.cpp

@@ -672,6 +672,7 @@ Y_UNIT_TEST_SUITE(THiveTest) {
                     TMailboxType::Simple, 0,
                     TMailboxType::Simple, 0);
         TTenantPoolConfig::TPtr tenantPoolConfig = new TTenantPoolConfig(localConfig);
+        // tenantPoolConfig->AddStaticSlot(DOMAIN_NAME);
         tenantPoolConfig->AddStaticSlot(tenant);
 
         TActorId actorId = runtime.Register(
@@ -1877,6 +1878,7 @@ Y_UNIT_TEST_SUITE(THiveTest) {
 
         Ctest << "killing tablet " << tabletId << Endl;
         runtime.Register(CreateTabletKiller(tabletId, runtime.GetNodeId(0)));
+        // runtime.Register(CreateTabletKiller(tabletId, runtime.GetNodeId(1)));
 
         waitFor([&]{ return blockedCommits.size() >= 2; }, "at least 2 blocked commits");
 

+ 1 - 1
ydb/core/mind/hive/monitoring.cpp

@@ -1869,7 +1869,7 @@ function fillDataShort(result) {
         if ("TotalTablets" in result) {
             var percent = Math.floor(result.RunningTablets * 100 / result.TotalTablets) + '%';
             var values = result.RunningTablets + ' of ' + result.TotalTablets;
-            var warmup = result.Warmup ? "<span class='glyphicon glyphicon-fire' style='color:red; margin-right:4px'></span>" : "";
+            var warmup = result.WarmUp ? "<span class='glyphicon glyphicon-fire' style='color:red; margin-right:4px'></span>" : "";
             $('#runningTablets').html(warmup + percent + ' (' + values + ')');
             $('#aliveNodes').html(result.AliveNodes);
             $('#bootQueue').html(result.BootQueueSize);

+ 2 - 1
ydb/core/mind/hive/tx__load_everything.cpp

@@ -727,7 +727,8 @@ public:
         Self->SetCounterTabletsTotal(tabletsTotal);
         Self->TabletCounters->Simple()[NHive::COUNTER_SEQUENCE_FREE].Set(Self->Sequencer.FreeSize());
         Self->TabletCounters->Simple()[NHive::COUNTER_SEQUENCE_ALLOCATED].Set(Self->Sequencer.AllocatedSequencesSize());
-        Self->TabletCounters->Simple()[NHive::COUNTER_NODES_TOTAL].Set(Self->Nodes.size());
+        Self->ExpectedNodes = Self->Nodes.size();
+        Self->TabletCounters->Simple()[NHive::COUNTER_NODES_TOTAL].Set(Self->ExpectedNodes);
         Self->MigrationState = NKikimrHive::EMigrationState::MIGRATION_READY;
         ctx.Send(Self->SelfId(), new TEvPrivate::TEvBootTablets());
 

+ 5 - 1
ydb/core/mind/hive/tx__status.cpp

@@ -33,7 +33,11 @@ public:
             }
             if (Self->WarmUp &&
                 node.Statistics.RestartTimestampSize() < Self->GetNodeRestartsToIgnoreInWarmup()) {
-                Self->LastConnect = TActivationContext::Now();
+                TInstant now = TActivationContext::Now();
+                if (Self->LastConnect != TInstant{}) {
+                    Self->MaxTimeBetweenConnects = std::max(Self->MaxTimeBetweenConnects, now - Self->LastConnect);
+                }
+                Self->LastConnect = now;
             }
             if (node.LocationAcquired) {
                 NIceDb::TNiceDb db(txc.DB);

+ 2 - 2
ydb/core/protos/config.proto

@@ -1384,9 +1384,9 @@ message THiveConfig {
     repeated NKikimrTabletBase.TTabletTypes.EType BalancerIgnoreTabletTypes = 49;
     optional double SpaceUsagePenaltyThreshold = 53 [default  = 1.1]; // number > 1
     optional double SpaceUsagePenalty = 54 [default = 0.2]; // number <= 1
-    optional uint64 WarmUpBootWaitingPeriod = 50  [default = 5000]; // milliseconds
+    optional uint64 WarmUpBootWaitingPeriod = 50  [default = 30000]; // milliseconds, time to wait for known nodes on cluster restart
     optional uint64 NodeRestartsToIgnoreInWarmup = 51 [default = 10];
-    optional double MaxWarmUpPeriod = 52 [default = 30.0]; // seconds
+    optional double MaxWarmUpPeriod = 52 [default = 600.0]; // seconds
     optional bool WarmUpEnabled = 55 [default = true];
     optional uint64 EmergencyBalancerInflight = 56 [default = 1]; // tablets
     optional uint64 MaxMovementsOnEmergencyBalancer = 57 [default = 2];