Browse Source

optimize cluster handler (#8619)

Alexey Efimov 6 months ago
parent
commit
89a663c665

+ 1 - 1
ydb/core/viewer/json_handlers_viewer.cpp

@@ -203,7 +203,7 @@ void InitViewerStorageUsageJsonHandler(TJsonHandlers &handlers) {
 }
 
+// Registers the JSON handler for the "/viewer/cluster" endpoint.
 void InitViewerClusterJsonHandler(TJsonHandlers& handlers) {
-    handlers.AddHandler("/viewer/cluster", new TJsonHandler<TJsonCluster>(TJsonCluster::GetSwagger()));
+    // NOTE(review): the trailing 2 is presumably the capability version that
+    // Viewer->GetCapabilityVersion("/viewer/cluster") reports - confirm against AddHandler's signature.
+    handlers.AddHandler("/viewer/cluster", new TJsonHandler<TJsonCluster>(TJsonCluster::GetSwagger()), 2);
 }
 
 void InitViewerLabeledCountersJsonHandler(TJsonHandlers &handlers) {

+ 5 - 0
ydb/core/viewer/json_pipe_req.cpp

@@ -400,6 +400,11 @@ TViewerPipeClient::TRequestResponse<NSysView::TEvSysView::TEvGetPDisksResponse>
     return MakeRequestToPipe<NSysView::TEvSysView::TEvGetPDisksResponse>(pipeClient, request.release());
 }
 
+// Opens a pipe to the tablet returned by GetBSControllerId() and sends a
+// TEvGetStorageStatsRequest through it via MakeRequestToPipe.
+TViewerPipeClient::TRequestResponse<NSysView::TEvSysView::TEvGetStorageStatsResponse> TViewerPipeClient::RequestBSControllerStorageStats() {
+    using TStatsRequest = NSysView::TEvSysView::TEvGetStorageStatsRequest;
+    using TStatsResponse = NSysView::TEvSysView::TEvGetStorageStatsResponse;
+    const TActorId bscPipe = ConnectTabletPipe(GetBSControllerId());
+    return MakeRequestToPipe<TStatsResponse>(bscPipe, new TStatsRequest());
+}
+
 void TViewerPipeClient::RequestBSControllerPDiskUpdateStatus(const NKikimrBlobStorage::TUpdateDriveStatus& driveStatus, bool force) {
     TActorId pipeClient = ConnectTabletPipe(GetBSControllerId());
     THolder<TEvBlobStorage::TEvControllerConfigRequest> request = MakeHolder<TEvBlobStorage::TEvControllerConfigRequest>();

+ 1 - 0
ydb/core/viewer/json_pipe_req.h

@@ -222,6 +222,7 @@ protected:
     TRequestResponse<NSysView::TEvSysView::TEvGetStoragePoolsResponse> RequestBSControllerPools();
     TRequestResponse<NSysView::TEvSysView::TEvGetVSlotsResponse> RequestBSControllerVSlots();
     TRequestResponse<NSysView::TEvSysView::TEvGetPDisksResponse> RequestBSControllerPDisks();
+    TRequestResponse<NSysView::TEvSysView::TEvGetStorageStatsResponse> RequestBSControllerStorageStats();
     void RequestBSControllerPDiskUpdateStatus(const NKikimrBlobStorage::TUpdateDriveStatus& driveStatus, bool force = false);
     void RequestSchemeCacheNavigate(const TString& path);
     void RequestSchemeCacheNavigate(const TPathId& pathId);

+ 16 - 9
ydb/core/viewer/protos/viewer.proto

@@ -314,8 +314,11 @@ enum EFlag {
 }
 
 message TClusterInfo {
-    string Name = 1;
-    EFlag Overall = 2;
+    uint32 Version = 1;
+    string Name = 2;
+    string Domain = 3;
+    EFlag Overall = 5;
+    repeated string Problems = 9;
     uint32 NodesTotal = 10;
     uint32 NodesAlive = 11;
     uint32 NumberOfCpus = 20;
@@ -324,13 +327,17 @@ message TClusterInfo {
     uint64 MemoryUsed = 31;
     uint64 StorageTotal = 40;
     uint64 StorageUsed = 41;
-    repeated string DataCenters = 42;
-    repeated string Versions = 43;
-    repeated NKikimrWhiteboard.TTabletStateInfo SystemTablets = 16;
-    uint64 Hosts = 44;
-    uint64 Tenants = 45;
-    uint64 Tablets = 46;
-    string Domain = 47;
+    map<string, uint64> MapStorageTotal = 42;
+    map<string, uint64> MapStorageUsed = 43;
+    repeated string DataCenters = 44;
+    map<string, uint32> MapDataCenters = 45;
+    repeated string Versions = 46;
+    map<string, uint32> MapVersions = 47;
+    map<string, uint32> MapNodeStates = 48;
+    repeated NKikimrWhiteboard.TTabletStateInfo SystemTablets = 50;
+    repeated NKikimrSysView.TStorageStatsEntry StorageStats = 51;
+    uint64 Hosts = 60;
+    uint64 Tenants = 61;
 }
 
 enum ETenantType {

+ 646 - 335
ydb/core/viewer/viewer_cluster.h

@@ -2,50 +2,98 @@
 #include "json_handlers.h"
 #include "json_pipe_req.h"
 #include "viewer.h"
-#include "viewer_bsgroupinfo.h"
-#include "viewer_pdiskinfo.h"
+#include "viewer_helper.h"
 #include "viewer_tabletinfo.h"
-#include "viewer_vdiskinfo.h"
+#include <library/cpp/protobuf/json/proto2json.h>
 
 namespace NKikimr::NViewer {
 
+using namespace NProtobufJson;
 using namespace NActors;
 using namespace NNodeWhiteboard;
 
 class TJsonCluster : public TViewerPipeClient {
     using TThis = TJsonCluster;
     using TBase = TViewerPipeClient;
-    IViewer* Viewer;
-    NMon::TEvHttpInfo::TPtr Event;
-    THolder<TEvInterconnect::TEvNodesInfo> NodesInfo;
-    TMap<TNodeId, NKikimrWhiteboard::TEvSystemStateResponse> SystemInfo;
-    TMap<TNodeId, NKikimrWhiteboard::TEvVDiskStateResponse> VDiskInfo;
-    TMap<TNodeId, NKikimrWhiteboard::TEvPDiskStateResponse> PDiskInfo;
-    TMap<TNodeId, NKikimrWhiteboard::TEvBSGroupStateResponse> BSGroupInfo;
-    TMap<TNodeId, NKikimrWhiteboard::TEvTabletStateResponse> TabletInfo;
-    THolder<NSchemeShard::TEvSchemeShard::TEvDescribeSchemeResult> DescribeResult;
-    TSet<TNodeId> NodesAlive;
+    std::optional<TRequestResponse<TEvInterconnect::TEvNodesInfo>> NodesInfoResponse;
+    std::optional<TRequestResponse<TEvWhiteboard::TEvNodeStateResponse>> NodeStateResponse;
+    std::optional<TRequestResponse<NConsole::TEvConsole::TEvListTenantsResponse>> ListTenantsResponse;
+    std::optional<TRequestResponse<NSysView::TEvSysView::TEvGetPDisksResponse>> PDisksResponse;
+    std::optional<TRequestResponse<NSysView::TEvSysView::TEvGetStorageStatsResponse>> StorageStatsResponse;
+    std::optional<TRequestResponse<TEvHive::TEvResponseHiveNodeStats>> HiveNodeStatsResponse;
+
+    int WhiteboardStateRequestsInFlight = 0;
+    std::unordered_map<TNodeId, TRequestResponse<TEvWhiteboard::TEvSystemStateResponse>> SystemStateResponse;
+    std::unordered_map<TNodeId, TRequestResponse<TEvWhiteboard::TEvTabletStateResponse>> TabletStateResponse;
+    std::unordered_map<TNodeId, TRequestResponse<TEvViewer::TEvViewerResponse>> SystemViewerResponse;
+    std::unordered_map<TNodeId, TRequestResponse<TEvViewer::TEvViewerResponse>> TabletViewerResponse;
+
+    // Per-node record: the nameservice entry plus the state collected for it while the request is processed.
+    struct TNode {
+        TEvInterconnect::TNodeInfo NodeInfo;
+        NKikimrWhiteboard::TSystemStateInfo SystemState;
+        TNodeId NodeId;
+        TString DataCenter;
+        TSubDomainKey SubDomainKey;
+        bool Static = false;
+        bool Connected = false;
+        bool Disconnected = false;
+
+        // Ranking used when candidates are sorted: Connected adds 100, Static adds 10.
+        int GetCandidateScore() const {
+            const int connectedBonus = Connected ? 100 : 0;
+            const int staticBonus = Static ? 10 : 0;
+            return connectedBonus + staticBonus;
+        }
+    };
+
+    // A group of nodes to be queried (NodesToAskAbout) together with the ordered
+    // list of nodes that may be asked to perform the query (NodesToAskFor).
+    struct TNodeBatch {
+        std::vector<TNode*> NodesToAskFor;
+        std::vector<TNode*> NodesToAskAbout;
+        size_t Offset = 0;
+        bool HasStaticNodes = false;
+
+        // Returns the NodeId of the next untried entry in NodesToAskFor and advances Offset;
+        // returns 0 once every entry has been handed out.
+        TNodeId ChooseNodeId() {
+            if (Offset < NodesToAskFor.size()) {
+                TNode* chosen = NodesToAskFor[Offset];
+                ++Offset;
+                return chosen->NodeId;
+            }
+            return 0;
+        }
+    };
+
+    using TNodeData = std::vector<TNode>;
+    using TNodeCache = std::unordered_map<TNodeId, TNode*>;
+
+    TNodeData NodeData;
+    TNodeCache NodeCache;
+    std::unordered_map<TNodeId, TNodeBatch> NodeBatches;
+    std::vector<TString> Problems;
+
+    // Appends a problem tag to Problems unless an equal tag is already stored.
+    void AddProblem(const TString& problem) {
+        const bool alreadyKnown = std::find(Problems.begin(), Problems.end(), problem) != Problems.end();
+        if (!alreadyKnown) {
+            Problems.push_back(problem);
+        }
+    }
+
+    NKikimrViewer::TClusterInfo ClusterInfo;
+
+    std::unordered_set<TTabletId> FilterTablets;
+    bool OffloadMerge = true;
+    size_t OffloadMergeAttempts = 2;
+    TTabletId RootHiveId = 0;
     TJsonSettings JsonSettings;
     ui32 Timeout;
-    ui32 TenantsNumber = 0;
     bool Tablets = false;
 
-    struct TEventLog {
-        bool IsTimeout = false;
-        TInstant StartTime;
-        TInstant StartHandleListTenantsResponseTime;
-        TInstant StartHandleNodesInfoTime;
-        TInstant StartMergeBSGroupsTime;
-        TInstant StartMergeVDisksTime;
-        TInstant StartMergePDisksTime;
-        TInstant StartMergeTabletsTime;
-        TInstant StartResponseBuildingTime;
-    };
-    TEventLog EventLog;
 public:
     TJsonCluster(IViewer* viewer, NMon::TEvHttpInfo::TPtr& ev)
-        : Viewer(viewer)
-        , Event(ev)
+        : TViewerPipeClient(viewer, ev)
     {
         const auto& params(Event->Get()->Request.GetParams());
         JsonSettings.EnumAsNumbers = !FromStringWithDefault<bool>(params.Get("enums"), true);
@@ -53,416 +101,679 @@ public:
         InitConfig(params);
         Tablets = FromStringWithDefault<bool>(params.Get("tablets"), false);
         Timeout = FromStringWithDefault<ui32>(params.Get("timeout"), 10000);
+        OffloadMerge = FromStringWithDefault<bool>(params.Get("offload_merge"), OffloadMerge);
+        OffloadMergeAttempts = FromStringWithDefault<bool>(params.Get("offload_merge_attempts"), OffloadMergeAttempts);
     }
 
+    // Issues the initial requests (node list, node state of the current node, PDisks,
+    // storage stats, tenant list and - when a domain is configured - root hive node stats),
+    // collects the tablet ids of interest into FilterTablets, and switches to StateWork
+    // with a wakeup scheduled after Timeout milliseconds.
     void Bootstrap() override {
-        EventLog.StartTime = TActivationContext::Now();
-        SendRequest(GetNameserviceActorId(), new TEvInterconnect::TEvListNodes());
-        RequestConsoleListTenants();
-        Become(&TThis::StateRequested, TDuration::MilliSeconds(Timeout), new TEvents::TEvWakeup());
+        ClusterInfo.SetVersion(Viewer->GetCapabilityVersion("/viewer/cluster"));
+        NodesInfoResponse = MakeRequest<TEvInterconnect::TEvNodesInfo>(GetNameserviceActorId(), new TEvInterconnect::TEvListNodes());
+        NodeStateResponse = MakeWhiteboardRequest(TActivationContext::ActorSystem()->NodeId, new TEvWhiteboard::TEvNodeStateRequest());
+        PDisksResponse = RequestBSControllerPDisks();
+        StorageStatsResponse = RequestBSControllerStorageStats();
+        ListTenantsResponse = MakeRequestConsoleListTenants();
+        if (AppData()->DomainsInfo && AppData()->DomainsInfo->Domain) {
+            TIntrusivePtr<TDomainsInfo> domains = AppData()->DomainsInfo;
+            ClusterInfo.SetDomain(TStringBuilder() << "/" << AppData()->DomainsInfo->Domain->Name);
+            if (const auto& domain = domains->Domain) {
+                for (TTabletId id : domain->Coordinators) {
+                    FilterTablets.insert(id);
+                }
+                for (TTabletId id : domain->Mediators) {
+                    FilterTablets.insert(id);
+                }
+                for (TTabletId id : domain->TxAllocators) {
+                    FilterTablets.insert(id);
+                }
+                FilterTablets.insert(domain->SchemeRoot);
+                RootHiveId = domains->GetHive();
+                FilterTablets.insert(RootHiveId);
+                // Hive node stats are later used to fill TNode::SubDomainKey.
+                HiveNodeStatsResponse = MakeRequestHiveNodeStats(RootHiveId, new TEvHive::TEvRequestHiveNodeStats());
+            }
+            FilterTablets.insert(MakeBSControllerID());
+            FilterTablets.insert(MakeDefaultHiveID());
+            FilterTablets.insert(MakeCmsID());
+            FilterTablets.insert(MakeNodeBrokerID());
+            FilterTablets.insert(MakeTenantSlotBrokerID());
+            FilterTablets.insert(MakeConsoleID());
+        }
+        Become(&TThis::StateWork, TDuration::MilliSeconds(Timeout), new TEvents::TEvWakeup());
     }
 
-    void PassAway() override {
-        if (NodesInfo != nullptr) {
-            TIntrusivePtr<TDynamicNameserviceConfig> dynamicNameserviceConfig = AppData()->DynamicNameserviceConfig;
-            for (const auto& ni : NodesInfo->Nodes) {
-                if (ni.NodeId <= dynamicNameserviceConfig->MaxStaticNodeId) {
-                    Send(TActivationContext::InterconnectProxy(ni.NodeId), new TEvents::TEvUnsubscribe);
-                }
+private:
+    static constexpr size_t BATCH_SIZE = 200;
+
+    // Moves nodes from the front of `candidates` into batch.NodesToAskFor until the batch
+    // holds OffloadMergeAttempts of them (or candidates run out), removes the consumed ones
+    // from `candidates`, and sets batch.HasStaticNodes if any node in NodesToAskAbout is static.
+    void BuildCandidates(TNodeBatch& batch, std::vector<TNode*>& candidates) {
+        auto itCandidate = candidates.begin();
+        for (; itCandidate != candidates.end() && batch.NodesToAskFor.size() < OffloadMergeAttempts; ++itCandidate) {
+            batch.NodesToAskFor.push_back(*itCandidate);
+        }
+        // Consumed candidates are dropped so that a later batch does not receive them again.
+        candidates.erase(candidates.begin(), itCandidate);
+        for (TNode* node : batch.NodesToAskAbout) {
+            if (node->Static) {
+                batch.HasStaticNodes = true;
             }
         }
-        TBase::PassAway();
     }
 
-    void SendWhiteboardTabletStateRequest() {
-        THashSet<TTabletId> filterTablets;
-        TIntrusivePtr<TDomainsInfo> domains = AppData()->DomainsInfo;
-        if (const auto& domain = domains->Domain) {
-            for (TTabletId id : domain->Coordinators) {
-                filterTablets.emplace(id);
+    // Splits nodeBatch into batches whose NodesToAskAbout hold at most BATCH_SIZE nodes and
+    // appends them to `batches`. Candidates are the batch's own nodes sorted by descending
+    // GetCandidateScore(); BuildCandidates distributes them between the resulting batches.
+    // nodeBatch is moved from when it still has nodes after splitting.
+    void SplitBatch(TNodeBatch& nodeBatch, std::vector<TNodeBatch>& batches) {
+        std::vector<TNode*> candidates = nodeBatch.NodesToAskAbout;
+        std::sort(candidates.begin(), candidates.end(), [](TNode* a, TNode* b) {
+            return a->GetCandidateScore() > b->GetCandidateScore();
+        });
+        while (nodeBatch.NodesToAskAbout.size() > BATCH_SIZE) {
+            TNodeBatch newBatch;
+            // Take either a full BATCH_SIZE or half of what is left, whichever is smaller.
+            size_t splitSize = std::min(BATCH_SIZE, nodeBatch.NodesToAskAbout.size() / 2);
+            newBatch.NodesToAskAbout.reserve(splitSize);
+            for (size_t i = 0; i < splitSize; ++i) {
+                newBatch.NodesToAskAbout.push_back(nodeBatch.NodesToAskAbout.back());
+                nodeBatch.NodesToAskAbout.pop_back();
             }
-            for (TTabletId id : domain->Mediators) {
-                filterTablets.emplace(id);
+            BuildCandidates(newBatch, candidates);
+            batches.emplace_back(std::move(newBatch));
+        }
+        if (!nodeBatch.NodesToAskAbout.empty()) {
+            BuildCandidates(nodeBatch, candidates);
+            batches.emplace_back(std::move(nodeBatch));
+        }
+    }
+
+    // Groups NodeData into request batches.
+    // With OffloadMerge: static nodes are grouped by data center and the other nodes by
+    // SubDomainKey; a subdomain group with exactly one node is folded into that node's
+    // data-center group; every group then goes through SplitBatch.
+    // Without OffloadMerge: all nodes form one group that goes through SplitBatch.
+    std::vector<TNodeBatch> BatchNodes() {
+        std::vector<TNodeBatch> batches;
+        if (OffloadMerge) {
+            std::unordered_map<TSubDomainKey, TNodeBatch> batchSubDomain;
+            std::unordered_map<TString, TNodeBatch> batchDataCenters;
+            for (TNode& node : NodeData) {
+                if (node.Static) {
+                    batchDataCenters[node.DataCenter].NodesToAskAbout.push_back(&node);
+                } else {
+                    batchSubDomain[node.SubDomainKey].NodesToAskAbout.push_back(&node);
+                }
             }
-            for (TTabletId id : domain->TxAllocators) {
-                filterTablets.emplace(id);
+            // Subdomain groups are handled first so that single-node groups can still be
+            // added to the data-center groups before those are split.
+            for (auto& [subDomainKey, nodeBatch] : batchSubDomain) {
+                if (nodeBatch.NodesToAskAbout.size() == 1) {
+                    TNode* node = nodeBatch.NodesToAskAbout.front();
+                    batchDataCenters[node->DataCenter].NodesToAskAbout.push_back(node);
+                } else {
+                    SplitBatch(nodeBatch, batches);
+                }
             }
-            filterTablets.emplace(domain->SchemeRoot);
-            filterTablets.emplace(domains->GetHive());
-        }
-        filterTablets.emplace(MakeBSControllerID());
-        filterTablets.emplace(MakeDefaultHiveID());
-        filterTablets.emplace(MakeCmsID());
-        filterTablets.emplace(MakeNodeBrokerID());
-        filterTablets.emplace(MakeTenantSlotBrokerID());
-        filterTablets.emplace(MakeConsoleID());
-        const NKikimrSchemeOp::TPathDescription& pathDescription(DescribeResult->GetRecord().GetPathDescription());
-        if (pathDescription.HasDomainDescription()) {
-            const NKikimrSubDomains::TDomainDescription& domainDescription(pathDescription.GetDomainDescription());
-            for (TTabletId tabletId : domainDescription.GetProcessingParams().GetCoordinators()) {
-                filterTablets.emplace(tabletId);
+            for (auto& [dataCenter, nodeBatch] : batchDataCenters) {
+                SplitBatch(nodeBatch, batches);
             }
-            for (TTabletId tabletId : domainDescription.GetProcessingParams().GetMediators()) {
-                filterTablets.emplace(tabletId);
+        } else {
+            TNodeBatch nodeBatch;
+            for (TNode& node : NodeData) {
+                nodeBatch.NodesToAskAbout.push_back(&node);
             }
-            if (domainDescription.HasDomainKey()) {
-                if (domainDescription.GetDomainKey().HasSchemeShard()) {
-                    filterTablets.emplace(domainDescription.GetDomainKey().GetSchemeShard());
+            SplitBatch(nodeBatch, batches);
+        }
+        return batches;
+    }
+
+    // True only when none of the initial-request optionals still holds a value,
+    // i.e. each one was either never issued or has already been processed and reset.
+    bool TimeToAskWhiteboard() {
+        const bool anyPending = NodesInfoResponse
+            || NodeStateResponse
+            || ListTenantsResponse
+            || PDisksResponse
+            || StorageStatsResponse
+            || HiveNodeStatsResponse;
+        return !anyPending;
+    }
+
+    // Consumes every initial response that has completed (IsDone): a successful one is folded
+    // into NodeData / ClusterInfo, a failed one is recorded through AddProblem, and in both
+    // cases the optional is reset. When no optional is left (TimeToAskWhiteboard), the nodes
+    // are batched and the whiteboard requests are sent.
+    // NOTE(review): while NodeData is empty the function returns right after the nodes-info
+    // step, so the other responses are not processed until the node list has arrived.
+    void ProcessResponses() {
+        if (NodesInfoResponse && NodesInfoResponse->IsDone()) {
+            if (NodesInfoResponse->IsOk()) {
+                std::unordered_set<TString> hosts;
+                for (const auto& ni : NodesInfoResponse->Get()->Nodes) {
+                    TNode& node = NodeData.emplace_back();
+                    node.NodeInfo = ni;
+                    node.NodeId = ni.NodeId;
+                    node.Static = ni.IsStatic;
+                    node.DataCenter = ni.Location.GetDataCenterId();
+                    hosts.insert(ni.Host);
+                }
+                // NodeCache stores pointers into NodeData, so it is filled in a second pass,
+                // after all emplace_back calls above have finished.
+                for (TNode& node : NodeData) {
+                    NodeCache.emplace(node.NodeInfo.NodeId, &node);
                 }
+                ClusterInfo.SetNodesTotal(NodesInfoResponse->Get()->Nodes.size());
+                ClusterInfo.SetHosts(hosts.size());
+            } else {
+                AddProblem("no-nodes-info");
             }
+            NodesInfoResponse.reset();
+        }
+
+        if (NodeData.empty()) {
+            return;
         }
 
-        TIntrusivePtr<TDynamicNameserviceConfig> dynamicNameserviceConfig = AppData()->DynamicNameserviceConfig;
-        for (const auto& ni : NodesInfo->Nodes) {
-            if (ni.NodeId <= dynamicNameserviceConfig->MaxStaticNodeId) {
-                TActorId whiteboardServiceId = MakeNodeWhiteboardServiceId(ni.NodeId);
-                auto request = new TEvWhiteboard::TEvTabletStateRequest();
-                for (TTabletId id: filterTablets) {
-                    request->Record.AddFilterTabletId(id);
+        if (NodeStateResponse && NodeStateResponse->IsDone()) {
+            if (NodeStateResponse->IsOk()) {
+                for (const auto& nodeStateInfo : NodeStateResponse->Get()->Record.GetNodeStateInfo()) {
+                    if (nodeStateInfo.GetConnected()) {
+                        // The node id is parsed from the part of PeerName before ':'; 0 means "not parsed".
+                        TNodeId nodeId = FromStringWithDefault(TStringBuf(nodeStateInfo.GetPeerName()).Before(':'), 0);
+                        if (nodeId) {
+                            // NOTE(review): operator[] default-inserts a null entry for an unknown id;
+                            // find() would avoid growing NodeCache.
+                            TNode* node = NodeCache[nodeId];
+                            if (node) {
+                                node->Connected = true;
+                            }
+                        }
+                    }
                 }
-                SendRequest(whiteboardServiceId, request, IEventHandle::FlagTrackDelivery | IEventHandle::FlagSubscribeOnSession, ni.NodeId);
+            } else {
+                AddProblem("no-node-state-info");
             }
+            NodeStateResponse.reset();
         }
-    }
 
-    void SendWhiteboardRequests() {
-        TIntrusivePtr<TDynamicNameserviceConfig> dynamicNameserviceConfig = AppData()->DynamicNameserviceConfig;
-        for (const auto& ni : NodesInfo->Nodes) {
-            TActorId whiteboardServiceId = MakeNodeWhiteboardServiceId(ni.NodeId);
-            SendRequest(whiteboardServiceId, new TEvWhiteboard::TEvSystemStateRequest(), IEventHandle::FlagTrackDelivery | IEventHandle::FlagSubscribeOnSession, ni.NodeId);
+        if (HiveNodeStatsResponse && HiveNodeStatsResponse->IsDone()) {
+            if (HiveNodeStatsResponse->IsOk()) {
+                for (const auto& nodeStats : HiveNodeStatsResponse->Get()->Record.GetNodeStats()) {
+                    TNodeId nodeId = nodeStats.GetNodeId();
+                    TNode* node = NodeCache[nodeId];
+                    if (node) {
+                        node->SubDomainKey = TSubDomainKey(nodeStats.GetNodeDomain());
+                    }
+                }
+            } else {
+                AddProblem("no-hive-node-stats");
+            }
+            HiveNodeStatsResponse.reset();
+        }
 
-            if (ni.NodeId <= dynamicNameserviceConfig->MaxStaticNodeId) {
-                SendRequest(whiteboardServiceId, new TEvWhiteboard::TEvVDiskStateRequest(), IEventHandle::FlagTrackDelivery | IEventHandle::FlagSubscribeOnSession, ni.NodeId);
-                SendRequest(whiteboardServiceId,new TEvWhiteboard::TEvPDiskStateRequest(), IEventHandle::FlagTrackDelivery | IEventHandle::FlagSubscribeOnSession, ni.NodeId);
-                SendRequest(whiteboardServiceId, new TEvWhiteboard::TEvBSGroupStateRequest(), IEventHandle::FlagTrackDelivery | IEventHandle::FlagSubscribeOnSession, ni.NodeId);
+        if (ListTenantsResponse && ListTenantsResponse->IsDone()) {
+            if (ListTenantsResponse->IsOk()) {
+                Ydb::Cms::ListDatabasesResult listTenantsResult;
+                ListTenantsResponse->Get()->Record.GetResponse().operation().result().UnpackTo(&listTenantsResult);
+                ClusterInfo.SetTenants(listTenantsResult.paths().size());
+            } else {
+                AddProblem("no-tenants-info");
             }
+            ListTenantsResponse.reset();
         }
-        if (Tablets) {
-            SendWhiteboardTabletStateRequest();
+
+        if (PDisksResponse && PDisksResponse->IsDone()) {
+            if (PDisksResponse->IsOk()) {
+                // Totals are accumulated per PDisk type; used = total - available.
+                for (const NKikimrSysView::TPDiskEntry& entry : PDisksResponse->Get()->Record.GetEntries()) {
+                    const NKikimrSysView::TPDiskInfo& info = entry.GetInfo();
+                    (*ClusterInfo.MutableMapStorageTotal())[info.GetType()] += info.GetTotalSize();
+                    (*ClusterInfo.MutableMapStorageUsed())[info.GetType()] += info.GetTotalSize() - info.GetAvailableSize();
+                }
+            } else {
+                AddProblem("no-pdisk-info");
+            }
+            PDisksResponse.reset();
         }
-    }
 
-    void Handle(TEvInterconnect::TEvNodesInfo::TPtr& ev) {
-        EventLog.StartHandleNodesInfoTime = TActivationContext::Now();
-        NodesInfo = ev->Release();
-        // before making requests to Whiteboard with the Tablets parameter, we need to review the TEvDescribeSchemeResult information
-        if (Tablets) {
-            THolder<TEvTxUserProxy::TEvNavigate> request = MakeHolder<TEvTxUserProxy::TEvNavigate>();
-            if (!Event->Get()->UserToken.empty()) {
-                request->Record.SetUserToken(Event->Get()->UserToken);
-            }
-            NKikimrSchemeOp::TDescribePath* record = request->Record.MutableDescribePath();
-            TIntrusivePtr<TDomainsInfo> domains = AppData()->DomainsInfo;
-            if (const auto& domain = domains->Domain) {
-                TString domainPath = "/" + domain->Name;
-                record->SetPath(domainPath);
+        if (StorageStatsResponse && StorageStatsResponse->IsDone()) {
+            if (StorageStatsResponse->IsOk()) {
+                // Entries are moved out of the response record into ClusterInfo.
+                for (NKikimrSysView::TStorageStatsEntry& entry : *StorageStatsResponse->Get()->Record.MutableEntries()) {
+                    NKikimrSysView::TStorageStatsEntry& newEntry = (*ClusterInfo.AddStorageStats()) = std::move(entry);
+                    newEntry.ClearPDiskFilterData(); // remove binary data
+                }
+            } else {
+                AddProblem("no-storage-stats");
             }
-            record->MutableOptions()->SetReturnPartitioningInfo(false);
-            record->MutableOptions()->SetReturnPartitionConfig(false);
-            record->MutableOptions()->SetReturnChildren(false);
-            SendRequest(MakeTxProxyID(), request.Release());
-        } else {
-            SendWhiteboardRequests();
+            StorageStatsResponse.reset();
         }
 
-        RequestDone();
+        if (TimeToAskWhiteboard()) {
+            std::vector<TNodeBatch> batches = BatchNodes();
+            SendWhiteboardRequests(batches);
+        }
     }
 
-    void Undelivered(TEvents::TEvUndelivered::TPtr &ev) {
-        ui32 nodeId = ev.Get()->Cookie;
-        switch (ev->Get()->SourceType) {
-        case TEvWhiteboard::EvSystemStateRequest:
-            if (SystemInfo.emplace(nodeId, NKikimrWhiteboard::TEvSystemStateResponse{}).second) {
-                RequestDone();
-            }
-            break;
-        case TEvWhiteboard::EvVDiskStateRequest:
-            if (VDiskInfo.emplace(nodeId, NKikimrWhiteboard::TEvVDiskStateResponse{}).second) {
-                RequestDone();
-            }
-            break;
-        case TEvWhiteboard::EvPDiskStateRequest:
-            if (PDiskInfo.emplace(nodeId, NKikimrWhiteboard::TEvPDiskStateResponse{}).second) {
-                RequestDone();
+    // Hook for configuring a system-state whiteboard request. Currently a no-op:
+    // the only statement is commented out and `request` is marked unused.
+    void InitSystemWhiteboardRequest(NKikimrWhiteboard::TEvSystemStateRequest* request) {
+        //request->AddFieldsRequired(-1);
+        Y_UNUSED(request);
+    }
+
+    // Hook for configuring a tablet-state whiteboard request. Currently a no-op:
+    // the only statement is commented out and `request` is marked unused.
+    void InitTabletWhiteboardRequest(NKikimrWhiteboard::TEvTabletStateRequest* request) {
+        //request->AddFieldsRequired(-1);
+        Y_UNUSED(request);
+    }
+
+    // Sends the state requests for one batch.
+    // If OffloadMerge is on and batch.ChooseNodeId() yields a node, that node receives
+    // TEvViewerRequest(s) covering the batch: a system request listing every node in
+    // NodesToAskAbout and, when the batch has static nodes, a tablet request listing only
+    // the static ones. Otherwise each node that is not marked Disconnected is asked directly
+    // through its whiteboard. Every request issued increments WhiteboardStateRequestsInFlight.
+    void SendWhiteboardRequest(TNodeBatch& batch) {
+        TNodeId nodeId = OffloadMerge ? batch.ChooseNodeId() : 0;
+        if (nodeId) {
+            if (SystemViewerResponse.count(nodeId) == 0) {
+                auto viewerRequest = std::make_unique<TEvViewer::TEvViewerRequest>();
+                InitSystemWhiteboardRequest(viewerRequest->Record.MutableSystemRequest());
+                // The offloaded request gets half of this handler's own timeout.
+                viewerRequest->Record.SetTimeout(Timeout / 2);
+                for (const TNode* node : batch.NodesToAskAbout) {
+                    viewerRequest->Record.MutableLocation()->AddNodeId(node->NodeId);
+                }
+                SystemViewerResponse.emplace(nodeId, MakeViewerRequest(nodeId, viewerRequest.release()));
+                NodeBatches.emplace(nodeId, batch);
+                ++WhiteboardStateRequestsInFlight;
             }
-            break;
-        case TEvWhiteboard::EvBSGroupStateRequest:
-            if (BSGroupInfo.emplace(nodeId, NKikimrWhiteboard::TEvBSGroupStateResponse{}).second) {
-                RequestDone();
+            if (batch.HasStaticNodes && TabletViewerResponse.count(nodeId) == 0) {
+                auto viewerRequest = std::make_unique<TEvViewer::TEvViewerRequest>();
+                InitTabletWhiteboardRequest(viewerRequest->Record.MutableTabletRequest());
+                viewerRequest->Record.SetTimeout(Timeout / 2);
+                for (const TNode* node : batch.NodesToAskAbout) {
+                    if (node->Static) {
+                        viewerRequest->Record.MutableLocation()->AddNodeId(node->NodeId);
+                    }
+                }
+                if (viewerRequest->Record.GetLocation().NodeIdSize() > 0) {
+                    TabletViewerResponse.emplace(nodeId, MakeViewerRequest(nodeId, viewerRequest.release()));
+                    // emplace does nothing if the system request above already stored this batch.
+                    NodeBatches.emplace(nodeId, batch);
+                    ++WhiteboardStateRequestsInFlight;
+                }
             }
-            break;
-        case TEvWhiteboard::EvTabletStateRequest:
-            if (TabletInfo.emplace(nodeId, NKikimrWhiteboard::TEvTabletStateResponse{}).second) {
-                RequestDone();
+        } else {
+            for (const TNode* node : batch.NodesToAskAbout) {
+                if (node->Disconnected) {
+                    continue;
+                }
+                TNodeId nodeId = node->NodeId;
+                if (SystemStateResponse.count(nodeId) == 0) {
+                    auto request = new TEvWhiteboard::TEvSystemStateRequest();
+                    InitSystemWhiteboardRequest(&request->Record);
+                    SystemStateResponse.emplace(nodeId, MakeWhiteboardRequest(nodeId, request));
+                    ++WhiteboardStateRequestsInFlight;
+                }
+                if (node->Static) {
+                    if (TabletStateResponse.count(nodeId) == 0) {
+                        auto request = std::make_unique<TEvWhiteboard::TEvTabletStateRequest>();
+                        request->Record.SetGroupBy("Type,State");
+                        TabletStateResponse.emplace(nodeId, MakeWhiteboardRequest(nodeId, request.release()));
+                        ++WhiteboardStateRequestsInFlight;
+                    }
+                }
             }
-            break;
         }
     }
 
-    void Disconnected(TEvInterconnect::TEvNodeDisconnected::TPtr &ev) {
-        ui32 nodeId = ev->Get()->NodeId;
-        if (SystemInfo.emplace(nodeId, NKikimrWhiteboard::TEvSystemStateResponse{}).second) {
-            RequestDone();
+    // Calls SendWhiteboardRequest for every batch, in order.
+    void SendWhiteboardRequests(std::vector<TNodeBatch>& batches) {
+        for (TNodeBatch& batch : batches) {
+            SendWhiteboardRequest(batch);
         }
-        TIntrusivePtr<TDynamicNameserviceConfig> dynamicNameserviceConfig = AppData()->DynamicNameserviceConfig;
-        if (nodeId <= dynamicNameserviceConfig->MaxStaticNodeId) {
-            if (VDiskInfo.emplace(nodeId, NKikimrWhiteboard::TEvVDiskStateResponse{}).second) {
-                RequestDone();
+    }
+
+    void ProcessWhiteboard() {
+        for (const auto& [responseNodeId, response] : SystemViewerResponse) {
+            if (response.IsOk()) {
+                const auto& systemResponse(response.Get()->Record.GetSystemResponse());
+                for (auto& systemInfo : systemResponse.GetSystemStateInfo()) {
+                    TNodeId nodeId = systemInfo.GetNodeId();
+                    TNode* node = NodeCache[nodeId];
+                    if (node) {
+                        node->SystemState = std::move(systemInfo);
+                        if (!node->DataCenter) {
+                            node->DataCenter = node->SystemState.GetLocation().GetDataCenter();
+                        }
+                    }
+                }
             }
-            if (PDiskInfo.emplace(nodeId, NKikimrWhiteboard::TEvPDiskStateResponse{}).second) {
-                RequestDone();
+        }
+        for (auto& [nodeId, response] : SystemStateResponse) {
+            if (response.IsOk()) {
+                auto& systemState(response.Get()->Record);
+                if (systemState.SystemStateInfoSize() > 0) {
+                    TNode* node = NodeCache[nodeId];
+                    if (node) {
+                        node->SystemState = std::move(*systemState.MutableSystemStateInfo(0));
+                        if (!node->DataCenter) {
+                            node->DataCenter = node->SystemState.GetLocation().GetDataCenter();
+                        }
+                    }
+                }
             }
-            if (BSGroupInfo.emplace(nodeId, NKikimrWhiteboard::TEvBSGroupStateResponse{}).second) {
-                RequestDone();
+        }
+        std::unordered_map<TTabletId, NKikimrWhiteboard::TTabletStateInfo> mergedTabletState;
+        for (auto& [nodeId, response] : TabletViewerResponse) {
+            if (response.IsOk()) {
+                auto& tabletResponse(*(response.Get()->Record.MutableTabletResponse()));
+                for (auto& tabletState : *tabletResponse.MutableTabletStateInfo()) {
+                    NKikimrWhiteboard::TTabletStateInfo& mergedState(mergedTabletState[tabletState.GetTabletId()]);
+                    if (tabletState.GetGeneration() > mergedState.GetGeneration()) {
+                        mergedState = std::move(tabletState);
+                    }
+                }
             }
-            if (Tablets) {
-                if (TabletInfo.emplace(nodeId, NKikimrWhiteboard::TEvTabletStateResponse{}).second) {
-                    RequestDone();
+        }
+        for (auto& [nodeId, response] : TabletStateResponse) {
+            if (response.IsOk()) {
+                for (auto& tabletState : *response.Get()->Record.MutableTabletStateInfo()) {
+                    NKikimrWhiteboard::TTabletStateInfo& mergedState(mergedTabletState[tabletState.GetTabletId()]);
+                    if (tabletState.GetGeneration() > mergedState.GetGeneration()) {
+                        mergedState = std::move(tabletState);
+                    }
                 }
             }
         }
+
+        for (TNode& node : NodeData) {
+            const NKikimrWhiteboard::TSystemStateInfo& systemState = node.SystemState;
+            (*ClusterInfo.MutableMapDataCenters())[node.DataCenter]++;
+            if (systemState.HasNumberOfCpus()) {
+                ClusterInfo.SetNumberOfCpus(ClusterInfo.GetNumberOfCpus() + systemState.GetNumberOfCpus());
+            }
+            if (systemState.LoadAverageSize() > 0) {
+                ClusterInfo.SetLoadAverage(ClusterInfo.GetLoadAverage() + systemState.GetLoadAverage(0));
+            }
+            if (systemState.HasVersion()) {
+                (*ClusterInfo.MutableMapVersions())[systemState.GetVersion()]++;
+            }
+            if (systemState.HasClusterName() && !ClusterInfo.GetName()) {
+                ClusterInfo.SetName(systemState.GetClusterName());
+            }
+            ClusterInfo.SetMemoryTotal(ClusterInfo.GetMemoryTotal() + systemState.GetMemoryLimit());
+            ClusterInfo.SetMemoryUsed(ClusterInfo.GetMemoryUsed() + systemState.GetMemoryUsed());
+            if (!node.Disconnected && node.SystemState.HasSystemState()) {
+                ClusterInfo.SetNodesAlive(ClusterInfo.GetNodesAlive() + 1);
+            }
+            (*ClusterInfo.MutableMapNodeStates())[NKikimrWhiteboard::EFlag_Name(node.SystemState.GetSystemState())]++;
+        }
+
+        for (auto& [tabletId, tabletState] : mergedTabletState) {
+            if (FilterTablets.empty() || FilterTablets.count(tabletId)) {
+                auto tabletFlag = GetWhiteboardFlag(GetFlagFromTabletState(tabletState.GetState()));
+                tabletState.SetOverall(tabletFlag);
+                (*ClusterInfo.AddSystemTablets()) = std::move(tabletState);
+            }
+        }
     }
 
-    void Handle(TEvWhiteboard::TEvSystemStateResponse::TPtr& ev) {
-        ui64 nodeId = ev.Get()->Cookie;
-        SystemInfo[nodeId] = std::move(ev->Get()->Record);
-        NodesAlive.insert(nodeId);
+    void WhiteboardRequestDone() { // accounts for one finished whiteboard request (called from reply handlers and from error paths)
+        --WhiteboardStateRequestsInFlight;
+        if (WhiteboardStateRequestsInFlight == 0) { // the last outstanding whiteboard request has just completed
+            ProcessWhiteboard();
+        }
         RequestDone();
     }
 
-    void Handle(TEvWhiteboard::TEvVDiskStateResponse::TPtr& ev) {
-        ui64 nodeId = ev.Get()->Cookie;
-        VDiskInfo[nodeId] = std::move(ev->Get()->Record);
-        NodesAlive.insert(nodeId);
+    void Handle(TEvInterconnect::TEvNodesInfo::TPtr& ev) { // stores the nodes-info reply, then re-runs response processing
+        NodesInfoResponse->Set(std::move(ev)); // ev is moved from here and must not be used afterwards
+        ProcessResponses();
         RequestDone();
     }
 
-    void Handle(TEvWhiteboard::TEvPDiskStateResponse::TPtr& ev) {
-        ui64 nodeId = ev.Get()->Cookie;
-        PDiskInfo[nodeId] = std::move(ev->Get()->Record);
-        NodesAlive.insert(nodeId);
+    void Handle(TEvWhiteboard::TEvNodeStateResponse::TPtr& ev) { // stores the node-state reply, then re-runs response processing
+        NodeStateResponse->Set(std::move(ev)); // ev is moved from here and must not be used afterwards
+        ProcessResponses();
         RequestDone();
     }
 
-    void Handle(TEvWhiteboard::TEvBSGroupStateResponse::TPtr& ev) {
-        ui64 nodeId = ev.Get()->Cookie;
-        BSGroupInfo[nodeId] = std::move(ev->Get()->Record);
-        NodesAlive.insert(nodeId);
+    void Handle(NConsole::TEvConsole::TEvListTenantsResponse::TPtr& ev) { // stores the tenant-list reply, then re-runs response processing
+        ListTenantsResponse->Set(std::move(ev)); // ev is moved from here and must not be used afterwards
+        ProcessResponses();
         RequestDone();
     }
 
-    void Handle(TEvWhiteboard::TEvTabletStateResponse::TPtr& ev) {
-        ui64 nodeId = ev.Get()->Cookie;
-        TabletInfo[nodeId] = std::move(ev->Get()->Record);
-        NodesAlive.insert(nodeId);
+    void Handle(NSysView::TEvSysView::TEvGetPDisksResponse::TPtr& ev) { // stores the PDisks reply, then re-runs response processing
+        PDisksResponse->Set(std::move(ev)); // ev is moved from here and must not be used afterwards
+        ProcessResponses();
         RequestDone();
     }
 
-    void Handle(NConsole::TEvConsole::TEvListTenantsResponse::TPtr& ev) {
-        EventLog.StartHandleListTenantsResponseTime = TActivationContext::Now();
-        Ydb::Cms::ListDatabasesResult listTenantsResult;
-        ev->Get()->Record.GetResponse().operation().result().UnpackTo(&listTenantsResult);
-        TenantsNumber = listTenantsResult.paths().size();
+    void Handle(NSysView::TEvSysView::TEvGetStorageStatsResponse::TPtr& ev) { // stores the storage-stats reply, then re-runs response processing
+        StorageStatsResponse->Set(std::move(ev)); // ev is moved from here and must not be used afterwards
+        ProcessResponses();
         RequestDone();
     }
 
-    void Handle(NSchemeShard::TEvSchemeShard::TEvDescribeSchemeResult::TPtr& ev) {
-        if (ev->Get()->GetRecord().GetStatus() == NKikimrScheme::StatusSuccess) {
-            DescribeResult = ev->Release();
-            SendWhiteboardRequests();
-        }
+    void Handle(TEvHive::TEvResponseHiveNodeStats::TPtr& ev) { // stores the hive node-stats reply, then re-runs response processing
+        HiveNodeStatsResponse->Set(std::move(ev)); // ev is moved from here and must not be used afterwards
+        ProcessResponses();
         RequestDone();
     }
 
-    void Handle(TEvTabletPipe::TEvClientConnected::TPtr& ev) {
-        if (ev->Get()->Status != NKikimrProto::OK) {
-            RequestDone();
-        }
+    void Handle(TEvWhiteboard::TEvSystemStateResponse::TPtr& ev) { // files a per-node system-state reply under its node id
+        ui64 nodeId = ev.Get()->Cookie; // the event cookie is used as the node id; it is read before ev is moved from
+        SystemStateResponse[nodeId].Set(std::move(ev));
+        WhiteboardRequestDone(); // one whiteboard request fewer in flight
     }
 
-    STATEFN(StateRequested) {
-        switch (ev->GetTypeRewrite()) {
-            hFunc(TEvInterconnect::TEvNodesInfo, Handle);
-            hFunc(TEvWhiteboard::TEvSystemStateResponse, Handle);
-            hFunc(TEvWhiteboard::TEvVDiskStateResponse, Handle);
-            hFunc(TEvWhiteboard::TEvPDiskStateResponse, Handle);
-            hFunc(TEvWhiteboard::TEvBSGroupStateResponse, Handle);
-            hFunc(TEvWhiteboard::TEvTabletStateResponse, Handle);
-            hFunc(NConsole::TEvConsole::TEvListTenantsResponse, Handle);
-            hFunc(NSchemeShard::TEvSchemeShard::TEvDescribeSchemeResult, Handle);
-            hFunc(TEvents::TEvUndelivered, Undelivered);
-            hFunc(TEvInterconnect::TEvNodeDisconnected, Disconnected);
-            hFunc(TEvTabletPipe::TEvClientConnected, Handle);
-            cFunc(TEvents::TSystem::Wakeup, HandleTimeout);
-        }
+    void Handle(TEvWhiteboard::TEvTabletStateResponse::TPtr& ev) { // files a per-node tablet-state reply under its node id
+        ui64 nodeId = ev.Get()->Cookie; // the event cookie is used as the node id; it is read before ev is moved from
+        TabletStateResponse[nodeId].Set(std::move(ev));
+        WhiteboardRequestDone(); // one whiteboard request fewer in flight
     }
 
-    NKikimrWhiteboard::TEvBSGroupStateResponse MergedBSGroupInfo;
-    NKikimrWhiteboard::TEvVDiskStateResponse MergedVDiskInfo;
-    NKikimrWhiteboard::TEvPDiskStateResponse MergedPDiskInfo;
-    NKikimrWhiteboard::TEvTabletStateResponse MergedTabletInfo;
-    TMap<NKikimrBlobStorage::TVDiskID, const NKikimrWhiteboard::TVDiskStateInfo&> VDisksIndex;
-    TMap<std::pair<ui32, ui32>, const NKikimrWhiteboard::TPDiskStateInfo&> PDisksIndex;
-
-    void ReplyAndPassAway() override {
-        EventLog.StartMergeBSGroupsTime = TActivationContext::Now();
-        MergeWhiteboardResponses(MergedBSGroupInfo, BSGroupInfo);
-        EventLog.StartMergeVDisksTime = TActivationContext::Now();
-        MergeWhiteboardResponses(MergedVDiskInfo, VDiskInfo);
-        EventLog.StartMergePDisksTime = TActivationContext::Now();
-        MergeWhiteboardResponses(MergedPDiskInfo, PDiskInfo);
-
-        EventLog.StartMergeTabletsTime = TActivationContext::Now();
-        THashSet<TTabletId> tablets;
-        if (Tablets) {
-            MergeWhiteboardResponses(MergedTabletInfo, TabletInfo);
+    void Handle(TEvViewer::TEvViewerResponse::TPtr& ev) { // routes a viewer reply by payload kind; any other payload falls through to the error path below
+        ui64 nodeId = ev.Get()->Cookie; // the event cookie is used as the node id; it is read before ev is moved from
+        switch (ev->Get()->Record.Response_case()) {
+            case NKikimrViewer::TEvViewerResponse::ResponseCase::kSystemResponse:
+                SystemViewerResponse[nodeId].Set(std::move(ev));
+                NodeBatches.erase(nodeId); // the batch was answered, so the error paths will not find it to re-send
+                WhiteboardRequestDone();
+                return;
+            case NKikimrViewer::TEvViewerResponse::ResponseCase::kTabletResponse:
+                TabletViewerResponse[nodeId].Set(std::move(ev));
+                NodeBatches.erase(nodeId); // the batch was answered, so the error paths will not find it to re-send
+                WhiteboardRequestDone();
+                return;
+            default:
+                break;
         }
-
-        EventLog.StartResponseBuildingTime = TActivationContext::Now();
-        if (Tablets) {
-            TIntrusivePtr<TDomainsInfo> domains = AppData()->DomainsInfo;
-            if (const auto& domain = domains->Domain) {
-                tablets.emplace(MakeBSControllerID());
-                tablets.emplace(MakeDefaultHiveID());
-                tablets.emplace(MakeCmsID());
-                tablets.emplace(MakeNodeBrokerID());
-                tablets.emplace(MakeTenantSlotBrokerID());
-                tablets.emplace(MakeConsoleID());
-                tablets.emplace(domain->SchemeRoot);
-                tablets.emplace(domains->GetHive());
-                for (TTabletId id : domain->Coordinators) {
-                    tablets.emplace(id);
-                }
-                for (TTabletId id : domain->Mediators) {
-                    tablets.emplace(id);
-                }
-                for (TTabletId id : domain->TxAllocators) {
-                    tablets.emplace(id);
+        TString error("WrongResponse"); // reached only for a payload that is neither a system nor a tablet response
+        {
+            auto itSystemViewerResponse = SystemViewerResponse.find(nodeId);
+            if (itSystemViewerResponse != SystemViewerResponse.end()) {
+                if (itSystemViewerResponse->second.Error(error)) { // NOTE(review): Error() presumably returns true only while the response is still pending - confirm
+                    if (NodeBatches.count(nodeId)) {
+                        SendWhiteboardRequest(NodeBatches[nodeId]); // hands this node's stored batch to SendWhiteboardRequest before dropping it
+                        NodeBatches.erase(nodeId);
+                    }
+                    WhiteboardRequestDone();
                 }
             }
-
-            if (DescribeResult) {
-                const NKikimrSchemeOp::TPathDescription& pathDescription(DescribeResult->GetRecord().GetPathDescription());
-                if (pathDescription.HasDomainDescription()) {
-                    const NKikimrSubDomains::TDomainDescription& domainDescription(pathDescription.GetDomainDescription());
-                    for (TTabletId tabletId : domainDescription.GetProcessingParams().GetCoordinators()) {
-                        tablets.emplace(tabletId);
-                    }
-                    for (TTabletId tabletId : domainDescription.GetProcessingParams().GetMediators()) {
-                        tablets.emplace(tabletId);
-                    }
-                    if (domainDescription.HasDomainKey()) {
-                        if (domainDescription.GetDomainKey().HasSchemeShard()) {
-                            tablets.emplace(domainDescription.GetDomainKey().GetSchemeShard());
-                        }
+        }
+        { // same handling for a tablet viewer request registered for this node
+            auto itTabletViewerResponse = TabletViewerResponse.find(nodeId);
+            if (itTabletViewerResponse != TabletViewerResponse.end()) {
+                if (itTabletViewerResponse->second.Error(error)) {
+                    if (NodeBatches.count(nodeId)) {
+                        SendWhiteboardRequest(NodeBatches[nodeId]);
+                        NodeBatches.erase(nodeId);
                     }
+                    WhiteboardRequestDone();
                 }
             }
         }
+    }
 
-        ui64 totalStorageSize = 0;
-        ui64 availableStorageSize = 0;
-
-        for (auto& element : TWhiteboardInfo<NKikimrWhiteboard::TEvPDiskStateResponse>::GetElementsField(MergedPDiskInfo)) {
-            if (element.HasTotalSize() && element.HasAvailableSize()) {
-                totalStorageSize += element.GetTotalSize();
-                availableStorageSize += element.GetAvailableSize();
-            }
-            element.SetStateFlag(GetWhiteboardFlag(GetPDiskStateFlag(element)));
-            element.SetOverall(GetWhiteboardFlag(GetPDiskOverallFlag(element)));
-            PDisksIndex.emplace(TWhiteboardInfo<NKikimrWhiteboard::TEvPDiskStateResponse>::GetElementKey(element), element);
+    void Disconnected(TEvInterconnect::TEvNodeDisconnected::TPtr& ev) { // marks the node as disconnected and fails every response slot registered for it
+        TNodeId nodeId = ev->Get()->NodeId;
+        TNode* node = NodeCache[nodeId]; // may be null, hence the check on the next line
+        if (node) {
+            node->Disconnected = true;
         }
-        for (auto& element : TWhiteboardInfo<NKikimrWhiteboard::TEvVDiskStateResponse>::GetElementsField(MergedVDiskInfo)) {
-            element.SetOverall(GetWhiteboardFlag(GetVDiskOverallFlag(element)));
-            VDisksIndex.emplace(TWhiteboardInfo<NKikimrWhiteboard::TEvVDiskStateResponse>::GetElementKey(element), element);
+        TString error("NodeDisconnected"); // error text recorded on each slot failed below
+        { // direct system-state request to this node
+            auto itSystemStateResponse = SystemStateResponse.find(nodeId);
+            if (itSystemStateResponse != SystemStateResponse.end()) {
+                if (itSystemStateResponse->second.Error(error)) { // NOTE(review): Error() presumably returns true only while the response is still pending - confirm
+                    WhiteboardRequestDone();
+                }
+            }
         }
-        NKikimrViewer::EFlag flag = NKikimrViewer::Grey;
-        for (const auto& element : TWhiteboardInfo<NKikimrWhiteboard::TEvBSGroupStateResponse>::GetElementsField(MergedBSGroupInfo)) {
-            flag = Max(flag, GetBSGroupOverallFlag(element, VDisksIndex, PDisksIndex));
+        { // direct tablet-state request to this node
+            auto itTabletStateResponse = TabletStateResponse.find(nodeId);
+            if (itTabletStateResponse != TabletStateResponse.end()) {
+                if (itTabletStateResponse->second.Error(error)) {
+                    WhiteboardRequestDone();
+                }
+            }
         }
-        ui32 numberOfCpus = 0;
-        double loadAverage = 0;
-        THashSet<TString> dataCenters;
-        THashSet<TString> versions;
-        THashSet<TString> hosts;
-        THashMap<TString, int> names;
-        for (const auto& [nodeId, sysInfo] : SystemInfo) {
-            if (sysInfo.SystemStateInfoSize() > 0) {
-                const NKikimrWhiteboard::TSystemStateInfo& systemState = sysInfo.GetSystemStateInfo(0);
-                if (systemState.HasNumberOfCpus() && (!systemState.HasHost() || hosts.emplace(systemState.GetHost()).second)) {
-                    numberOfCpus += systemState.GetNumberOfCpus();
-                    if (systemState.LoadAverageSize() > 0) {
-                        loadAverage += systemState.GetLoadAverage(0);
+        { // system viewer request keyed by this node; a stored batch, if any, is handed to SendWhiteboardRequest
+            auto itSystemViewerResponse = SystemViewerResponse.find(nodeId);
+            if (itSystemViewerResponse != SystemViewerResponse.end()) {
+                if (itSystemViewerResponse->second.Error(error)) {
+                    if (NodeBatches.count(nodeId)) {
+                        SendWhiteboardRequest(NodeBatches[nodeId]);
+                        NodeBatches.erase(nodeId);
                     }
+                    WhiteboardRequestDone();
                 }
-                if (systemState.HasDataCenter()) {
-                    dataCenters.insert(systemState.GetDataCenter());
+            }
+        }
+        { // tablet viewer request keyed by this node; same batch handling as above
+            auto itTabletViewerResponse = TabletViewerResponse.find(nodeId);
+            if (itTabletViewerResponse != TabletViewerResponse.end()) {
+                if (itTabletViewerResponse->second.Error(error)) {
+                    if (NodeBatches.count(nodeId)) {
+                        SendWhiteboardRequest(NodeBatches[nodeId]);
+                        NodeBatches.erase(nodeId);
+                    }
+                    WhiteboardRequestDone();
                 }
-                if (systemState.HasVersion()) {
-                    versions.insert(systemState.GetVersion());
+            }
+        }
+    }
+
+    void Undelivered(TEvents::TEvUndelivered::TPtr& ev) { // fails the viewer response slots of the node whose request could not be delivered
+        TNodeId nodeId = ev->Sender.NodeId(); // the node id is taken from the sender of the undelivered notification
+        TString error("Undelivered"); // error text recorded on each slot failed below
+        { // system viewer request keyed by this node; a stored batch, if any, is handed to SendWhiteboardRequest
+            auto itSystemViewerResponse = SystemViewerResponse.find(nodeId);
+            if (itSystemViewerResponse != SystemViewerResponse.end()) {
+                if (itSystemViewerResponse->second.Error(error)) { // NOTE(review): Error() presumably returns true only while the response is still pending - confirm
+                    if (NodeBatches.count(nodeId)) {
+                        SendWhiteboardRequest(NodeBatches[nodeId]);
+                        NodeBatches.erase(nodeId);
+                    }
+                    WhiteboardRequestDone();
                 }
-                if (systemState.HasClusterName()) {
-                    names[systemState.GetClusterName()]++;
+            }
+        }
+        { // tablet viewer request keyed by this node; same batch handling as above
+            auto itTabletViewerResponse = TabletViewerResponse.find(nodeId);
+            if (itTabletViewerResponse != TabletViewerResponse.end()) {
+                if (itTabletViewerResponse->second.Error(error)) {
+                    if (NodeBatches.count(nodeId)) {
+                        SendWhiteboardRequest(NodeBatches[nodeId]);
+                        NodeBatches.erase(nodeId);
+                    }
+                    WhiteboardRequestDone();
                 }
             }
         }
+    }
 
-        NKikimrViewer::TClusterInfo pbCluster;
+    bool OnBscError(const TString& error) {
+        bool result = false;
+        if (StorageStatsResponse && StorageStatsResponse->Error(error)) {
+            ProcessResponses();
+            result = true;
+        }
+        if (PDisksResponse && PDisksResponse->Error(error)) {
+            ProcessResponses();
+            result = true;
+        }
+        return result;
+    }
 
-        if (Tablets) {
-            for (const NKikimrWhiteboard::TTabletStateInfo& tabletInfo : MergedTabletInfo.GetTabletStateInfo()) {
-                if (tablets.contains(tabletInfo.GetTabletId())) {
-                    NKikimrWhiteboard::TTabletStateInfo* tablet = pbCluster.AddSystemTablets();
-                    tablet->CopyFrom(tabletInfo);
-                    auto tabletFlag = GetWhiteboardFlag(GetFlagFromTabletState(tablet->GetState()));
-                    tablet->SetOverall(tabletFlag);
-                    flag = Max(flag, GetViewerFlag(tabletFlag));
+    void Handle(TEvTabletPipe::TEvClientConnected::TPtr& ev) { // on a failed pipe connect, fails the requests that depend on that tablet; always defers to the base handler
+        if (ev->Get()->Status != NKikimrProto::OK) {
+            TString error = TStringBuilder() << "Failed to establish pipe to " << ev->Get()->TabletId << ": "
+                << NKikimrProto::EReplyStatus_Name(ev->Get()->Status);
+            if (ev->Get()->TabletId == GetBSControllerId()) { // BS controller: storage stats and PDisks requests
+                if (OnBscError(error)) {
+                    AddProblem("bsc-error");
+                }
+            }
+            if (ev->Get()->TabletId == RootHiveId) { // root hive: node stats request
+                if (HiveNodeStatsResponse && HiveNodeStatsResponse->Error(error)) {
+                    AddProblem("hive-error");
+                    ProcessResponses();
+                }
+            }
+            if (ev->Get()->TabletId == MakeConsoleID()) { // console: tenant list request
+                if (ListTenantsResponse && ListTenantsResponse->Error(error)) {
+                    AddProblem("console-error");
+                    ProcessResponses();
                 }
             }
-            pbCluster.SetTablets(MergedTabletInfo.TabletStateInfoSize());
         }
-        pbCluster.SetTenants(TenantsNumber);
+        TBase::Handle(ev); // all RequestDone() are handled by base handler
+    }
 
-        pbCluster.SetOverall(flag);
-        if (NodesInfo != nullptr) {
-            pbCluster.SetNodesTotal(NodesInfo->Nodes.size());
-            pbCluster.SetNodesAlive(NodesAlive.size());
+    void HandleTimeout() { // bound to TEvents::TSystem::Wakeup in StateWork
+        ReplyAndPassAway(); // reply with whatever has been accumulated so far
+    }
+
+    STATEFN(StateWork) { // event dispatch table for this actor's working state
+        switch (ev->GetTypeRewrite()) {
+            hFunc(TEvInterconnect::TEvNodesInfo, Handle);
+            hFunc(TEvWhiteboard::TEvNodeStateResponse, Handle);
+            hFunc(TEvWhiteboard::TEvSystemStateResponse, Handle);
+            hFunc(TEvWhiteboard::TEvTabletStateResponse, Handle);
+            hFunc(TEvViewer::TEvViewerResponse, Handle);
+            hFunc(NConsole::TEvConsole::TEvListTenantsResponse, Handle);
+            hFunc(NSysView::TEvSysView::TEvGetPDisksResponse, Handle);
+            hFunc(NSysView::TEvSysView::TEvGetStorageStatsResponse, Handle);
+            hFunc(TEvHive::TEvResponseHiveNodeStats, Handle);
+            hFunc(TEvents::TEvUndelivered, Undelivered); // delivery failure for a per-node request
+            hFunc(TEvInterconnect::TEvNodeDisconnected, Disconnected); // node went away while requests were pending
+            hFunc(TEvTabletPipe::TEvClientConnected, Handle); // pipe connect result (success or failure)
+            cFunc(TEvents::TSystem::Wakeup, HandleTimeout); // wakeup is treated as the request timeout
         }
-        pbCluster.SetNumberOfCpus(numberOfCpus);
-        pbCluster.SetLoadAverage(loadAverage);
-        pbCluster.SetStorageTotal(totalStorageSize);
-        pbCluster.SetStorageUsed(totalStorageSize - availableStorageSize);
-        pbCluster.SetHosts(hosts.size());
-        TIntrusivePtr<TDomainsInfo> domains = AppData()->DomainsInfo;
-        if (const auto& domain = domains->Domain) {
-            TString domainName = "/" + domain->Name;
-            pbCluster.SetDomain(domainName);
+    }
+
+    void ReplyAndPassAway() override { // finalizes ClusterInfo from its accumulated maps, serializes it to JSON and replies
+        for (const auto& problem : Problems) {
+            ClusterInfo.AddProblems(problem);
         }
-        for (const TString& dc : dataCenters) {
-            pbCluster.AddDataCenters(dc);
+        for (const auto& [dataCenter, nodes] : ClusterInfo.GetMapDataCenters()) { // flat list of data center names taken from the map keys
+            ClusterInfo.AddDataCenters(dataCenter);
         }
-        for (const TString& version : versions) {
-            pbCluster.AddVersions(version);
+        for (const auto& [version, count] : ClusterInfo.GetMapVersions()) { // flat list of versions taken from the map keys
+            ClusterInfo.AddVersions(version);
         }
-        auto itMax = std::max_element(names.begin(), names.end(), [](const auto& a, const auto& b) {
-            return a.second < b.second;
-        });
-        if (itMax != names.end()) {
-            pbCluster.SetName(itMax->first);
+        for (const auto& [type, size] : ClusterInfo.GetMapStorageTotal()) { // StorageTotal becomes the sum over all map entries
+            ClusterInfo.SetStorageTotal(ClusterInfo.GetStorageTotal() + size);
         }
-
-        TStringStream json;
-        TProtoToJson::ProtoToJson(json, pbCluster, JsonSettings);
-        Send(Event->Sender, new NMon::TEvHttpInfoRes(Viewer->GetHTTPOKJSON(Event->Get(), std::move(json.Str())), 0, NMon::IEvHttpInfoRes::EContentType::Custom));
-
-        PassAway();
-    }
-
-    void HandleTimeout() {
-        EventLog.IsTimeout = true;
-        ReplyAndPassAway();
+        for (const auto& [type, size] : ClusterInfo.GetMapStorageUsed()) { // StorageUsed becomes the sum over all map entries
+            ClusterInfo.SetStorageUsed(ClusterInfo.GetStorageUsed() + size);
+        }
+        NKikimrWhiteboard::EFlag worstState = NKikimrWhiteboard::EFlag::Grey;
+        ui64 worstNodes = 0;
+        for (NKikimrWhiteboard::EFlag flag = NKikimrWhiteboard::EFlag::Grey; flag <= NKikimrWhiteboard::EFlag::Red; flag = NKikimrWhiteboard::EFlag(flag + 1)) { // visits flags in ascending enum order, Grey through Red
+            auto itNodes = ClusterInfo.GetMapNodeStates().find(NKikimrWhiteboard::EFlag_Name(flag)); // the map is keyed by flag name
+            if (itNodes == ClusterInfo.GetMapNodeStates().end()) {
+                continue;
+            }
+            auto& nodes = itNodes->second;
+            if (nodes > worstNodes / 100) { // only if it's more than 1% of all nodes
+                worstState = flag;
+            }
+            worstNodes += nodes; // running total over the flags visited so far, i.e. the threshold above is relative to nodes in lower flags
+        }
+        ClusterInfo.SetOverall(GetViewerFlag(worstState));
+        TStringStream out;
+        Proto2Json(ClusterInfo, out, {
+            .EnumMode = TProto2JsonConfig::EnumValueMode::EnumName,
+            .MapAsObject = true,
+            .StringifyNumbers = TProto2JsonConfig::EStringifyNumbersMode::StringifyInt64Always,
+            .WriteNanAsString = true,
+        });
+        TBase::ReplyAndPassAway(GetHTTPOKJSON(out.Str()));
     }
 
+public:
     static YAML::Node GetSwagger() {
         TSimpleYamlBuilder yaml({
             .Method = "get",

+ 1 - 6
ydb/core/viewer/viewer_feature_flags.h

@@ -49,7 +49,7 @@ public:
         Direct |= !TBase::Event->Get()->Request.GetHeader("X-Forwarded-From-Node").empty(); // we're already forwarding
         Direct |= (FilterDatabase == AppData()->TenantName); // we're already on the right node
         if (FilterDatabase && !Direct) {
-            RequestStateStorageEndpointsLookup(FilterDatabase); // to find some dynamic node and redirect there
+            return RedirectToDatabase(FilterDatabase); // to find some dynamic node and redirect query there
         } else if (!FilterDatabase) {
             MakeNodeConfigRequest(DomainPath);
             TenantsResponse = MakeRequestConsoleListTenants();
@@ -60,13 +60,8 @@ public:
         Become(&TThis::StateWork, TDuration::MilliSeconds(Timeout), new TEvents::TEvWakeup());
     }
 
-    void HandleReply(TEvStateStorage::TEvBoardInfo::TPtr& ev) {
-        TBase::ReplyAndPassAway(MakeForward(GetNodesFromBoardReply(ev)));
-    }
-
     STATEFN(StateWork) {
         switch (ev->GetTypeRewrite()) {
-            hFunc(TEvStateStorage::TEvBoardInfo, HandleReply);
             hFunc(NConsole::TEvConsole::TEvListTenantsResponse, Handle);
             hFunc(NConsole::TEvConsole::TEvGetNodeConfigResponse, Handle);
             hFunc(TEvTabletPipe::TEvClientConnected, TBase::Handle);

+ 39 - 0
ydb/core/viewer/viewer_helper.h

@@ -1,6 +1,45 @@
 #pragma once
 #include <util/generic/algorithm.h>
 
+// Lets NKikimr::TSubDomainKey be used as a key in std unordered containers
+// by forwarding to the key's own Hash() method.
+template<>
+struct std::hash<NKikimr::TSubDomainKey> {
+    std::size_t operator ()(const NKikimr::TSubDomainKey& key) const {
+        const std::size_t digest = key.Hash();
+        return digest;
+    }
+};
+
+// Field-wise equality for TVDiskID. make_tuple() is the key projection that the
+// std::less and std::hash specializations for TVDiskID also rely on.
+template <>
+struct std::equal_to<NKikimrBlobStorage::TVDiskID> {
+    // Projects an id onto (GroupID, GroupGeneration, Ring, Domain, VDisk).
+    static decltype(auto) make_tuple(const NKikimrBlobStorage::TVDiskID& id) {
+        return std::make_tuple(id.GetGroupID(), id.GetGroupGeneration(), id.GetRing(), id.GetDomain(), id.GetVDisk());
+    }
+
+    // Two ids are equal when all five projected fields are equal.
+    bool operator ()(const NKikimrBlobStorage::TVDiskID& lhs, const NKikimrBlobStorage::TVDiskID& rhs) const {
+        const auto lhsKey = make_tuple(lhs);
+        const auto rhsKey = make_tuple(rhs);
+        return lhsKey == rhsKey;
+    }
+};
+
+// Strict ordering for TVDiskID: lexicographic comparison of the tuple produced
+// by std::equal_to<TVDiskID>::make_tuple.
+template <>
+struct std::less<NKikimrBlobStorage::TVDiskID> {
+    bool operator ()(const NKikimrBlobStorage::TVDiskID& lhs, const NKikimrBlobStorage::TVDiskID& rhs) const {
+        using TKeyOf = std::equal_to<NKikimrBlobStorage::TVDiskID>;
+        return TKeyOf::make_tuple(lhs) < TKeyOf::make_tuple(rhs);
+    }
+};
+
+// Hashes a TVDiskID through the tuple produced by std::equal_to<TVDiskID>::make_tuple.
+// NOTE(review): this needs a std::hash specialization for that tuple type, which is
+// not visible in this chunk - presumably provided by a project header; confirm.
+template <>
+struct std::hash<NKikimrBlobStorage::TVDiskID> {
+    size_t operator ()(const NKikimrBlobStorage::TVDiskID& id) const {
+        auto fields = std::equal_to<NKikimrBlobStorage::TVDiskID>::make_tuple(id);
+        return std::hash<decltype(fields)>()(fields);
+    }
+};
+
 namespace NKikimr::NViewer {
     template<typename TCollection, typename TFunc>
     void SortCollection(TCollection& collection, TFunc&& func, bool ReverseSort = false) {

+ 0 - 7
ydb/core/viewer/viewer_nodes.h

@@ -8,13 +8,6 @@
 #include "wb_group.h"
 #include <library/cpp/protobuf/json/proto2json.h>
 
-template<>
-struct std::hash<NKikimr::TSubDomainKey> {
-    std::size_t operator ()(const NKikimr::TSubDomainKey& s) const {
-        return s.Hash();
-    }
-};
-
 namespace NKikimr::NViewer {
 
 using namespace NProtobufJson;

+ 1 - 36
ydb/core/viewer/viewer_vdiskinfo.h

@@ -1,41 +1,6 @@
 #pragma once
 #include "json_wb_req.h"
-
-namespace std {
-
-template <>
-struct equal_to<NKikimrBlobStorage::TVDiskID> {
-    static decltype(auto) make_tuple(const NKikimrBlobStorage::TVDiskID& id) {
-        return std::make_tuple(
-                    id.GetGroupID(),
-                    id.GetGroupGeneration(),
-                    id.GetRing(),
-                    id.GetDomain(),
-                    id.GetVDisk()
-                    );
-    }
-
-    bool operator ()(const NKikimrBlobStorage::TVDiskID& a, const NKikimrBlobStorage::TVDiskID& b) const {
-        return make_tuple(a) == make_tuple(b);
-    }
-};
-
-template <>
-struct less<NKikimrBlobStorage::TVDiskID> {
-    bool operator ()(const NKikimrBlobStorage::TVDiskID& a, const NKikimrBlobStorage::TVDiskID& b) const {
-        return equal_to<NKikimrBlobStorage::TVDiskID>::make_tuple(a) < equal_to<NKikimrBlobStorage::TVDiskID>::make_tuple(b);
-    }
-};
-
-template <>
-struct hash<NKikimrBlobStorage::TVDiskID> {
-    size_t operator ()(const NKikimrBlobStorage::TVDiskID& a) const {
-        auto tp = equal_to<NKikimrBlobStorage::TVDiskID>::make_tuple(a);
-        return hash<decltype(tp)>()(tp);
-    }
-};
-
-}
+#include "viewer_helper.h"
 
 namespace NKikimr::NViewer {