Browse Source

Fix tablets on nodes (#8790)

Alexey Efimov 6 months ago
parent
commit
f2c3c58a54

+ 1 - 0
ydb/core/viewer/protos/viewer.proto

@@ -641,6 +641,7 @@ message TSchemeCacheRequest {
 message TEvViewerRequest {
     TNodeLocation Location = 1;
     uint32 Timeout = 2; // ms
+    string MergeFields = 3;
     oneof Request {
         NKikimrWhiteboard.TEvTabletStateRequest TabletRequest = 11;
         NKikimrWhiteboard.TEvSystemStateRequest SystemRequest = 12;

+ 17 - 7
ydb/core/viewer/viewer_nodes.h

@@ -1367,7 +1367,9 @@ public:
             for (TTabletId hiveId : HivesToAsk) {
                 auto request = std::make_unique<TEvHive::TEvRequestHiveNodeStats>();
                 request->Record.SetReturnMetrics(true);
-                request->Record.SetReturnExtendedTabletInfo(true);
+                if (Database) { // it's better to ask hive about tablets only if we're filtering by database
+                    request->Record.SetReturnExtendedTabletInfo(true);
+                }
                 if (AskHiveAboutPaths) {
                     request->Record.SetFilterTabletsBySchemeShardId(FilterPathId.OwnerId);
                     request->Record.SetFilterTabletsByPathId(FilterPathId.LocalPathId);
@@ -1386,12 +1388,14 @@ public:
                             ui32 nodeId = nodeStats.GetNodeId();
                             TNode* node = FindNode(nodeId);
                             if (node) {
-                                for (const NKikimrHive::THiveDomainStatsStateCount& stateStats : nodeStats.GetStateStats()) {
-                                    NKikimrViewer::TTabletStateInfo& viewerTablet(node->Tablets.emplace_back());
-                                    viewerTablet.SetType(NKikimrTabletBase::TTabletTypes::EType_Name(stateStats.GetTabletType()));
-                                    viewerTablet.SetCount(stateStats.GetCount());
-                                    viewerTablet.SetState(GetFlagFromTabletState(stateStats.GetVolatileState()));
-                                    FieldsAvailable.set(+ENodeFields::Tablets);
+                                if (Database) { // it's better to ask hive about tablets only if we're filtering by database
+                                    for (const NKikimrHive::THiveDomainStatsStateCount& stateStats : nodeStats.GetStateStats()) {
+                                        NKikimrViewer::TTabletStateInfo& viewerTablet(node->Tablets.emplace_back());
+                                        viewerTablet.SetType(NKikimrTabletBase::TTabletTypes::EType_Name(stateStats.GetTabletType()));
+                                        viewerTablet.SetCount(stateStats.GetCount());
+                                        viewerTablet.SetState(GetFlagFromTabletState(stateStats.GetVolatileState()));
+                                        FieldsAvailable.set(+ENodeFields::Tablets);
+                                    }
                                 }
                                 if (nodeStats.HasLastAliveTimestamp()) {
                                     node->SystemState.SetDisconnectTime(std::max(node->SystemState.GetDisconnectTime(), nodeStats.GetLastAliveTimestamp() / 1000)); // seconds
@@ -1648,10 +1652,14 @@ public:
         if (FieldsNeeded(FieldsTablets)) {
             for (auto& [nodeId, response] : TabletViewerResponse) {
                 if (response.IsOk()) {
+                    Cerr << "Good tablet response for node " << nodeId << Endl;
+                    Cerr << "LocationResponded: " << response.Get()->Record.GetLocationResponded().ShortDebugString() << Endl;
                     auto& tabletResponse(*(response.Get()->Record.MutableTabletResponse()));
                     if (tabletResponse.TabletStateInfoSize() > 0 && !tabletResponse.GetTabletStateInfo(0).HasCount()) {
+                        Cerr << "TabletResponse before merge: " << tabletResponse.ShortDebugString() << Endl;
                         GroupWhiteboardResponses(tabletResponse, "NodeId,Type,State");
                     }
+                    Cerr << "TabletResponse: " << tabletResponse.ShortDebugString() << Endl;
                     for (const auto& tabletState : tabletResponse.GetTabletStateInfo()) {
                         TNode* node = FindNode(tabletState.GetNodeId());
                         if (node) {
@@ -1663,6 +1671,8 @@ public:
                             }
                         }
                     }
+                } else {
+                    Cerr << "Bad tablet response for node " << nodeId << Endl;
                 }
             }
             for (auto& [nodeId, response] : TabletStateResponse) {

+ 72 - 6
ydb/core/viewer/viewer_request.cpp

@@ -36,7 +36,7 @@ public:
     }
 
     void Bootstrap() override {
-        TBase::RequestSettings.MergeFields = TWhiteboardInfo<TResponseType>::GetDefaultMergeField();
+        TBase::RequestSettings.MergeFields = Event->Get()->Record.GetMergeFields();
         TBase::RequestSettings.Timeout = Event->Get()->Record.GetTimeout();
         for (TNodeId nodeId : Event->Get()->Record.GetLocation().GetNodeId()) {
             TBase::RequestSettings.FilterNodeIds.push_back(nodeId);
@@ -74,15 +74,26 @@ public:
         NKikimr::NViewer::MergeWhiteboardResponses(*(response->Record.MutableBSGroupResponse()), perNodeStateInfo, fields);
     }
 
+    static void Merge(NKikimrViewer::TEvViewerResponse& viewerResponse, TNodeId nodeId, TResponseType& nodeResponse); // specialized per response type below; called by ReplyAndPassAway for each OK node response when RequestSettings.MergeFields evaluates false
+
     void ReplyAndPassAway() override {
         auto response = MakeHolder<TEvViewer::TEvViewerResponse>();
         auto& locationResponded = (*response->Record.MutableLocationResponded());
-        auto perNodeStateInfo = TBase::GetPerNodeStateInfo();
-        for (const auto& [nodeId, nodeResponse] : perNodeStateInfo) {
-            locationResponded.AddNodeId(nodeId);
-        }
 
-        MergeWhiteboardResponses(response.Get(), perNodeStateInfo, TBase::RequestSettings.MergeFields);
+        if (TBase::RequestSettings.MergeFields) {
+            auto perNodeStateInfo = TBase::GetPerNodeStateInfo();
+            for (const auto& [nodeId, nodeResponse] : perNodeStateInfo) {
+                locationResponded.AddNodeId(nodeId);
+            }
+            MergeWhiteboardResponses(response.Get(), perNodeStateInfo, TBase::RequestSettings.MergeFields);
+        } else {
+            for (auto& [nodeId, nodeResponse] : TBase::NodeResponses) {
+                if (nodeResponse.IsOk()) {
+                    locationResponded.AddNodeId(nodeId);
+                    Merge(response->Record, nodeId, nodeResponse.Get()->Record);
+                }
+            }
+        }
 
         TBase::Send(Event->Sender, response.Release(), 0, Event->Cookie);
         TBase::PassAway();
@@ -120,6 +131,17 @@ THolder<TEvWhiteboard::TEvTabletStateRequest> TViewerWhiteboardRequest<TEvWhiteb
     return request;
 }
 
+template<> // Tablet state: append one node's entries to the viewer response as-is (no grouping), each tagged with nodeId.
+void TViewerWhiteboardRequest<TEvWhiteboard::TEvTabletStateRequest, TEvWhiteboard::TEvTabletStateResponse>::Merge(
+        NKikimrViewer::TEvViewerResponse& viewerResponse, TNodeId nodeId, NKikimrWhiteboard::TEvTabletStateResponse& nodeResponse) {
+    auto* merged = viewerResponse.MutableTabletResponse(); // obtained before the loop, so it is touched even when there are no entries
+    for (auto& source : *nodeResponse.MutableTabletStateInfo()) {
+        auto* entry = merged->AddTabletStateInfo();
+        entry->MergeFrom(source); // entry was just added, so this copies the fields set in source
+        entry->SetNodeId(nodeId); // record the node id supplied by the caller
+    }
+}
+
 template<>
 THolder<TEvWhiteboard::TEvSystemStateRequest> TViewerWhiteboardRequest<TEvWhiteboard::TEvSystemStateRequest, TEvWhiteboard::TEvSystemStateResponse>::BuildRequest() {
     auto request = TBase::BuildRequest();
@@ -127,6 +149,17 @@ THolder<TEvWhiteboard::TEvSystemStateRequest> TViewerWhiteboardRequest<TEvWhiteb
     return request;
 }
 
+template<> // System state: append one node's entries to the viewer response as-is (no grouping), each tagged with nodeId.
+void TViewerWhiteboardRequest<TEvWhiteboard::TEvSystemStateRequest, TEvWhiteboard::TEvSystemStateResponse>::Merge(
+        NKikimrViewer::TEvViewerResponse& viewerResponse, TNodeId nodeId, NKikimrWhiteboard::TEvSystemStateResponse& nodeResponse) {
+    auto* merged = viewerResponse.MutableSystemResponse(); // obtained before the loop, so it is touched even when there are no entries
+    for (auto& source : *nodeResponse.MutableSystemStateInfo()) {
+        auto* entry = merged->AddSystemStateInfo();
+        entry->MergeFrom(source); // entry was just added, so this copies the fields set in source
+        entry->SetNodeId(nodeId); // record the node id supplied by the caller
+    }
+}
+
 template<>
 THolder<TEvWhiteboard::TEvVDiskStateRequest> TViewerWhiteboardRequest<TEvWhiteboard::TEvVDiskStateRequest, TEvWhiteboard::TEvVDiskStateResponse>::BuildRequest() {
     auto request = TBase::BuildRequest();
@@ -134,6 +167,17 @@ THolder<TEvWhiteboard::TEvVDiskStateRequest> TViewerWhiteboardRequest<TEvWhitebo
     return request;
 }
 
+template<> // VDisk state: append one node's entries to the viewer response as-is (no grouping), each tagged with nodeId.
+void TViewerWhiteboardRequest<TEvWhiteboard::TEvVDiskStateRequest, TEvWhiteboard::TEvVDiskStateResponse>::Merge(
+        NKikimrViewer::TEvViewerResponse& viewerResponse, TNodeId nodeId, NKikimrWhiteboard::TEvVDiskStateResponse& nodeResponse) {
+    auto* merged = viewerResponse.MutableVDiskResponse(); // obtained before the loop, so it is touched even when there are no entries
+    for (auto& source : *nodeResponse.MutableVDiskStateInfo()) {
+        auto* entry = merged->AddVDiskStateInfo();
+        entry->MergeFrom(source); // entry was just added, so this copies the fields set in source
+        entry->SetNodeId(nodeId); // record the node id supplied by the caller
+    }
+}
+
 template<>
 THolder<TEvWhiteboard::TEvPDiskStateRequest> TViewerWhiteboardRequest<TEvWhiteboard::TEvPDiskStateRequest, TEvWhiteboard::TEvPDiskStateResponse>::BuildRequest() {
     auto request = TBase::BuildRequest();
@@ -141,6 +185,17 @@ THolder<TEvWhiteboard::TEvPDiskStateRequest> TViewerWhiteboardRequest<TEvWhitebo
     return request;
 }
 
+template<> // PDisk state: append one node's entries to the viewer response as-is (no grouping), each tagged with nodeId.
+void TViewerWhiteboardRequest<TEvWhiteboard::TEvPDiskStateRequest, TEvWhiteboard::TEvPDiskStateResponse>::Merge(
+        NKikimrViewer::TEvViewerResponse& viewerResponse, TNodeId nodeId, NKikimrWhiteboard::TEvPDiskStateResponse& nodeResponse) {
+    auto* merged = viewerResponse.MutablePDiskResponse(); // obtained before the loop, so it is touched even when there are no entries
+    for (auto& source : *nodeResponse.MutablePDiskStateInfo()) {
+        auto* entry = merged->AddPDiskStateInfo();
+        entry->MergeFrom(source); // entry was just added, so this copies the fields set in source
+        entry->SetNodeId(nodeId); // record the node id supplied by the caller
+    }
+}
+
 template<>
 THolder<TEvWhiteboard::TEvBSGroupStateRequest> TViewerWhiteboardRequest<TEvWhiteboard::TEvBSGroupStateRequest, TEvWhiteboard::TEvBSGroupStateResponse>::BuildRequest() {
     auto request = TBase::BuildRequest();
@@ -148,6 +203,17 @@ THolder<TEvWhiteboard::TEvBSGroupStateRequest> TViewerWhiteboardRequest<TEvWhite
     return request;
 }
 
+template<> // BS group state: append one node's entries to the viewer response as-is (no grouping), each tagged with nodeId.
+void TViewerWhiteboardRequest<TEvWhiteboard::TEvBSGroupStateRequest, TEvWhiteboard::TEvBSGroupStateResponse>::Merge(
+        NKikimrViewer::TEvViewerResponse& viewerResponse, TNodeId nodeId, NKikimrWhiteboard::TEvBSGroupStateResponse& nodeResponse) {
+    auto* merged = viewerResponse.MutableBSGroupResponse(); // obtained before the loop, so it is touched even when there are no entries
+    for (auto& source : *nodeResponse.MutableBSGroupStateInfo()) {
+        auto* entry = merged->AddBSGroupStateInfo();
+        entry->MergeFrom(source); // entry was just added, so this copies the fields set in source
+        entry->SetNodeId(nodeId); // record the node id supplied by the caller
+    }
+}
+
 bool IsPostContent(const NMon::TEvHttpInfo::TPtr& event) {
     if (event->Get()->Request.GetMethod() == HTTP_METHOD_POST) {
         const THttpHeaders& headers = event->Get()->Request.GetHeaders();