Browse Source

compute handler - make paging earlier for nodeId

andrew-rykov 1 year ago
parent
commit
d1ba3e5b3d
1 changed files with 184 additions and 130 deletions
  1. 184 130
      ydb/core/viewer/json_compute.h

+ 184 - 130
ydb/core/viewer/json_compute.h

@@ -33,9 +33,13 @@ class TJsonCompute : public TViewerPipeClient<TJsonCompute> {
     THashMap<TString, THolder<NSchemeCache::TSchemeCacheNavigate>> NavigateResult;
     THashMap<TTabletId, THolder<TEvHive::TEvResponseHiveDomainStats>> HiveDomainStats;
     THashMap<TTabletId, THolder<TEvHive::TEvResponseHiveNodeStats>> HiveNodeStats;
+    THashMap<TNodeId, TVector<const NKikimrWhiteboard::TTabletStateInfo*>> TabletInfoIndex;
+    THashMap<TNodeId, const NKikimrHive::THiveNodeStats*> HiveNodeStatsIndex;
+    THashMap<TNodeId, TString> TenantPathByNodeId;
     NMon::TEvHttpInfo::TPtr Event;
-    THashSet<TNodeId> NodeIds;
-    std::unordered_set<TNodeId> FoundNodeIds;
+    TVector<TNodeId> NodeIds;
+    THashSet<TNodeId> PassedNodeIds;
+    THashSet<TNodeId> FoundNodeIds;
     THashMap<TNodeId, NKikimrWhiteboard::TEvSystemStateResponse> NodeSysInfo;
     TMap<TNodeId, NKikimrWhiteboard::TEvTabletStateResponse> NodeTabletInfo;
     THolder<TEvInterconnect::TEvNodesInfo> NodesInfo;
@@ -49,9 +53,9 @@ class TJsonCompute : public TViewerPipeClient<TJsonCompute> {
     TTabletId RootHiveId = 0;
     bool RootHiveRequested = false;
     NKikimrViewer::TComputeInfo Result;
-    ui32 UptimeSeconds = 0;
-    bool ProblemNodesOnly = false;
-    TString Filter;
+    ui32 UptimeSecondsFilter = 0;
+    bool ProblemNodesFilter = false;
+    TString TextFilter;
 
     enum class EVersion {
         v1,
@@ -73,6 +77,7 @@ class TJsonCompute : public TViewerPipeClient<TJsonCompute> {
     std::optional<ui32> Limit;
     ESort Sort = ESort::NodeId;
     bool ReverseSort = false;
+    bool IsNodesListSorted = false;
 
 public:
     static constexpr NKikimrServices::TActivity::EType ActorActivityType() {
@@ -112,9 +117,9 @@ public:
         Timeout = FromStringWithDefault<ui32>(params.Get("timeout"), 10000);
         Tablets = FromStringWithDefault<bool>(params.Get("tablets"), Tablets);
         Path = params.Get("path");
-        UptimeSeconds = FromStringWithDefault<ui32>(params.Get("uptime"), 0);
-        ProblemNodesOnly = FromStringWithDefault<bool>(params.Get("problems_only"), ProblemNodesOnly);
-        Filter = params.Get("filter");
+        UptimeSecondsFilter = FromStringWithDefault<ui32>(params.Get("uptime"), 0);
+        ProblemNodesFilter = FromStringWithDefault<bool>(params.Get("problems_only"), ProblemNodesFilter);
+        TextFilter = params.Get("filter");
         if (params.Has("offset")) {
             Offset = FromStringWithDefault<ui32>(params.Get("offset"), 0);
         }
@@ -238,10 +243,48 @@ public:
         RequestDone();
     }
 
+    // Tells whether nodeId belongs to the requested page.
+    // Every distinct nodeId is recorded in PassedNodeIds on first sight; a repeated
+    // id is never accepted. A first-seen id is rejected while the number of passed
+    // ids has not exceeded Offset, or once NodeIds already holds Limit entries.
+    bool IsPageNode(TNodeId nodeId) {
+        const bool seenFirstTime = PassedNodeIds.insert(nodeId).second;
+        if (!seenFirstTime) {
+            return false;
+        }
+        if (Offset.has_value() && PassedNodeIds.size() <= *Offset) {
+            return false; // still inside the skipped prefix
+        }
+        if (Limit.has_value() && NodeIds.size() >= *Limit) {
+            return false; // page is already full
+        }
+        return true;
+    }
+
+    // Tells whether nodeId should be requested: its id must be above
+    // MaxStaticNodeId from the dynamic nameservice config, and, when the node list
+    // is already sorted, it must also fall on the requested page.
+    bool IsRequiredNode(TNodeId nodeId) {
+        const auto nameserviceConfig = AppData()->DynamicNameserviceConfig;
+        if (nodeId <= nameserviceConfig->MaxStaticNodeId) {
+            return false;
+        }
+        if (!IsNodesListSorted) {
+            return true;
+        }
+        return IsPageNode(nodeId);
+    }
+
+    // Sorting of nodes is requested only when the handler runs as EVersion::v2.
+    bool NeedNodesSorting() {
+        const bool isSecondVersion = (Version == EVersion::v2);
+        return isSecondVersion;
+    }
+
+    // Returns true when at least one node filter is requested: problem nodes only,
+    // an uptime bound, or a text filter. Callers use this to decide whether the
+    // node list may be sorted and paged before per-node details are available, so
+    // any single active filter must make it return true.
+    bool IsNodeFilter() {
+        // All three terms are joined with ||: the previous `a || b && c` form
+        // ignored a lone uptime filter or a lone text filter.
+        return ProblemNodesFilter || UptimeSecondsFilter > 0 || TextFilter;
+    }
+
     void Handle(TEvHive::TEvResponseHiveNodeStats::TPtr& ev) {
-        for (const NKikimrHive::THiveNodeStats& nodeStat : ev->Get()->Record.GetNodeStats()) {
+        BLOG_TRACE("ProcessNodeIds()");
+
+        auto nodeStats = ev->Get()->Record.GetNodeStats();
+        if (NeedNodesSorting() && Sort == ESort::NodeId && !IsNodeFilter()) {
+            SortCollection(nodeStats, [](const NKikimrHive::THiveNodeStats& node) { return node.GetNodeId();}, ReverseSort);
+            IsNodesListSorted = true;
+        }
+        for (const NKikimrHive::THiveNodeStats& nodeStat : nodeStats) {
             auto nodeId = nodeStat.GetNodeId();
-            if (NodeIds.insert(nodeId).second) {
+            if (IsRequiredNode(nodeId)) {
+                NodeIds.emplace_back(nodeId); // order is important
                 TActorId whiteboardServiceId = MakeNodeWhiteboardServiceId(nodeId);
                 THolder<NNodeWhiteboard::TEvWhiteboard::TEvSystemStateRequest> request = MakeHolder<NNodeWhiteboard::TEvWhiteboard::TEvSystemStateRequest>();
                 SendRequest(whiteboardServiceId, request.Release(), IEventHandle::FlagTrackDelivery | IEventHandle::FlagSubscribeOnSession, nodeId);
@@ -333,19 +376,19 @@ public:
         if (itSysInfo != NodeSysInfo.end()) {
             if (itSysInfo->second.SystemStateInfoSize() == 1) {
                 const NKikimrWhiteboard::TSystemStateInfo& sysInfo = itSysInfo->second.GetSystemStateInfo(0);
-                if (UptimeSeconds > 0 && sysInfo.HasStartTime() && sysInfo.HasChangeTime()
-                        && sysInfo.GetChangeTime() - sysInfo.GetStartTime() > UptimeSeconds * 1000) {
+                if (UptimeSecondsFilter > 0 && sysInfo.HasStartTime() && sysInfo.HasChangeTime()
+                        && sysInfo.GetChangeTime() - sysInfo.GetStartTime() > UptimeSecondsFilter * 1000) {
                     return false;
                 }
-                if (ProblemNodesOnly && sysInfo.HasSystemState()
+                if (ProblemNodesFilter && sysInfo.HasSystemState()
                         && GetViewerFlag(sysInfo.GetSystemState()) == NKikimrViewer::EFlag::Green) {
                     return false;
                 }
-                if (Filter) {
-                    if (sysInfo.HasHost() && sysInfo.GetHost().Contains(Filter)) {
+                if (TextFilter) {
+                    if (sysInfo.HasHost() && sysInfo.GetHost().Contains(TextFilter)) {
                         return true;
                     }
-                    if (std::to_string(nodeId).contains(Filter)) {
+                    if (std::to_string(nodeId).contains(TextFilter)) {
                         return true;
                     }
                     return false;
@@ -355,6 +398,13 @@ public:
         return true;
     }
 
+    // Returns the first load-average sample of the node, or 0 when none is present.
+    static double GetLoadAverage(const NKikimrViewer::TComputeNodeInfo& nodeInfo) {
+        if (nodeInfo.LoadAverageSize() <= 0) {
+            return 0;
+        }
+        return nodeInfo.GetLoadAverage(0);
+    }
+
     static double GetCPU(const NKikimrViewer::TComputeNodeInfo& nodeInfo) {
         double cpu = 0;
         if (nodeInfo.PoolStatsSize() > 0) {
@@ -365,17 +415,10 @@ public:
         return cpu;
     }
 
-    static double GetLoadAverage(const NKikimrViewer::TComputeNodeInfo& nodeInfo) {
-        if (nodeInfo.LoadAverageSize() > 0) {
-            return nodeInfo.GetLoadAverage(0);
-        }
-        return 0;
-    }
-
     void PaginateNodes(::google::protobuf::RepeatedPtrField<NKikimrViewer::TComputeNodeInfo>& nodes) {
         switch (Sort) {
             case ESort::NodeId:
-                SortCollection(nodes, [](const NKikimrViewer::TComputeNodeInfo& node) { return node.GetNodeId();}, ReverseSort);
+                // already sorted
                 break;
             case ESort::Host:
                 SortCollection(nodes, [](const NKikimrViewer::TComputeNodeInfo& node) { return node.GetHost();}, ReverseSort);
@@ -417,129 +460,144 @@ public:
         }
     }
 
+    // Appends one node entry to Result for nodeId, unless CheckNodeFilters rejects it.
+    // For EVersion::v1 the entry is added to the last tenant currently in Result;
+    // otherwise it is added to the top-level node list and, for v2, tagged with path.
+    // NOTE(review): the v1 branch indexes Result.TenantsSize() - 1, so it presumably
+    // relies on the caller having added a tenant entry first - confirm.
+    //   nodeId - node to describe
+    //   path   - tenant path stored on the node entry for v2
+    void FillResponseNode(const TNodeId nodeId, const TString& path) {
+        if (!CheckNodeFilters(nodeId))
+            return;
+        // Remember that this node passed the filters.
+        FoundNodeIds.insert(nodeId);
+        NKikimrViewer::TComputeNodeInfo& computeNodeInfo = Version == EVersion::v1
+            ? *Result.MutableTenants(Result.TenantsSize() - 1)->AddNodes()
+            : *Result.AddNodes();
+        if (Version == EVersion::v2) {
+            computeNodeInfo.SetTenant(path);
+        }
+        computeNodeInfo.SetNodeId(nodeId);
+        // Copy system state, but only when exactly one record is stored for the node.
+        auto itSysInfo = NodeSysInfo.find(nodeId);
+        if (itSysInfo != NodeSysInfo.end()) {
+            if (itSysInfo->second.SystemStateInfoSize() == 1) {
+                const NKikimrWhiteboard::TSystemStateInfo& sysInfo = itSysInfo->second.GetSystemStateInfo(0);
+                if (sysInfo.HasStartTime()) {
+                    computeNodeInfo.SetStartTime(sysInfo.GetStartTime());
+                }
+                if (sysInfo.HasChangeTime()) {
+                    computeNodeInfo.SetChangeTime(sysInfo.GetChangeTime());
+                }
+                computeNodeInfo.MutableSystemLocation()->MergeFrom(sysInfo.GetSystemLocation());
+                computeNodeInfo.MutableLoadAverage()->MergeFrom(sysInfo.GetLoadAverage());
+                if (sysInfo.HasNumberOfCpus()) {
+                    computeNodeInfo.SetNumberOfCpus(sysInfo.GetNumberOfCpus());
+                }
+                // TODO(xenoxeno)
+                if (sysInfo.HasSystemState()) {
+                    computeNodeInfo.SetOverall(GetViewerFlag(sysInfo.GetSystemState()));
+                }
+                // The id reported in the system state overrides the one set above.
+                if (sysInfo.HasNodeId()) {
+                    computeNodeInfo.SetNodeId(sysInfo.GetNodeId());
+                }
+                if (sysInfo.HasDataCenter()) {
+                    computeNodeInfo.SetDataCenter(sysInfo.GetDataCenter());
+                }
+                if (sysInfo.HasRack()) {
+                    computeNodeInfo.SetRack(sysInfo.GetRack());
+                }
+                if (sysInfo.HasHost()) {
+                    computeNodeInfo.SetHost(sysInfo.GetHost());
+                }
+                if (sysInfo.HasVersion()) {
+                    computeNodeInfo.SetVersion(sysInfo.GetVersion());
+                }
+                if (sysInfo.HasMemoryUsed()) {
+                    computeNodeInfo.SetMemoryUsed(sysInfo.GetMemoryUsed());
+                }
+                if (sysInfo.HasMemoryLimit()) {
+                    computeNodeInfo.SetMemoryLimit(sysInfo.GetMemoryLimit());
+                }
+                computeNodeInfo.MutablePoolStats()->MergeFrom(sysInfo.GetPoolStats());
+                computeNodeInfo.MutableEndpoints()->MergeFrom(sysInfo.GetEndpoints());
+                computeNodeInfo.MutableRoles()->MergeFrom(sysInfo.GetRoles());
+
+            }
+        }
+        // Aggregate the indexed tablet records of this node into counts per
+        // (tablet type, state flag), skipping tablets in the Deleted state.
+        auto itTabletInfo = TabletInfoIndex.find(nodeId);
+        if (itTabletInfo != TabletInfoIndex.end()) {
+            THashMap<std::pair<NKikimrTabletBase::TTabletTypes::EType, NKikimrViewer::EFlag>, NKikimrViewer::TTabletStateInfo> tablets;
+            for (const auto* pTabletInfo : itTabletInfo->second) {
+                const auto& tabletInfo = *pTabletInfo;
+                if (tabletInfo.GetState() != NKikimrWhiteboard::TTabletStateInfo::Deleted) {
+                    NKikimrViewer::EFlag state = GetFlagFromTabletState(tabletInfo.GetState());
+                    auto& tablet = tablets[std::make_pair(tabletInfo.GetType(), state)];
+                    tablet.SetCount(tablet.GetCount() + 1);
+                }
+            }
+            for (const auto& [prTypeState, tabletInfo] : tablets) {
+                NKikimrViewer::TTabletStateInfo& tablet = *computeNodeInfo.AddTablets();
+                tablet.MergeFrom(tabletInfo);
+                tablet.SetType(NKikimrTabletBase::TTabletTypes::EType_Name(prTypeState.first));
+                tablet.SetState(prTypeState.second);
+            }
+        }
+        // Copy metrics from the indexed hive stats and append one tablet entry per
+        // state record that carries a tablet type.
+        auto itHiveNodeStats = HiveNodeStatsIndex.find(nodeId);
+        if (itHiveNodeStats != HiveNodeStatsIndex.end()) {
+            computeNodeInfo.MutableMetrics()->CopyFrom(itHiveNodeStats->second->GetMetrics());
+            for (const auto& state : itHiveNodeStats->second->GetStateStats()) {
+                if (state.HasTabletType()) {
+                    NKikimrViewer::TTabletStateInfo& tablet = *computeNodeInfo.AddTablets();
+                    tablet.SetType(NKikimrTabletBase::TTabletTypes::EType_Name(state.GetTabletType()));
+                    tablet.SetCount(state.GetCount());
+                    NKikimrViewer::EFlag flag = GetFlagFromTabletState(state.GetVolatileState());
+                    tablet.SetState(flag);
+                }
+            }
+        }
+    }
+
     void ReplyAndPassAway() {
-        THashMap<TNodeId, TVector<const NKikimrWhiteboard::TTabletStateInfo*>> tabletInfoIndex;
         NKikimrWhiteboard::TEvTabletStateResponse tabletInfo;
         MergeWhiteboardResponses(tabletInfo, NodeTabletInfo);
         for (const auto& info : tabletInfo.GetTabletStateInfo()) {
-            tabletInfoIndex[info.GetNodeId()].emplace_back(&info);
+            TabletInfoIndex[info.GetNodeId()].emplace_back(&info);
         }
-        THashMap<TNodeId, const NKikimrHive::THiveNodeStats*> hiveNodeStatsIndex;
         auto itRootHiveNodeStats = HiveNodeStats.find(RootHiveId);
         if (itRootHiveNodeStats != HiveNodeStats.end()) {
             for (const auto& stats : itRootHiveNodeStats->second->Record.GetNodeStats()) {
-                hiveNodeStatsIndex[stats.GetNodeId()] = &stats;
+                HiveNodeStatsIndex[stats.GetNodeId()] = &stats;
             }
         }
         for (const auto& prStats : HiveNodeStats) {
             if (prStats.first != RootHiveId) {
                 for (const auto& stats : prStats.second->Record.GetNodeStats()) {
-                    hiveNodeStatsIndex[stats.GetNodeId()] = &stats;
+                    HiveNodeStatsIndex[stats.GetNodeId()] = &stats;
                 }
             }
         }
-        for (const std::pair<const TString, NKikimrViewer::TTenant>& prTenant : TenantByPath) {
-            const TString& path = prTenant.first;
-            if (Version == EVersion::v1) {
-                NKikimrViewer::TComputeTenantInfo& computeTenantInfo = *Result.AddTenants();
-                computeTenantInfo.SetName(path);
-                // TODO(xenoxeno)
-                computeTenantInfo.SetOverall(NKikimrViewer::EFlag::Green);
-            }
-            auto itSubDomainKey = SubDomainKeyByPath.find(path);
-            if (itSubDomainKey != SubDomainKeyByPath.end()) {
-                TPathId subDomainKey(itSubDomainKey->second);
-                const NKikimrViewer::TTenant& tenantBySubDomainKey(TenantBySubDomainKey[subDomainKey]);
-                for (TNodeId nodeId : tenantBySubDomainKey.GetNodeIds()) {
-                    if (!CheckNodeFilters(nodeId))
-                        continue;
-                    FoundNodeIds.insert(nodeId);
-                    NKikimrViewer::TComputeNodeInfo& computeNodeInfo = Version == EVersion::v1
-                        ? *Result.MutableTenants(Result.TenantsSize() - 1)->AddNodes()
-                        : *Result.AddNodes();
-                    if (Version == EVersion::v2) {
-                        computeNodeInfo.SetTenant(path);
-                    }
-                    computeNodeInfo.SetNodeId(nodeId);
-                    auto itSysInfo = NodeSysInfo.find(nodeId);
-                    if (itSysInfo != NodeSysInfo.end()) {
-                        if (itSysInfo->second.SystemStateInfoSize() == 1) {
-                            const NKikimrWhiteboard::TSystemStateInfo& sysInfo = itSysInfo->second.GetSystemStateInfo(0);
-                            if (sysInfo.HasStartTime()) {
-                                computeNodeInfo.SetStartTime(sysInfo.GetStartTime());
-                            }
-                            if (sysInfo.HasChangeTime()) {
-                                computeNodeInfo.SetChangeTime(sysInfo.GetChangeTime());
-                            }
-                            computeNodeInfo.MutableSystemLocation()->MergeFrom(sysInfo.GetSystemLocation());
-                            computeNodeInfo.MutableLoadAverage()->MergeFrom(sysInfo.GetLoadAverage());
-                            if (sysInfo.HasNumberOfCpus()) {
-                                computeNodeInfo.SetNumberOfCpus(sysInfo.GetNumberOfCpus());
-                            }
-                            // TODO(xenoxeno)
-                            if (sysInfo.HasSystemState()) {
-                                computeNodeInfo.SetOverall(GetViewerFlag(sysInfo.GetSystemState()));
-                            }
-                            if (sysInfo.HasNodeId()) {
-                                computeNodeInfo.SetNodeId(sysInfo.GetNodeId());
-                            }
-                            if (sysInfo.HasDataCenter()) {
-                                computeNodeInfo.SetDataCenter(sysInfo.GetDataCenter());
-                            }
-                            if (sysInfo.HasRack()) {
-                                computeNodeInfo.SetRack(sysInfo.GetRack());
-                            }
-                            if (sysInfo.HasHost()) {
-                                computeNodeInfo.SetHost(sysInfo.GetHost());
-                            }
-                            if (sysInfo.HasVersion()) {
-                                computeNodeInfo.SetVersion(sysInfo.GetVersion());
-                            }
-                            if (sysInfo.HasMemoryUsed()) {
-                                computeNodeInfo.SetMemoryUsed(sysInfo.GetMemoryUsed());
-                            }
-                            if (sysInfo.HasMemoryLimit()) {
-                                computeNodeInfo.SetMemoryLimit(sysInfo.GetMemoryLimit());
-                            }
-                            computeNodeInfo.MutablePoolStats()->MergeFrom(sysInfo.GetPoolStats());
-                            computeNodeInfo.MutableEndpoints()->MergeFrom(sysInfo.GetEndpoints());
-                            computeNodeInfo.MutableRoles()->MergeFrom(sysInfo.GetRoles());
 
-                        }
-                    }
-                    auto itTabletInfo = tabletInfoIndex.find(nodeId);
-                    if (itTabletInfo != tabletInfoIndex.end()) {
-                        THashMap<std::pair<NKikimrTabletBase::TTabletTypes::EType, NKikimrViewer::EFlag>, NKikimrViewer::TTabletStateInfo> tablets;
-                        for (const auto* pTabletInfo : itTabletInfo->second) {
-                            const auto& tabletInfo = *pTabletInfo;
-                            if (tabletInfo.GetState() != NKikimrWhiteboard::TTabletStateInfo::Deleted) {
-                                NKikimrViewer::EFlag state = GetFlagFromTabletState(tabletInfo.GetState());
-                                auto& tablet = tablets[std::make_pair(tabletInfo.GetType(), state)];
-                                tablet.SetCount(tablet.GetCount() + 1);
-                            }
-                        }
-                        for (const auto& [prTypeState, tabletInfo] : tablets) {
-                            NKikimrViewer::TTabletStateInfo& tablet = *computeNodeInfo.AddTablets();
-                            tablet.MergeFrom(tabletInfo);
-                            tablet.SetType(NKikimrTabletBase::TTabletTypes::EType_Name(prTypeState.first));
-                            tablet.SetState(prTypeState.second);
-                        }
-                    }
-                    auto itHiveNodeStats = hiveNodeStatsIndex.find(nodeId);
-                    if (itHiveNodeStats != hiveNodeStatsIndex.end()) {
-                        computeNodeInfo.MutableMetrics()->CopyFrom(itHiveNodeStats->second->GetMetrics());
-                        for (const auto& state : itHiveNodeStats->second->GetStateStats()) {
-                            if (state.HasTabletType()) {
-                                NKikimrViewer::TTabletStateInfo& tablet = *computeNodeInfo.AddTablets();
-                                tablet.SetType(NKikimrTabletBase::TTabletTypes::EType_Name(state.GetTabletType()));
-                                tablet.SetCount(state.GetCount());
-                                NKikimrViewer::EFlag flag = GetFlagFromTabletState(state.GetVolatileState());
-                                tablet.SetState(flag);
-                            }
+            for (const std::pair<const TString, NKikimrViewer::TTenant>& prTenant : TenantByPath) {
+                const TString& path = prTenant.first;
+                if (Version == EVersion::v1) {
+                    NKikimrViewer::TComputeTenantInfo& computeTenantInfo = *Result.AddTenants();
+                    computeTenantInfo.SetName(path);
+                    // TODO(xenoxeno)
+                    computeTenantInfo.SetOverall(NKikimrViewer::EFlag::Green);
+                }
+                auto itSubDomainKey = SubDomainKeyByPath.find(path);
+                if (itSubDomainKey != SubDomainKeyByPath.end()) {
+                    TPathId subDomainKey(itSubDomainKey->second);
+                    const NKikimrViewer::TTenant& tenantBySubDomainKey(TenantBySubDomainKey[subDomainKey]);
+                    for (TNodeId nodeId : tenantBySubDomainKey.GetNodeIds()) {
+                        if (IsNodesListSorted) {
+                            TenantPathByNodeId[nodeId] = path;
+                        } else {
+                            FillResponseNode(nodeId, path);
                         }
                     }
                 }
             }
+
+        if (IsNodesListSorted) {
+            for (TNodeId nodeId : NodeIds) {
+                FillResponseNode(nodeId, TenantPathByNodeId[nodeId]);
+            }
+        } else if (NeedNodesSorting()) {
+            PaginateNodes(*Result.MutableNodes());
         }
 
         Result.SetTotalNodes(NodeIds.size());
@@ -547,10 +605,6 @@ public:
         // TODO(xenoxeno)
         Result.SetOverall(NKikimrViewer::EFlag::Green);
 
-        if (Version == EVersion::v2) {
-            PaginateNodes(*Result.MutableNodes());
-        }
-
         TStringStream json;
         TProtoToJson::ProtoToJson(json, Result, JsonSettings);
         Send(Event->Sender, new NMon::TEvHttpInfoRes(Viewer->GetHTTPOKJSON(Event->Get(), std::move(json.Str())), 0, NMon::IEvHttpInfoRes::EContentType::Custom));