viewer_nodes.h 93 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798991001011021031041051061071081091101111121131141151161171181191201211221231241251261271281291301311321331341351361371381391401411421431441451461471481491501511521531541551561571581591601611621631641651661671681691701711721731741751761771781791801811821831841851861871881891901911921931941951961971981992002012022032042052062072082092102112122132142152162172182192202212222232242252262272282292302312322332342352362372382392402412422432442452462472482492502512522532542552562572582592602612622632642652662672682692702712722732742752762772782792802812822832842852862872882892902912922932942952962972982993003013023033043053063073083093103113123133143153163173183193203213223233243253263273283293303313323333343353363373383393403413423433443453463473483493503513523533543553563573583593603613623633643653663673683693703713723733743753763773783793803813823833843853863873883893903913923933943953963973983994004014024034044054064074084094104114124134144154164174184194204214224234244254264274284294304314324334344354364374384394404414424434444454464474484494504514524534544554564574584594604614624634644654664674684694704714724734744754764774784794804814824834844854864874884894904914924934944954964974984995005015025035045055065075085095105115125135145155165175185195205215225235245255265275285295305315325335345355365375385395405415425435445455465475485495505515525535545555565575585595605615625635645655665675685695705715725735745755765775785795805815825835845855865875885895905915925935945955965975985996006016026036046056066076086096106116126136146156166176186196206216226236246256266276286296306316326336346356366376386396406416426436446456466476486496506516526536546556566576586596606616626636646656666676686696706716726736746756766776786796806816826836846856866876886896906916926936946956966976986997007017027037047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345134613471348134913501351135213531354135513561357135813591360136113621363136413651366136713681369137013711372137313741375137613771378137913801381138213831384138513861387138813891390139113921393139413951396139713981399140014011402140314041405140614071408140914101411141214131414141514161417141814191420142114221423142414251426142714281429143014311432143314341435143614371438143914401441144214431444144514461447144814491450145114521453145414551456145714581459146014611462146314641465146614671468146914701471147214731474147514761477147814791480148114821483148414851486148714881489149014911492149314941495149614971498149915001501150215031504150515061507150815091510151115121513151415151516151715181519152015211522152315241525152615271528152915301531153215331534153515361537153815391540154115421543154415451546154715481549155015511552155315541555155615571558155915601561156215631564156515661567156815691570157115721573157415751576157715781579158015811582158315841585158615871588158915901591159215931594159515961597159815991600160116021603160416051606160716081609161016111612161316141615161616171618161916201621162216231624162516261627162816291630163116321633163416351636163716381639164016411642164316441645164616471648164916501651165216531654165516561657165816591660166116621663166416651666166716681669167016711672167316741675167616771678167916801681168216831684168516861687168816891690169116921693169416951696169716981699170017011702170317041705170617071708170917101711171217131714171517161717171817191720172117221723172417251726172717281729173017311732173317341735173617371738173917401741174217431744174517461747174817491750175117521753175417551756175717581759176017611762176317641765176617671768176917701771177217731774177517761777177817791780178117821783178417851786178717881789179017911792179317941795179617971798179918001801180218031804180518061807180818091810181118121813181418151816181718181819182018211822182318241825182618271828182918301831183218331834183518361837183818391840184118421843184418451846184718481849185018511852185318541855185618571858185918601861186218631864186518661867186818691870187118721873187418751876187718781879188018811882188318841885188618871888188918901891189218931894189518961897189818991900190119021903190419051906190719081909191019111912191319141915191619171918191919201921192219231924192519261927192819291930193119321933193419351936193719381939194019411942194319441945194619471948194919501951195219531954195519561957195819591960196119621963196419651966196719681969197019711972197319741975197619771978197919801981198219831984198519861987198819891990199119921993199419951996199719981999200020012002200320042005200620072008200920102011201220132014201520162017201820192020202120222023202420252026202720282029203020312032203320342035203620372038203920402041204220432044204520462047204820492050205120522053205420552056205720582059206020612062206320642065206620672068206920702071207220732074207520762077207820792080208120822083208420852086208720882089209020912092209320942095209620972098209921002101210221032104210521062107210821092110211121122113211421152116211721182119212021212122212321242125212621272128212921302131213221332134213521362137213821392140214121422143214421452146214721482149215021512152215321542155215621572158215921602161216221632164216521662167216821692170217121722173217421752176217721782179218021812182218321842185218621872188218921902191219221932194219521962197219821992200220122022203220422052206220722082209221022112212221322142215221622172218
  1. #pragma once
  2. #include "json_handlers.h"
  3. #include "json_pipe_req.h"
  4. #include "log.h"
  5. #include "viewer.h"
  6. #include "viewer_helper.h"
  7. #include "viewer_tabletinfo.h"
  8. #include "wb_group.h"
  9. #include <library/cpp/protobuf/json/proto2json.h>
  10. namespace NKikimr::NViewer {
  11. using namespace NProtobufJson;
  12. using namespace NActors;
  13. using namespace NNodeWhiteboard;
  14. enum class ENodeFields : ui8 {
  15. NodeInfo,
  16. SystemState,
  17. PDisks,
  18. VDisks,
  19. Tablets,
  20. NodeId,
  21. HostName,
  22. DC,
  23. Rack,
  24. Version,
  25. Uptime,
  26. Memory,
  27. CPU,
  28. LoadAverage,
  29. Missing,
  30. DiskSpaceUsage,
  31. SubDomainKey,
  32. DisconnectTime,
  33. Database,
  34. COUNT
  35. };
  36. constexpr ui8 operator +(ENodeFields e) {
  37. return static_cast<ui8>(e);
  38. }
  39. class TJsonNodes : public TViewerPipeClient {
  40. using TThis = TJsonNodes;
  41. using TBase = TViewerPipeClient;
  42. using TNodeId = ui32;
  43. using TPDiskId = std::pair<TNodeId, ui32>;
  44. using TFieldsType = std::bitset<+ENodeFields::COUNT>;
  45. enum ENavigateRequest {
  46. ENavigateRequestDatabase,
  47. ENavigateRequestResource,
  48. ENavigateRequestPath,
  49. };
  50. enum EBoardInfoRequest {
  51. EBoardInfoRequestDatabase,
  52. EBoardInfoRequestResource,
  53. };
  54. std::optional<TRequestResponse<TEvInterconnect::TEvNodesInfo>> NodesInfoResponse;
  55. std::optional<TRequestResponse<TEvWhiteboard::TEvNodeStateResponse>> NodeStateResponse;
  56. std::optional<TRequestResponse<TEvStateStorage::TEvBoardInfo>> DatabaseBoardInfoResponse;
  57. std::optional<TRequestResponse<TEvStateStorage::TEvBoardInfo>> ResourceBoardInfoResponse;
  58. std::optional<TRequestResponse<TEvTxProxySchemeCache::TEvNavigateKeySetResult>> PathNavigateResponse;
  59. std::unordered_map<TTabletId, TRequestResponse<TEvHive::TEvResponseHiveNodeStats>> HiveNodeStats;
  60. std::vector<TTabletId> HivesToAsk;
  61. bool AskHiveAboutPaths = false;
  62. std::optional<TRequestResponse<NSysView::TEvSysView::TEvGetStoragePoolsResponse>> StoragePoolsResponse;
  63. std::optional<TRequestResponse<NSysView::TEvSysView::TEvGetGroupsResponse>> GroupsResponse;
  64. std::optional<TRequestResponse<NSysView::TEvSysView::TEvGetVSlotsResponse>> VSlotsResponse;
  65. std::optional<TRequestResponse<NSysView::TEvSysView::TEvGetPDisksResponse>> PDisksResponse;
  66. int WhiteboardStateRequestsInFlight = 0;
  67. std::unordered_map<TNodeId, TRequestResponse<TEvWhiteboard::TEvSystemStateResponse>> SystemStateResponse;
  68. std::unordered_map<TNodeId, TRequestResponse<TEvWhiteboard::TEvVDiskStateResponse>> VDiskStateResponse;
  69. std::unordered_map<TNodeId, TRequestResponse<TEvWhiteboard::TEvPDiskStateResponse>> PDiskStateResponse;
  70. std::unordered_map<TNodeId, TRequestResponse<TEvWhiteboard::TEvTabletStateResponse>> TabletStateResponse;
  71. std::unordered_map<TNodeId, TRequestResponse<TEvViewer::TEvViewerResponse>> SystemViewerResponse;
  72. std::unordered_map<TNodeId, TRequestResponse<TEvViewer::TEvViewerResponse>> TabletViewerResponse;
  73. TJsonSettings JsonSettings;
  74. ui32 Timeout = 0;
  75. enum ETimeoutTag : ui64 {
  76. NoTimeout,
  77. TimeoutTablets,
  78. TimeoutFinal,
  79. };
  80. ETimeoutTag CurrentTimeoutState = NoTimeout;
  81. TString SharedDatabase;
  82. bool FilterDatabase = false;
  83. bool HasDatabaseNodes = false;
  84. TPathId FilterPathId;
  85. TSubDomainKey SubDomainKey;
  86. TSubDomainKey SharedSubDomainKey;
  87. bool FilterSubDomainKey = false;
  88. TString FilterPath;
  89. TString FilterStoragePool;
  90. std::pair<ui64, ui64> FilterStoragePoolId;
  91. std::unordered_set<TNodeId> FilterNodeIds;
  92. std::unordered_set<ui32> FilterGroupIds;
  93. std::optional<std::size_t> Offset;
  94. std::optional<std::size_t> Limit;
  95. ui32 UptimeSeconds = 0;
  96. bool ProblemNodesOnly = false;
  97. TString Filter;
  98. bool AllWhiteboardFields = false;
  99. enum class EWith {
  100. Everything,
  101. MissingDisks,
  102. SpaceProblems,
  103. };
  104. EWith With = EWith::Everything;
  105. enum class EType {
  106. Any,
  107. Static,
  108. Dynamic,
  109. };
  110. EType Type = EType::Any;
  111. enum class EFilterStorageStage {
  112. None,
  113. Pools,
  114. Groups,
  115. VSlots,
  116. };
  117. EFilterStorageStage FilterStorageStage = EFilterStorageStage::None;
  118. TNodeId MinAllowedNodeId = std::numeric_limits<TNodeId>::min();
  119. TNodeId MaxAllowedNodeId = std::numeric_limits<TNodeId>::max();
  120. std::optional<std::size_t> MaximumDisksPerNode;
  121. std::optional<std::size_t> MaximumSlotsPerDisk;
  122. ui32 SpaceUsageProblem = 90; // %
  123. bool OffloadMerge = true;
  124. size_t OffloadMergeAttempts = 2;
  125. struct TNode {
  126. TEvInterconnect::TNodeInfo NodeInfo;
  127. NKikimrWhiteboard::TSystemStateInfo SystemState;
  128. std::vector<NKikimrWhiteboard::TPDiskStateInfo> PDisks;
  129. std::vector<NKikimrSysView::TPDiskEntry> SysViewPDisks;
  130. std::vector<NKikimrWhiteboard::TVDiskStateInfo> VDisks;
  131. std::vector<NKikimrSysView::TVSlotEntry> SysViewVDisks;
  132. std::vector<NKikimrViewer::TTabletStateInfo> Tablets;
  133. TSubDomainKey SubDomainKey;
  134. TString Database;
  135. ui32 MissingDisks = 0;
  136. float DiskSpaceUsage = 0; // the highest
  137. bool Problems = false;
  138. bool Connected = false;
  139. bool Disconnected = false;
  140. bool HasDisks = false;
  141. bool GotDatabaseFromDatabaseBoardInfo = false;
  142. bool GotDatabaseFromResourceBoardInfo = false;
  143. TNodeId GetNodeId() const {
  144. return NodeInfo.NodeId;
  145. }
  146. TString GetHostName() const {
  147. if (NodeInfo.Host) {
  148. return NodeInfo.Host;
  149. }
  150. if (SystemState.GetHost()) {
  151. return SystemState.GetHost();
  152. }
  153. if (NodeInfo.ResolveHost) {
  154. return NodeInfo.ResolveHost;
  155. }
  156. return {};
  157. }
  158. TString GetDataCenter() const {
  159. if (NodeInfo.Location.GetDataCenterId()) {
  160. return NodeInfo.Location.GetDataCenterId();
  161. }
  162. return SystemState.GetLocation().GetDataCenter();
  163. }
  164. TString GetRack() const {
  165. if (NodeInfo.Location.GetRackId()) {
  166. return NodeInfo.Location.GetRackId();
  167. }
  168. return SystemState.GetLocation().GetRack();
  169. }
  170. void Cleanup() {
  171. if (SystemState.HasSystemLocation()) {
  172. SystemState.ClearSystemLocation();
  173. }
  174. if (SystemState.HasLocation()) {
  175. if (SystemState.GetLocation().GetDataCenter().empty()) {
  176. SystemState.MutableLocation()->ClearDataCenter();
  177. }
  178. if (SystemState.GetLocation().GetRack().empty()) {
  179. SystemState.MutableLocation()->ClearRack();
  180. }
  181. if (SystemState.GetLocation().GetUnit().empty() || SystemState.GetLocation().GetUnit() == "0") {
  182. SystemState.MutableLocation()->ClearUnit();
  183. }
  184. }
  185. }
  186. void CalcDatabase() {
  187. if (SystemState.TenantsSize() == 1) {
  188. Database = SystemState.GetTenants(0);
  189. }
  190. }
  191. void CalcDisks() {
  192. MissingDisks = 0;
  193. DiskSpaceUsage = 0;
  194. if (!PDisks.empty()) {
  195. for (const auto& pdisk : PDisks) {
  196. float diskSpaceUsage = pdisk.GetTotalSize() ? 100.0 * (pdisk.GetTotalSize() - pdisk.GetAvailableSize()) / pdisk.GetTotalSize() : 0;
  197. DiskSpaceUsage = std::max(DiskSpaceUsage, diskSpaceUsage);
  198. if (pdisk.state() == NKikimrBlobStorage::TPDiskState::Normal) {
  199. continue;
  200. }
  201. ++MissingDisks;
  202. }
  203. } else {
  204. for (const auto& entry : SysViewPDisks) {
  205. const auto& pdisk(entry.GetInfo());
  206. float diskSpaceUsage = pdisk.GetTotalSize() ? 100.0 * (pdisk.GetTotalSize() - pdisk.GetAvailableSize()) / pdisk.GetTotalSize() : 0;
  207. DiskSpaceUsage = std::max(DiskSpaceUsage, diskSpaceUsage);
  208. NKikimrBlobStorage::EDriveStatus driveStatus = NKikimrBlobStorage::EDriveStatus::UNKNOWN;
  209. if (NKikimrBlobStorage::EDriveStatus_Parse(pdisk.GetStatusV2(), &driveStatus)) {
  210. switch (driveStatus) {
  211. case NKikimrBlobStorage::EDriveStatus::ACTIVE:
  212. case NKikimrBlobStorage::EDriveStatus::INACTIVE:
  213. continue;
  214. default:
  215. ++MissingDisks;
  216. break;
  217. }
  218. }
  219. }
  220. }
  221. }
  222. void DisconnectNode() {
  223. Problems = true;
  224. Disconnected = true;
  225. if (!SystemState.HasDisconnectTime()) {
  226. TInstant disconnectTime;
  227. for (const auto& entry : SysViewPDisks) {
  228. const auto& pdisk(entry.GetInfo());
  229. disconnectTime = std::max(disconnectTime, TInstant::MicroSeconds(pdisk.GetStatusChangeTimestamp()));
  230. }
  231. if (disconnectTime) {
  232. SystemState.SetDisconnectTime(disconnectTime.Seconds());
  233. }
  234. }
  235. }
  236. void RemapDisks() {
  237. if (PDisks.empty() && !SysViewPDisks.empty()) {
  238. for (const auto& entry : SysViewPDisks) {
  239. const auto& pdisk(entry.GetInfo());
  240. auto& pDiskState = PDisks.emplace_back();
  241. NKikimrBlobStorage::EDriveStatus driveStatus = NKikimrBlobStorage::EDriveStatus::UNKNOWN;
  242. if (NKikimrBlobStorage::EDriveStatus_Parse(pdisk.GetStatusV2(), &driveStatus)) {
  243. switch (driveStatus) {
  244. case NKikimrBlobStorage::EDriveStatus::ACTIVE:
  245. case NKikimrBlobStorage::EDriveStatus::INACTIVE:
  246. pDiskState.SetState(NKikimrBlobStorage::TPDiskState::Normal);
  247. break;
  248. default:
  249. break;
  250. }
  251. }
  252. pDiskState.SetPDiskId(entry.GetKey().GetPDiskId());
  253. pDiskState.SetNodeId(entry.GetKey().GetNodeId());
  254. pDiskState.SetPath(pdisk.GetPath());
  255. pDiskState.SetGuid(pdisk.GetGuid());
  256. pDiskState.SetTotalSize(pdisk.GetTotalSize());
  257. pDiskState.SetAvailableSize(pdisk.GetAvailableSize());
  258. pDiskState.SetExpectedSlotCount(pdisk.GetExpectedSlotCount());
  259. }
  260. }
  261. if (VDisks.empty() && !SysViewVDisks.empty()) {
  262. for (const auto& entry : SysViewVDisks) {
  263. const auto& vdisk(entry.GetInfo());
  264. auto& vDiskState = VDisks.emplace_back();
  265. vDiskState.MutableVDiskId()->SetGroupID(vdisk.GetGroupId());
  266. vDiskState.MutableVDiskId()->SetGroupGeneration(vdisk.GetGroupGeneration());
  267. vDiskState.MutableVDiskId()->SetRing(vdisk.GetFailRealm());
  268. vDiskState.MutableVDiskId()->SetDomain(vdisk.GetFailDomain());
  269. vDiskState.MutableVDiskId()->SetVDisk(vdisk.GetVDisk());
  270. vDiskState.SetNodeId(entry.GetKey().GetNodeId());
  271. vDiskState.SetPDiskId(entry.GetKey().GetPDiskId());
  272. vDiskState.SetAllocatedSize(vdisk.GetAllocatedSize());
  273. vDiskState.SetAvailableSize(vdisk.GetAvailableSize());
  274. vDiskState.SetVDiskSlotId(entry.GetKey().GetVSlotId());
  275. NKikimrBlobStorage::EVDiskStatus vDiskStatus;
  276. if (NKikimrBlobStorage::EVDiskStatus_Parse(vdisk.GetStatusV2(), &vDiskStatus)) {
  277. switch(vDiskStatus) {
  278. case NKikimrBlobStorage::EVDiskStatus::ERROR:
  279. vDiskState.SetVDiskState(NKikimrWhiteboard::EVDiskState::LocalRecoveryError);
  280. break;
  281. case NKikimrBlobStorage::EVDiskStatus::INIT_PENDING:
  282. vDiskState.SetVDiskState(NKikimrWhiteboard::EVDiskState::Initial);
  283. break;
  284. case NKikimrBlobStorage::EVDiskStatus::REPLICATING:
  285. vDiskState.SetVDiskState(NKikimrWhiteboard::EVDiskState::OK);
  286. vDiskState.SetReplicated(false);
  287. break;
  288. case NKikimrBlobStorage::EVDiskStatus::READY:
  289. vDiskState.SetVDiskState(NKikimrWhiteboard::EVDiskState::OK);
  290. break;
  291. }
  292. }
  293. }
  294. }
  295. }
  296. bool IsStatic() const {
  297. return NodeInfo.IsStatic;
  298. }
  299. NKikimrWhiteboard::EFlag GetOverall() const {
  300. return SystemState.GetSystemState();
  301. }
  302. int GetCandidateScore() const {
  303. int score = 0;
  304. if (Connected) {
  305. score += 100;
  306. }
  307. if (IsStatic()) {
  308. score += 10;
  309. }
  310. return score;
  311. }
  312. TString GetDiskUsageForGroup() const {
  313. //return TStringBuilder() << std::ceil(std::clamp<float>(DiskSpaceUsage, 0, 100) / 5) * 5 << '%';
  314. // we want 0%-95% groups instead of 5%-100% groups
  315. return TStringBuilder() << std::floor(std::clamp<float>(DiskSpaceUsage, 0, 100) / 5) * 5 << '%';
  316. }
  317. TString GetUptimeForGroup(TInstant now) const {
  318. if (!Disconnected) {
  319. auto uptime = static_cast<int>(now.Seconds()) - SystemState.GetStartTime();
  320. if (uptime < 60 * 10) {
  321. return "uptime < 10m";
  322. }
  323. if (uptime < 60 * 60) {
  324. return "uptime < 1h";
  325. }
  326. if (uptime < 60 * 60 * 24) {
  327. return "uptime < 24h";
  328. }
  329. if (uptime < 60 * 60 * 24 * 7) {
  330. return "uptime < 1 week";
  331. }
  332. return "uptime > 1 week";
  333. } else {
  334. if (SystemState.HasDisconnectTime()) {
  335. auto downtime = static_cast<int>(now.Seconds()) - SystemState.GetDisconnectTime();
  336. if (downtime < 60 * 10) {
  337. return "downtime < 10m";
  338. }
  339. if (downtime < 60 * 60) {
  340. return "downtime < 1h";
  341. }
  342. if (downtime < 60 * 60 * 24) {
  343. return "downtime < 24h";
  344. }
  345. if (downtime < 60 * 60 * 24 * 7) {
  346. return "downtime < 1 week";
  347. }
  348. return "downtime > 1 week";
  349. } else {
  350. return "disconnected";
  351. }
  352. }
  353. }
  354. TString GetVersionForGroup() const {
  355. if (SystemState.HasVersion()) {
  356. return SystemState.GetVersion();
  357. } else {
  358. return "unknown";
  359. }
  360. }
  361. bool HasDatabase(const TString& database) const {
  362. return Database == database;
  363. }
  364. bool HasSubDomainKey(const TSubDomainKey& subDomainKey) const {
  365. return SubDomainKey == subDomainKey;
  366. }
  367. };
  368. struct TNodeGroup {
  369. TString Name;
  370. std::vector<TNode*> Nodes;
  371. };
  372. struct TNodeBatch {
  373. std::vector<TNode*> NodesToAskFor;
  374. std::vector<TNode*> NodesToAskAbout;
  375. size_t Offset = 0;
  376. bool HasStaticNodes = false;
  377. TNodeId ChooseNodeId() {
  378. if (Offset >= NodesToAskFor.size()) {
  379. return 0;
  380. }
  381. return NodesToAskFor[Offset++]->GetNodeId();
  382. }
  383. };
  384. using TNodeData = std::vector<TNode>;
  385. using TNodeView = std::deque<TNode*>;
  386. TNodeData NodeData;
  387. TNodeView NodeView;
  388. std::vector<TNodeGroup> NodeGroups;
  389. std::unordered_map<TNodeId, TNode*> NodesByNodeId;
  390. std::unordered_map<TNodeId, TNodeBatch> NodeBatches;
  391. TFieldsType FieldsRequired;
  392. TFieldsType FieldsAvailable;
  393. const TFieldsType FieldsAll = TFieldsType().set();
  394. const TFieldsType FieldsNodeInfo = TFieldsType().set(+ENodeFields::NodeInfo)
  395. .set(+ENodeFields::NodeId)
  396. .set(+ENodeFields::HostName)
  397. .set(+ENodeFields::DC)
  398. .set(+ENodeFields::Rack);
  399. const TFieldsType FieldsSystemState = TFieldsType().set(+ENodeFields::SystemState)
  400. .set(+ENodeFields::Database)
  401. .set(+ENodeFields::Version)
  402. .set(+ENodeFields::Uptime)
  403. .set(+ENodeFields::Memory)
  404. .set(+ENodeFields::CPU)
  405. .set(+ENodeFields::LoadAverage);
  406. const TFieldsType FieldsPDisks = TFieldsType().set(+ENodeFields::PDisks)
  407. .set(+ENodeFields::Missing)
  408. .set(+ENodeFields::DiskSpaceUsage);
  409. const TFieldsType FieldsVDisks = TFieldsType().set(+ENodeFields::VDisks);
  410. const TFieldsType FieldsTablets = TFieldsType().set(+ENodeFields::Tablets);
  411. const TFieldsType FieldsHiveNodeStat = TFieldsType().set(+ENodeFields::SubDomainKey)
  412. .set(+ENodeFields::DisconnectTime);
  413. const std::unordered_map<ENodeFields, TFieldsType> DependentFields = {
  414. { ENodeFields::DC, TFieldsType().set(+ENodeFields::SystemState) },
  415. { ENodeFields::Rack, TFieldsType().set(+ENodeFields::SystemState) },
  416. { ENodeFields::Uptime, TFieldsType().set(+ENodeFields::SystemState) },
  417. { ENodeFields::Version, TFieldsType().set(+ENodeFields::SystemState) },
  418. { ENodeFields::Missing, TFieldsType().set(+ENodeFields::PDisks) },
  419. };
  420. bool FieldsNeeded(TFieldsType fields) const {
  421. return (FieldsRequired & (fields & ~FieldsAvailable)).any();
  422. }
  423. ENodeFields SortBy = ENodeFields::NodeId;
  424. bool ReverseSort = false;
  425. ENodeFields GroupBy = ENodeFields::NodeId;
  426. bool NeedFilter = false;
  427. bool NeedGroup = false;
  428. bool NeedSort = false;
  429. bool NeedLimit = false;
  430. ui64 TotalNodes = 0;
  431. ui64 FoundNodes = 0;
  432. bool NoRack = false;
  433. bool NoDC = false;
  434. std::vector<TString> Problems;
  435. void AddProblem(const TString& problem) {
  436. for (const auto& p : Problems) {
  437. if (p == problem) {
  438. return;
  439. }
  440. }
  441. Problems.push_back(problem);
  442. }
  443. static ENodeFields ParseENodeFields(TStringBuf field) {
  444. ENodeFields result = ENodeFields::COUNT;
  445. if (field == "NodeId" || field == "Id") {
  446. result = ENodeFields::NodeId;
  447. } else if (field == "Host") {
  448. result = ENodeFields::HostName;
  449. } else if (field == "DC") {
  450. result = ENodeFields::DC;
  451. } else if (field == "Rack") {
  452. result = ENodeFields::Rack;
  453. } else if (field == "Version") {
  454. result = ENodeFields::Version;
  455. } else if (field == "Uptime") {
  456. result = ENodeFields::Uptime;
  457. } else if (field == "Memory") {
  458. result = ENodeFields::Memory;
  459. } else if (field == "CPU") {
  460. result = ENodeFields::CPU;
  461. } else if (field == "LoadAverage") {
  462. result = ENodeFields::LoadAverage;
  463. } else if (field == "Missing") {
  464. result = ENodeFields::Missing;
  465. } else if (field == "DiskSpaceUsage") {
  466. result = ENodeFields::DiskSpaceUsage;
  467. } else if (field == "DisconnectTime") {
  468. result = ENodeFields::DisconnectTime;
  469. } else if (field == "Database") {
  470. result = ENodeFields::Database;
  471. } else if (field == "SubDomainKey") {
  472. result = ENodeFields::SubDomainKey;
  473. } else if (field == "SystemState") {
  474. result = ENodeFields::SystemState;
  475. } else if (field == "PDisks") {
  476. result = ENodeFields::PDisks;
  477. } else if (field == "VDisks") {
  478. result = ENodeFields::VDisks;
  479. } else if (field == "Tablets") {
  480. result = ENodeFields::Tablets;
  481. }
  482. return result;
  483. }
  484. public:
  485. TString GetLogPrefix() {
  486. static TString prefix = "json/nodes ";
  487. return prefix;
  488. }
  489. TJsonNodes(IViewer* viewer, NMon::TEvHttpInfo::TPtr& ev)
  490. : TBase(viewer, ev)
  491. {
  492. const auto& params(Event->Get()->Request.GetParams());
  493. JsonSettings.EnumAsNumbers = !FromStringWithDefault<bool>(params.Get("enums"), true);
  494. JsonSettings.UI64AsString = !FromStringWithDefault<bool>(params.Get("ui64"), false);
  495. InitConfig(params);
  496. Timeout = FromStringWithDefault<ui32>(params.Get("timeout"), 10000);
  497. FieldsRequired.set(+ENodeFields::NodeId);
  498. UptimeSeconds = FromStringWithDefault<ui32>(params.Get("uptime"), 0);
  499. ProblemNodesOnly = FromStringWithDefault<bool>(params.Get("problems_only"), ProblemNodesOnly);
  500. Filter = params.Get("filter");
  501. if (UptimeSeconds || ProblemNodesOnly || !Filter.empty()) {
  502. FieldsRequired.set(+ENodeFields::SystemState);
  503. }
  504. FilterPath = params.Get("path");
  505. if (FilterPath && !Database) {
  506. Database = FilterPath;
  507. }
  508. if (Database) {
  509. FilterDatabase = true;
  510. }
  511. if (FilterPath == Database) {
  512. FilterPath.clear();
  513. }
  514. OffloadMerge = FromStringWithDefault<bool>(params.Get("offload_merge"), OffloadMerge);
  515. OffloadMergeAttempts = FromStringWithDefault<bool>(params.Get("offload_merge_attempts"), OffloadMergeAttempts);
  516. Direct = FromStringWithDefault<bool>(params.Get("direct"), Direct);
  517. FilterStoragePool = params.Get("pool");
  518. if (FilterStoragePool.empty()) {
  519. FilterStoragePool = params.Get("storage_pool");
  520. }
  521. if (params.Has("group_id")) {
  522. FilterGroupIds.insert(FromStringWithDefault<ui32>(params.Get("group_id"), -1));
  523. }
  524. SplitIds(params.Get("node_id"), ',', FilterNodeIds);
  525. auto itZero = FilterNodeIds.find(0);
  526. if (itZero != FilterNodeIds.end()) {
  527. FilterNodeIds.erase(itZero);
  528. FilterNodeIds.insert(TlsActivationContext->ActorSystem()->NodeId);
  529. }
  530. if (params.Get("with") == "missing") {
  531. With = EWith::MissingDisks;
  532. FieldsRequired.set(+ENodeFields::Missing);
  533. } else if (params.Get("with") == "space") {
  534. With = EWith::SpaceProblems;
  535. FieldsRequired.set(+ENodeFields::DiskSpaceUsage);
  536. }
  537. if (params.Get("type") == "static") {
  538. Type = EType::Static;
  539. FieldsRequired.set(+ENodeFields::NodeInfo);
  540. } else if (params.Get("type") == "dynamic") {
  541. Type = EType::Dynamic;
  542. FieldsRequired.set(+ENodeFields::NodeInfo);
  543. } else if (params.Get("type") == "any") {
  544. Type = EType::Any;
  545. }
  546. NeedFilter = (With != EWith::Everything) || (Type != EType::Any) || !Filter.empty() || !FilterNodeIds.empty() || ProblemNodesOnly || UptimeSeconds > 0;
  547. if (params.Has("offset")) {
  548. Offset = FromStringWithDefault<ui32>(params.Get("offset"), 0);
  549. NeedLimit = true;
  550. }
  551. if (params.Has("limit")) {
  552. Limit = FromStringWithDefault<ui32>(params.Get("limit"), std::numeric_limits<ui32>::max());
  553. NeedLimit = true;
  554. }
  555. if (FromStringWithDefault<bool>(params.Get("storage"))) {
  556. FieldsRequired.set(+ENodeFields::PDisks);
  557. FieldsRequired.set(+ENodeFields::VDisks);
  558. }
  559. if (FromStringWithDefault<bool>(params.Get("tablets"))) {
  560. FieldsRequired.set(+ENodeFields::Tablets);
  561. }
  562. TStringBuf sort = params.Get("sort");
  563. if (sort) {
  564. NeedSort = true;
  565. if (sort.StartsWith("-") || sort.StartsWith("+")) {
  566. ReverseSort = (sort[0] == '-');
  567. sort.Skip(1);
  568. }
  569. SortBy = ParseENodeFields(sort);
  570. FieldsRequired.set(+SortBy);
  571. }
  572. TString fieldsRequired = params.Get("fields_required");
  573. if (!fieldsRequired.empty()) {
  574. if (fieldsRequired == "all") {
  575. FieldsRequired = FieldsAll;
  576. } else {
  577. TStringBuf source = fieldsRequired;
  578. for (TStringBuf value = source.NextTok(','); !value.empty(); value = source.NextTok(',')) {
  579. ENodeFields field = ParseENodeFields(value);
  580. if (field != ENodeFields::COUNT) {
  581. FieldsRequired.set(+field);
  582. }
  583. }
  584. }
  585. } else {
  586. FieldsRequired.set(+ENodeFields::SystemState);
  587. }
  588. TStringBuf group = params.Get("group");
  589. if (group) {
  590. NeedGroup = true;
  591. GroupBy = ParseENodeFields(group);
  592. FieldsRequired.set(+GroupBy);
  593. NeedSort = false;
  594. NeedLimit = false;
  595. }
  596. for (auto field = +ENodeFields::NodeId; field != +ENodeFields::COUNT; ++field) {
  597. if (FieldsRequired.test(field)) {
  598. auto itDependentFields = DependentFields.find(static_cast<ENodeFields>(field));
  599. if (itDependentFields != DependentFields.end()) {
  600. FieldsRequired |= itDependentFields->second;
  601. }
  602. }
  603. }
  604. if (FromStringWithDefault<bool>(params.Get("all_whiteboard_fields"), false)) {
  605. AllWhiteboardFields = true;
  606. }
  607. }
  608. void Bootstrap() override {
  609. if (TBase::NeedToRedirect()) {
  610. return;
  611. }
  612. if (FieldsNeeded(FieldsNodeInfo)) {
  613. NodesInfoResponse = MakeRequest<TEvInterconnect::TEvNodesInfo>(GetNameserviceActorId(), new TEvInterconnect::TEvListNodes());
  614. NodeStateResponse = MakeWhiteboardRequest(TActivationContext::ActorSystem()->NodeId, new TEvWhiteboard::TEvNodeStateRequest());
  615. }
  616. if (FilterStoragePool || !FilterGroupIds.empty()) {
  617. FilterDatabase = false; // we disable database filter if we're filtering by pool or group
  618. }
  619. if (FilterDatabase) {
  620. if (!DatabaseNavigateResponse) {
  621. DatabaseNavigateResponse = MakeRequestSchemeCacheNavigate(Database, ENavigateRequestDatabase);
  622. }
  623. if (!FieldsNeeded(FieldsHiveNodeStat) && !(FilterPath && FieldsNeeded(FieldsTablets))) {
  624. DatabaseBoardInfoResponse = MakeRequestStateStorageEndpointsLookup(Database, EBoardInfoRequestDatabase);
  625. }
  626. }
  627. if (FilterPath && FieldsNeeded(FieldsTablets)) {
  628. PathNavigateResponse = MakeRequestSchemeCacheNavigate(FilterPath, ENavigateRequestPath);
  629. }
  630. if (FilterStoragePool) {
  631. StoragePoolsResponse = RequestBSControllerPools();
  632. GroupsResponse = RequestBSControllerGroups();
  633. VSlotsResponse = RequestBSControllerVSlots();
  634. FilterStorageStage = EFilterStorageStage::Pools;
  635. } else if (!FilterGroupIds.empty()) {
  636. VSlotsResponse = RequestBSControllerVSlots();
  637. FilterStorageStage = EFilterStorageStage::VSlots;
  638. }
  639. if (With != EWith::Everything) {
  640. PDisksResponse = RequestBSControllerPDisks();
  641. }
  642. if (ProblemNodesOnly || GroupBy == ENodeFields::Uptime) {
  643. FieldsRequired.set(+ENodeFields::SystemState);
  644. TTabletId rootHiveId = AppData()->DomainsInfo->GetHive();
  645. HivesToAsk.push_back(rootHiveId);
  646. if (!PDisksResponse) {
  647. PDisksResponse = RequestBSControllerPDisks();
  648. }
  649. }
  650. if (FieldsNeeded(FieldsHiveNodeStat) && !FilterDatabase && !FilterPath) {
  651. TTabletId rootHiveId = AppData()->DomainsInfo->GetHive();
  652. HivesToAsk.push_back(rootHiveId);
  653. }
  654. Schedule(TDuration::MilliSeconds(Timeout * 50 / 100), new TEvents::TEvWakeup(TimeoutTablets)); // 50% timeout (for tablets)
  655. TBase::Become(&TThis::StateWork, TDuration::MilliSeconds(Timeout), new TEvents::TEvWakeup(TimeoutFinal));
  656. }
  657. void InvalidateNodes() {
  658. NodesByNodeId.clear();
  659. }
  660. void RebuildNodesByNodeId() {
  661. NodesByNodeId.clear();
  662. for (TNode* node : NodeView) {
  663. NodesByNodeId.emplace(node->GetNodeId(), node);
  664. }
  665. }
  666. TNode* FindNode(TNodeId nodeId) {
  667. if (NodesByNodeId.empty()) {
  668. RebuildNodesByNodeId();
  669. }
  670. auto itNode = NodesByNodeId.find(nodeId);
  671. if (itNode != NodesByNodeId.end()) {
  672. return itNode->second;
  673. }
  674. return nullptr;
  675. }
  676. bool PreFilterDone() const {
  677. return !FilterDatabase && FilterStorageStage == EFilterStorageStage::None;
  678. }
  679. bool FilterDone() const {
  680. return PreFilterDone() && !NeedFilter;
  681. }
  682. void ApplyFilter() {
  683. // database pre-filter, affects TotalNodes count
  684. if (FilterDatabase) {
  685. if (FilterSubDomainKey && FieldsAvailable.test(+ENodeFields::SubDomainKey)) {
  686. TNodeView nodeView;
  687. if (HasDatabaseNodes) {
  688. for (TNode* node : NodeView) {
  689. if (node->HasSubDomainKey(SubDomainKey)) {
  690. nodeView.push_back(node);
  691. }
  692. }
  693. } else {
  694. for (TNode* node : NodeView) {
  695. if (node->HasSubDomainKey(SharedSubDomainKey)) {
  696. nodeView.push_back(node);
  697. }
  698. }
  699. }
  700. NodeView.swap(nodeView);
  701. FoundNodes = TotalNodes = NodeView.size();
  702. InvalidateNodes();
  703. FilterDatabase = false;
  704. } else if (FieldsAvailable.test(+ENodeFields::Database)) {
  705. TNodeView nodeView;
  706. if (HasDatabaseNodes) {
  707. for (TNode* node : NodeView) {
  708. if (node->HasDatabase(Database)) {
  709. nodeView.push_back(node);
  710. }
  711. }
  712. } else {
  713. for (TNode* node : NodeView) {
  714. if (node->HasDatabase(SharedDatabase)) {
  715. nodeView.push_back(node);
  716. }
  717. }
  718. }
  719. NodeView.swap(nodeView);
  720. FoundNodes = TotalNodes = NodeView.size();
  721. InvalidateNodes();
  722. FilterDatabase = false;
  723. } else {
  724. return;
  725. }
  726. }
  727. // storage/nodes pre-filter, affects TotalNodes count
  728. if (FilterStorageStage != EFilterStorageStage::None) {
  729. return;
  730. }
  731. if (!FilterNodeIds.empty() && FieldsAvailable.test(+ENodeFields::NodeId)) {
  732. TNodeView nodeView;
  733. for (TNode* node : NodeView) {
  734. if (FilterNodeIds.count(node->GetNodeId()) > 0) {
  735. nodeView.push_back(node);
  736. }
  737. }
  738. NodeView.swap(nodeView);
  739. FoundNodes = TotalNodes = NodeView.size();
  740. InvalidateNodes();
  741. FilterNodeIds.clear();
  742. }
  743. if (NeedFilter) {
  744. if (With == EWith::MissingDisks && FieldsAvailable.test(+ENodeFields::Missing)) {
  745. TNodeView nodeView;
  746. for (TNode* node : NodeView) {
  747. if (node->MissingDisks != 0) {
  748. nodeView.push_back(node);
  749. }
  750. }
  751. NodeView.swap(nodeView);
  752. With = EWith::Everything;
  753. InvalidateNodes();
  754. }
  755. if (With == EWith::SpaceProblems && FieldsAvailable.test(+ENodeFields::DiskSpaceUsage)) {
  756. TNodeView nodeView;
  757. for (TNode* node : NodeView) {
  758. if (node->DiskSpaceUsage >= SpaceUsageProblem) {
  759. nodeView.push_back(node);
  760. }
  761. }
  762. NodeView.swap(nodeView);
  763. With = EWith::Everything;
  764. InvalidateNodes();
  765. }
  766. if (Type != EType::Any && FieldsAvailable.test(+ENodeFields::NodeInfo)) {
  767. TNodeView nodeView;
  768. for (TNode* node : NodeView) {
  769. if ((Type == EType::Static && node->IsStatic()) || (Type == EType::Dynamic && !node->IsStatic())) {
  770. nodeView.push_back(node);
  771. }
  772. }
  773. NodeView.swap(nodeView);
  774. Type = EType::Any;
  775. InvalidateNodes();
  776. }
  777. if (ProblemNodesOnly && FieldsAvailable.test(+ENodeFields::SystemState)) {
  778. TNodeView nodeView;
  779. for (TNode* node : NodeView) {
  780. if (node->GetOverall() != NKikimrWhiteboard::EFlag::Green) {
  781. nodeView.push_back(node);
  782. }
  783. }
  784. NodeView.swap(nodeView);
  785. ProblemNodesOnly = false;
  786. InvalidateNodes();
  787. }
  788. if (UptimeSeconds > 0 && FieldsAvailable.test(+ENodeFields::SystemState)) {
  789. ui64 limitSeconds = TInstant::Now().Seconds() - UptimeSeconds;
  790. TNodeView nodeView;
  791. for (TNode* node : NodeView) {
  792. if (node->SystemState.GetStartTime() >= limitSeconds) {
  793. nodeView.push_back(node);
  794. }
  795. }
  796. NodeView.swap(nodeView);
  797. UptimeSeconds = 0;
  798. InvalidateNodes();
  799. }
  800. if (!Filter.empty() && FieldsAvailable.test(+ENodeFields::NodeInfo)) {
  801. TVector<TString> filterWords = SplitString(Filter, " ");
  802. TNodeView nodeView;
  803. for (TNode* node : NodeView) {
  804. bool match = false;
  805. for (const TString& word : filterWords) {
  806. if (node->GetHostName().Contains(word)) {
  807. match = true;
  808. break;
  809. } else if (::ToString(node->GetNodeId()).Contains(word)) {
  810. match = true;
  811. break;
  812. }
  813. }
  814. if (match) {
  815. nodeView.push_back(node);
  816. }
  817. }
  818. NodeView.swap(nodeView);
  819. Filter.clear();
  820. InvalidateNodes();
  821. }
  822. NeedFilter = (With != EWith::Everything) || (Type != EType::Any) || !Filter.empty() || !FilterNodeIds.empty() || ProblemNodesOnly || UptimeSeconds > 0;
  823. FoundNodes = NodeView.size();
  824. }
  825. }
  826. template<typename F>
  827. void GroupCollection(F&& groupBy) {
  828. std::unordered_map<TString, size_t> nodeGroups;
  829. NodeGroups.clear();
  830. for (TNode* node : NodeView) {
  831. auto gb = groupBy(node);
  832. TNodeGroup* nodeGroup = nullptr;
  833. auto it = nodeGroups.find(gb);
  834. if (it == nodeGroups.end()) {
  835. nodeGroups.emplace(gb, NodeGroups.size());
  836. nodeGroup = &NodeGroups.emplace_back();
  837. nodeGroup->Name = gb;
  838. } else {
  839. nodeGroup = &NodeGroups[it->second];
  840. }
  841. nodeGroup->Nodes.push_back(node);
  842. }
  843. }
  844. void ApplyGroup() {
  845. if (FilterDone() && NeedGroup && FieldsAvailable.test(+GroupBy)) {
  846. switch (GroupBy) {
  847. case ENodeFields::NodeId:
  848. GroupCollection([](const TNode* node) { return ToString(node->GetNodeId()); });
  849. SortCollection(NodeGroups, [](const TNodeGroup& nodeGroup) { return nodeGroup.Name; });
  850. break;
  851. case ENodeFields::HostName:
  852. GroupCollection([](const TNode* node) { return node->GetHostName(); });
  853. SortCollection(NodeGroups, [](const TNodeGroup& nodeGroup) { return nodeGroup.Name; });
  854. break;
  855. case ENodeFields::Database:
  856. GroupCollection([](const TNode* node) { return node->Database; });
  857. SortCollection(NodeGroups, [](const TNodeGroup& nodeGroup) { return nodeGroup.Name; });
  858. break;
  859. case ENodeFields::DiskSpaceUsage:
  860. GroupCollection([](const TNode* node) { return node->GetDiskUsageForGroup(); });
  861. SortCollection(NodeGroups, [](const TNodeGroup& nodeGroup) { return nodeGroup.Name; });
  862. break;
  863. case ENodeFields::DC:
  864. GroupCollection([](const TNode* node) { return node->GetDataCenter(); });
  865. SortCollection(NodeGroups, [](const TNodeGroup& nodeGroup) { return nodeGroup.Name; });
  866. break;
  867. case ENodeFields::Rack:
  868. GroupCollection([](const TNode* node) { return node->GetRack(); });
  869. SortCollection(NodeGroups, [](const TNodeGroup& nodeGroup) { return nodeGroup.Name; });
  870. break;
  871. case ENodeFields::Missing:
  872. GroupCollection([](const TNode* node) { return ToString(node->MissingDisks); });
  873. SortCollection(NodeGroups, [](const TNodeGroup& nodeGroup) { return nodeGroup.Name; });
  874. break;
  875. case ENodeFields::Uptime:
  876. GroupCollection([now = TInstant::Now()](const TNode* node) { return node->GetUptimeForGroup(now); });
  877. SortCollection(NodeGroups, [](const TNodeGroup& nodeGroup) { return nodeGroup.Name; });
  878. break;
  879. case ENodeFields::Version:
  880. GroupCollection([](const TNode* node) { return node->GetVersionForGroup(); });
  881. SortCollection(NodeGroups, [](const TNodeGroup& nodeGroup) { return nodeGroup.Name; });
  882. break;
  883. case ENodeFields::NodeInfo:
  884. case ENodeFields::SystemState:
  885. case ENodeFields::PDisks:
  886. case ENodeFields::VDisks:
  887. case ENodeFields::Tablets:
  888. case ENodeFields::SubDomainKey:
  889. case ENodeFields::COUNT:
  890. case ENodeFields::Memory:
  891. case ENodeFields::CPU:
  892. case ENodeFields::LoadAverage:
  893. case ENodeFields::DisconnectTime:
  894. break;
  895. }
  896. NeedGroup = false;
  897. }
  898. }
  899. void ApplySort() {
  900. if (FilterDone() && NeedSort && FieldsAvailable.test(+SortBy)) {
  901. switch (SortBy) {
  902. case ENodeFields::NodeId:
  903. SortCollection(NodeView, [](const TNode* node) { return node->GetNodeId(); }, ReverseSort);
  904. break;
  905. case ENodeFields::HostName:
  906. SortCollection(NodeView, [](const TNode* node) { return node->GetHostName(); }, ReverseSort);
  907. break;
  908. case ENodeFields::DC:
  909. SortCollection(NodeView, [](const TNode* node) { return node->NodeInfo.Location.GetDataCenterId(); }, ReverseSort);
  910. break;
  911. case ENodeFields::Rack:
  912. SortCollection(NodeView, [](const TNode* node) { return node->NodeInfo.Location.GetRackId(); }, ReverseSort);
  913. break;
  914. case ENodeFields::Version:
  915. SortCollection(NodeView, [](const TNode* node) { return node->SystemState.GetVersion(); }, ReverseSort);
  916. break;
  917. case ENodeFields::Uptime:
  918. SortCollection(NodeView, [](const TNode* node) { return node->SystemState.GetStartTime(); }, ReverseSort);
  919. break;
  920. case ENodeFields::Memory:
  921. case ENodeFields::CPU:
  922. case ENodeFields::LoadAverage:
  923. case ENodeFields::Missing:
  924. case ENodeFields::DiskSpaceUsage:
  925. case ENodeFields::NodeInfo:
  926. case ENodeFields::SystemState:
  927. case ENodeFields::PDisks:
  928. case ENodeFields::VDisks:
  929. case ENodeFields::Tablets:
  930. case ENodeFields::SubDomainKey:
  931. case ENodeFields::Database:
  932. case ENodeFields::DisconnectTime:
  933. case ENodeFields::COUNT:
  934. break;
  935. }
  936. NeedSort = false;
  937. InvalidateNodes();
  938. }
  939. }
  940. void ApplyLimit() {
  941. if (FilterDone() && !NeedSort && !NeedGroup && NeedLimit) {
  942. if (Offset) {
  943. NodeView.erase(NodeView.begin(), NodeView.begin() + std::min(*Offset, NodeView.size()));
  944. InvalidateNodes();
  945. }
  946. if (Limit) {
  947. NodeView.resize(std::min(*Limit, NodeView.size()));
  948. InvalidateNodes();
  949. }
  950. NeedLimit = false;
  951. }
  952. }
  953. void ApplyEverything() {
  954. ApplyFilter();
  955. ApplyGroup();
  956. ApplySort();
  957. ApplyLimit();
  958. }
  959. static constexpr size_t BATCH_SIZE = 200;
  960. void BuildCandidates(TNodeBatch& batch, std::vector<TNode*>& candidates) {
  961. auto itCandidate = candidates.begin();
  962. for (; itCandidate != candidates.end() && batch.NodesToAskFor.size() < OffloadMergeAttempts; ++itCandidate) {
  963. batch.NodesToAskFor.push_back(*itCandidate);
  964. }
  965. candidates.erase(candidates.begin(), itCandidate);
  966. for (TNode* node : batch.NodesToAskAbout) {
  967. if (node->IsStatic()) {
  968. batch.HasStaticNodes = true;
  969. }
  970. }
  971. }
  972. void SplitBatch(TNodeBatch& nodeBatch, std::vector<TNodeBatch>& batches) {
  973. std::vector<TNode*> candidates = nodeBatch.NodesToAskAbout;
  974. std::sort(candidates.begin(), candidates.end(), [](TNode* a, TNode* b) {
  975. return a->GetCandidateScore() > b->GetCandidateScore();
  976. });
  977. while (nodeBatch.NodesToAskAbout.size() > BATCH_SIZE) {
  978. TNodeBatch newBatch;
  979. size_t splitSize = std::min(BATCH_SIZE, nodeBatch.NodesToAskAbout.size() / 2);
  980. newBatch.NodesToAskAbout.reserve(splitSize);
  981. for (size_t i = 0; i < splitSize; ++i) {
  982. newBatch.NodesToAskAbout.push_back(nodeBatch.NodesToAskAbout.back());
  983. nodeBatch.NodesToAskAbout.pop_back();
  984. }
  985. BuildCandidates(newBatch, candidates);
  986. batches.emplace_back(std::move(newBatch));
  987. }
  988. if (!nodeBatch.NodesToAskAbout.empty()) {
  989. BuildCandidates(nodeBatch, candidates);
  990. batches.emplace_back(std::move(nodeBatch));
  991. }
  992. }
  993. std::vector<TNodeBatch> BatchNodes() {
  994. std::vector<TNodeBatch> batches;
  995. if (OffloadMerge) {
  996. std::unordered_map<TSubDomainKey, TNodeBatch> batchSubDomain;
  997. std::unordered_map<TString, TNodeBatch> batchDataCenters;
  998. for (TNode* node : NodeView) {
  999. if (node->IsStatic()) {
  1000. batchDataCenters[node->GetDataCenter()].NodesToAskAbout.push_back(node);
  1001. } else {
  1002. batchSubDomain[node->SubDomainKey].NodesToAskAbout.push_back(node);
  1003. }
  1004. }
  1005. for (auto& [subDomainKey, nodeBatch] : batchSubDomain) {
  1006. if (nodeBatch.NodesToAskAbout.size() == 1) {
  1007. TNode* node = nodeBatch.NodesToAskAbout.front();
  1008. batchDataCenters[node->GetDataCenter()].NodesToAskAbout.push_back(node);
  1009. } else {
  1010. SplitBatch(nodeBatch, batches);
  1011. }
  1012. }
  1013. for (auto& [dataCenter, nodeBatch] : batchDataCenters) {
  1014. SplitBatch(nodeBatch, batches);
  1015. }
  1016. } else {
  1017. TNodeBatch nodeBatch;
  1018. for (TNode* node : NodeView) {
  1019. nodeBatch.NodesToAskAbout.push_back(node);
  1020. }
  1021. SplitBatch(nodeBatch, batches);
  1022. }
  1023. return batches;
  1024. }
  1025. bool HiveResponsesDone() const {
  1026. for (const auto& [hiveId, hiveNodeStats] : HiveNodeStats) {
  1027. if (!hiveNodeStats.IsDone()) {
  1028. return false;
  1029. }
  1030. }
  1031. return !HiveNodeStats.empty();
  1032. }
  1033. bool TimeToAskHive() {
  1034. if (NodesInfoResponse && !NodesInfoResponse->IsDone()) {
  1035. return false;
  1036. }
  1037. if (DatabaseNavigateResponse && !DatabaseNavigateResponse->IsDone()) {
  1038. return false;
  1039. }
  1040. if (ResourceNavigateResponse && !ResourceNavigateResponse->IsDone()) {
  1041. return false;
  1042. }
  1043. if (PathNavigateResponse && !PathNavigateResponse->IsDone()) {
  1044. return false;
  1045. }
  1046. return CurrentTimeoutState < TimeoutTablets;
  1047. }
  1048. bool TimeToAskWhiteboard() {
  1049. if (NodesInfoResponse && !NodesInfoResponse->IsDone()) {
  1050. return false;
  1051. }
  1052. if (NodeStateResponse && !NodeStateResponse->IsDone()) {
  1053. return false;
  1054. }
  1055. if (DatabaseNavigateResponse && !DatabaseNavigateResponse->IsDone()) {
  1056. return false;
  1057. }
  1058. if (ResourceNavigateResponse && !ResourceNavigateResponse->IsDone()) {
  1059. return false;
  1060. }
  1061. if (PathNavigateResponse && !PathNavigateResponse->IsDone()) {
  1062. return false;
  1063. }
  1064. if (DatabaseBoardInfoResponse && !DatabaseBoardInfoResponse->IsDone()) {
  1065. return false;
  1066. }
  1067. if (ResourceBoardInfoResponse && !ResourceBoardInfoResponse->IsDone()) {
  1068. return false;
  1069. }
  1070. for (const auto& [hiveId, hiveNodeStats] : HiveNodeStats) {
  1071. if (!hiveNodeStats.IsDone()) {
  1072. AddEvent("HiveNodeStats not done");
  1073. return false;
  1074. }
  1075. }
  1076. if (StoragePoolsResponse && !StoragePoolsResponse->IsDone()) {
  1077. return false;
  1078. }
  1079. if (GroupsResponse && !GroupsResponse->IsDone()) {
  1080. return false;
  1081. }
  1082. if (VSlotsResponse && !VSlotsResponse->IsDone()) {
  1083. return false;
  1084. }
  1085. if (PDisksResponse && !PDisksResponse->IsDone()) {
  1086. return false;
  1087. }
  1088. if (!SystemStateResponse.empty() || !TabletStateResponse.empty() || !PDiskStateResponse.empty()
  1089. || !VDiskStateResponse.empty() || !SystemViewerResponse.empty() || !TabletViewerResponse.empty()) {
  1090. return false;
  1091. }
  1092. return CurrentTimeoutState < TimeoutFinal;
  1093. }
  1094. static TString GetDatabaseFromEndpointsBoardPath(const TString& path) {
  1095. TStringBuf db(path);
  1096. db.SkipPrefix("gpc+");
  1097. return TString(db);
  1098. }
  1099. void ProcessResponses() {
  1100. AddEvent("ProcessResponses");
  1101. if (NodesInfoResponse && NodesInfoResponse->IsDone()) {
  1102. if (NodesInfoResponse->IsOk()) {
  1103. bool seenDC = false;
  1104. bool seenRack = false;
  1105. for (const auto& ni : NodesInfoResponse->Get()->Nodes) {
  1106. TNode& node = NodeData.emplace_back();
  1107. node.NodeInfo = ni;
  1108. if (ni.Host && !node.SystemState.GetHost()) {
  1109. node.SystemState.SetHost(ni.Host);
  1110. }
  1111. if (ni.Location.GetDataCenterId() != 0) {
  1112. seenDC = true;
  1113. }
  1114. if (ni.Location.GetRackId() != 0) {
  1115. seenRack = true;
  1116. }
  1117. }
  1118. for (TNode& node : NodeData) {
  1119. NodeView.emplace_back(&node);
  1120. }
  1121. InvalidateNodes();
  1122. FieldsAvailable |= FieldsNodeInfo;
  1123. FoundNodes = TotalNodes = NodeView.size();
  1124. NoDC = !seenDC;
  1125. NoRack = !seenRack;
  1126. } else {
  1127. AddProblem("no-nodes-info");
  1128. }
  1129. NodesInfoResponse.reset();
  1130. }
  1131. if (NodeStateResponse && NodeStateResponse->IsDone() && TotalNodes > 0) {
  1132. if (NodeStateResponse->IsOk()) {
  1133. for (const auto& nodeStateInfo : NodeStateResponse->Get()->Record.GetNodeStateInfo()) {
  1134. if (nodeStateInfo.GetConnected()) {
  1135. TNodeId nodeId = FromStringWithDefault(TStringBuf(nodeStateInfo.GetPeerName()).Before(':'), 0);
  1136. if (nodeId) {
  1137. TNode* node = FindNode(nodeId);
  1138. if (node) {
  1139. node->Connected = true;
  1140. }
  1141. }
  1142. }
  1143. }
  1144. } else {
  1145. AddProblem("no-node-state-info");
  1146. }
  1147. NodeStateResponse.reset();
  1148. }
  1149. if (DatabaseNavigateResponse && DatabaseNavigateResponse->IsDone()) { // database hive and subdomain key
  1150. if (DatabaseNavigateResponse->IsOk()) {
  1151. auto* ev = DatabaseNavigateResponse->Get();
  1152. if (ev->Request->ResultSet.size() == 1 && ev->Request->ResultSet.begin()->Status == NSchemeCache::TSchemeCacheNavigate::EStatus::Ok) {
  1153. TSchemeCacheNavigate::TEntry& entry(ev->Request->ResultSet.front());
  1154. if (entry.DomainInfo) {
  1155. if (entry.DomainInfo->ResourcesDomainKey && entry.DomainInfo->DomainKey != entry.DomainInfo->ResourcesDomainKey) {
  1156. TPathId resourceDomainKey(entry.DomainInfo->ResourcesDomainKey);
  1157. ResourceNavigateResponse = MakeRequestSchemeCacheNavigate(resourceDomainKey, ENavigateRequestResource);
  1158. }
  1159. if (FieldsNeeded(FieldsHiveNodeStat) || (FilterPath && FieldsNeeded(FieldsTablets))) {
  1160. const auto ownerId = entry.DomainInfo->DomainKey.OwnerId;
  1161. const auto localPathId = entry.DomainInfo->DomainKey.LocalPathId;
  1162. SubDomainKey = TSubDomainKey(ownerId, localPathId);
  1163. if (FilterDatabase) {
  1164. FilterSubDomainKey = true;
  1165. }
  1166. HivesToAsk.push_back(AppData()->DomainsInfo->GetHive());
  1167. if (entry.DomainInfo->Params.HasHive()) {
  1168. HivesToAsk.push_back(entry.DomainInfo->Params.GetHive());
  1169. }
  1170. }
  1171. }
  1172. }
  1173. } else {
  1174. NodeView.clear();
  1175. AddProblem("no-database-info");
  1176. }
  1177. DatabaseNavigateResponse.reset();
  1178. }
  1179. if (ResourceNavigateResponse && ResourceNavigateResponse->IsDone()) { // database hive and subdomain key
  1180. if (ResourceNavigateResponse->IsOk()) {
  1181. auto* ev = ResourceNavigateResponse->Get();
  1182. if (ev->Request->ResultSet.size() == 1 && ev->Request->ResultSet.begin()->Status == NSchemeCache::TSchemeCacheNavigate::EStatus::Ok) {
  1183. TSchemeCacheNavigate::TEntry& entry(ev->Request->ResultSet.front());
  1184. auto path = CanonizePath(entry.Path);
  1185. SharedDatabase = path;
  1186. if (FieldsNeeded(FieldsHiveNodeStat) || (FilterPath && FieldsNeeded(FieldsTablets))) {
  1187. HivesToAsk.push_back(AppData()->DomainsInfo->GetHive());
  1188. if (entry.DomainInfo) {
  1189. const auto ownerId = entry.DomainInfo->DomainKey.OwnerId;
  1190. const auto localPathId = entry.DomainInfo->DomainKey.LocalPathId;
  1191. SharedSubDomainKey = TSubDomainKey(ownerId, localPathId);
  1192. if (FilterDatabase) {
  1193. FilterSubDomainKey = true;
  1194. }
  1195. if (entry.DomainInfo->Params.HasHive()) {
  1196. HivesToAsk.push_back(entry.DomainInfo->Params.GetHive());
  1197. }
  1198. }
  1199. } else {
  1200. ResourceBoardInfoResponse = MakeRequestStateStorageEndpointsLookup(path, EBoardInfoRequestResource);
  1201. }
  1202. }
  1203. } else {
  1204. NodeView.clear();
  1205. AddProblem("no-shared-database-info");
  1206. }
  1207. ResourceNavigateResponse.reset();
  1208. }
  1209. if (PathNavigateResponse && PathNavigateResponse->IsDone()) { // filter path id
  1210. if (PathNavigateResponse->IsOk()) {
  1211. auto* ev = PathNavigateResponse->Get();
  1212. if (ev->Request->ResultSet.size() == 1 && ev->Request->ResultSet.begin()->Status == NSchemeCache::TSchemeCacheNavigate::EStatus::Ok) {
  1213. TSchemeCacheNavigate::TEntry& entry(ev->Request->ResultSet.front());
  1214. if (entry.Self) {
  1215. FilterPathId = TPathId(entry.Self->Info.GetSchemeshardId(), entry.Self->Info.GetPathId());
  1216. AskHiveAboutPaths = true;
  1217. HivesToAsk.push_back(AppData()->DomainsInfo->GetHive());
  1218. if (entry.DomainInfo) {
  1219. const auto ownerId = entry.DomainInfo->DomainKey.OwnerId;
  1220. const auto localPathId = entry.DomainInfo->DomainKey.LocalPathId;
  1221. SubDomainKey = TSubDomainKey(ownerId, localPathId);
  1222. if (FilterDatabase) {
  1223. FilterSubDomainKey = true;
  1224. }
  1225. if (entry.DomainInfo->Params.HasHive()) {
  1226. HivesToAsk.push_back(entry.DomainInfo->Params.GetHive());
  1227. }
  1228. }
  1229. }
  1230. }
  1231. } else {
  1232. AddProblem("no-path-info");
  1233. }
  1234. PathNavigateResponse.reset();
  1235. }
  1236. if (DatabaseBoardInfoResponse && DatabaseBoardInfoResponse->IsDone() && TotalNodes > 0) {
  1237. if (DatabaseBoardInfoResponse->IsOk() && DatabaseBoardInfoResponse->Get()->Status == TEvStateStorage::TEvBoardInfo::EStatus::Ok) {
  1238. TString database = GetDatabaseFromEndpointsBoardPath(DatabaseBoardInfoResponse->Get()->Path);
  1239. for (const auto& entry : DatabaseBoardInfoResponse->Get()->InfoEntries) {
  1240. if (!entry.second.Dropped) {
  1241. TNode* node = FindNode(entry.first.NodeId());
  1242. if (node) {
  1243. node->Database = database;
  1244. node->GotDatabaseFromDatabaseBoardInfo = true;
  1245. HasDatabaseNodes = true;
  1246. }
  1247. }
  1248. }
  1249. FieldsAvailable.set(+ENodeFields::Database);
  1250. } else {
  1251. AddProblem("no-database-board-info");
  1252. }
  1253. DatabaseBoardInfoResponse.reset();
  1254. }
  1255. if (ResourceBoardInfoResponse && ResourceBoardInfoResponse->IsDone() && TotalNodes > 0) {
  1256. if (ResourceBoardInfoResponse->IsOk() && ResourceBoardInfoResponse->Get()->Status == TEvStateStorage::TEvBoardInfo::EStatus::Ok) {
  1257. TString database = GetDatabaseFromEndpointsBoardPath(ResourceBoardInfoResponse->Get()->Path);
  1258. for (const auto& entry : ResourceBoardInfoResponse->Get()->InfoEntries) {
  1259. if (!entry.second.Dropped) {
  1260. TNode* node = FindNode(entry.first.NodeId());
  1261. if (node) {
  1262. node->Database = database;
  1263. node->GotDatabaseFromResourceBoardInfo = true;
  1264. }
  1265. }
  1266. }
  1267. FieldsAvailable.set(+ENodeFields::Database);
  1268. } else {
  1269. AddProblem("no-shared-database-board-info");
  1270. }
  1271. ResourceBoardInfoResponse.reset();
  1272. }
  1273. if (TimeToAskHive() && !HivesToAsk.empty()) {
  1274. AddEvent("TimeToAskHive");
  1275. std::sort(HivesToAsk.begin(), HivesToAsk.end());
  1276. HivesToAsk.erase(std::unique(HivesToAsk.begin(), HivesToAsk.end()), HivesToAsk.end());
  1277. for (TTabletId hiveId : HivesToAsk) {
  1278. auto request = std::make_unique<TEvHive::TEvRequestHiveNodeStats>();
  1279. request->Record.SetReturnMetrics(true);
  1280. if (Database) { // it's better to ask hive about tablets only if we're filtering by database
  1281. request->Record.SetReturnExtendedTabletInfo(true);
  1282. }
  1283. if (AskHiveAboutPaths) {
  1284. request->Record.SetFilterTabletsBySchemeShardId(FilterPathId.OwnerId);
  1285. request->Record.SetFilterTabletsByPathId(FilterPathId.LocalPathId);
  1286. }
  1287. HiveNodeStats.emplace(hiveId, MakeRequestHiveNodeStats(hiveId, request.release()));
  1288. }
  1289. HivesToAsk.clear();
  1290. }
  1291. if (HiveResponsesDone()) {
  1292. AddEvent("HiveResponsesDone");
  1293. for (const auto& [hiveId, nodeStats] : HiveNodeStats) {
  1294. if (nodeStats.IsDone()) {
  1295. if (nodeStats.IsOk()) {
  1296. for (const NKikimrHive::THiveNodeStats& nodeStats : nodeStats.Get()->Record.GetNodeStats()) {
  1297. ui32 nodeId = nodeStats.GetNodeId();
  1298. TNode* node = FindNode(nodeId);
  1299. if (node) {
  1300. if (Database) { // it's better to ask hive about tablets only if we're filtering by database
  1301. for (const NKikimrHive::THiveDomainStatsStateCount& stateStats : nodeStats.GetStateStats()) {
  1302. NKikimrViewer::TTabletStateInfo& viewerTablet(node->Tablets.emplace_back());
  1303. viewerTablet.SetType(NKikimrTabletBase::TTabletTypes::EType_Name(stateStats.GetTabletType()));
  1304. viewerTablet.SetCount(stateStats.GetCount());
  1305. viewerTablet.SetState(GetFlagFromTabletState(stateStats.GetVolatileState()));
  1306. FieldsAvailable.set(+ENodeFields::Tablets);
  1307. }
  1308. }
  1309. if (nodeStats.HasLastAliveTimestamp()) {
  1310. node->SystemState.SetDisconnectTime(std::max(node->SystemState.GetDisconnectTime(), nodeStats.GetLastAliveTimestamp() / 1000)); // seconds
  1311. FieldsAvailable.set(+ENodeFields::DisconnectTime);
  1312. }
  1313. if (nodeStats.HasNodeDomain()) {
  1314. node->SubDomainKey = TSubDomainKey(nodeStats.GetNodeDomain());
  1315. FieldsAvailable.set(+ENodeFields::SubDomainKey);
  1316. if (node->SubDomainKey == SubDomainKey) {
  1317. HasDatabaseNodes = true;
  1318. }
  1319. }
  1320. }
  1321. }
  1322. } else {
  1323. AddProblem("hive-no-data");
  1324. }
  1325. }
  1326. }
  1327. HiveNodeStats.clear();
  1328. }
  1329. if (FilterStorageStage == EFilterStorageStage::Pools && StoragePoolsResponse && StoragePoolsResponse->IsDone()) {
  1330. if (StoragePoolsResponse->IsOk()) {
  1331. for (const auto& storagePoolEntry : StoragePoolsResponse->Get()->Record.GetEntries()) {
  1332. if (storagePoolEntry.GetInfo().GetName() == FilterStoragePool) {
  1333. FilterStoragePoolId = {storagePoolEntry.GetKey().GetBoxId(), storagePoolEntry.GetKey().GetStoragePoolId()};
  1334. break;
  1335. }
  1336. }
  1337. FilterStorageStage = EFilterStorageStage::Groups;
  1338. } else {
  1339. AddProblem("bsc-storage-pools-no-data");
  1340. }
  1341. StoragePoolsResponse.reset();
  1342. }
  1343. if (FilterStorageStage == EFilterStorageStage::Groups && GroupsResponse && GroupsResponse->IsDone()) {
  1344. if (GroupsResponse->IsOk()) {
  1345. for (const auto& groupEntry : GroupsResponse->Get()->Record.GetEntries()) {
  1346. if (groupEntry.GetInfo().GetBoxId() == FilterStoragePoolId.first
  1347. && groupEntry.GetInfo().GetStoragePoolId() == FilterStoragePoolId.second) {
  1348. FilterGroupIds.insert(groupEntry.GetKey().GetGroupId());
  1349. }
  1350. }
  1351. FilterStorageStage = EFilterStorageStage::VSlots;
  1352. } else {
  1353. AddProblem("bsc-storage-groups-no-data");
  1354. }
  1355. GroupsResponse.reset();
  1356. }
  1357. if (FilterStorageStage == EFilterStorageStage::VSlots && VSlotsResponse && VSlotsResponse->IsDone()) {
  1358. if (VSlotsResponse->IsOk()) {
  1359. std::unordered_map<std::pair<TNodeId, ui32>, std::size_t> slotsPerDisk;
  1360. for (const auto& slotEntry : VSlotsResponse->Get()->Record.GetEntries()) {
  1361. if (FilterGroupIds.count(slotEntry.GetInfo().GetGroupId()) > 0) {
  1362. FilterNodeIds.insert(slotEntry.GetKey().GetNodeId());
  1363. TNode* node = FindNode(slotEntry.GetKey().GetNodeId());
  1364. if (node) {
  1365. node->SysViewVDisks.emplace_back(slotEntry);
  1366. node->HasDisks = true;
  1367. }
  1368. }
  1369. TNode* node = FindNode(slotEntry.GetKey().GetNodeId());
  1370. if (node) {
  1371. node->HasDisks = true;
  1372. }
  1373. auto& slots = slotsPerDisk[{slotEntry.GetKey().GetNodeId(), slotEntry.GetKey().GetPDiskId()}];
  1374. ++slots;
  1375. MaximumSlotsPerDisk = std::max(MaximumSlotsPerDisk.value_or(0), slots);
  1376. }
  1377. FilterStorageStage = EFilterStorageStage::None;
  1378. ApplyEverything();
  1379. } else {
  1380. AddProblem("bsc-storage-slots-no-data");
  1381. }
  1382. VSlotsResponse.reset();
  1383. }
  1384. if (PDisksResponse && PDisksResponse->IsDone()) {
  1385. if (PDisksResponse->IsOk()) {
  1386. std::unordered_map<TNodeId, std::size_t> disksPerNode;
  1387. for (const auto& pdiskEntry : PDisksResponse->Get()->Record.GetEntries()) {
  1388. TNode* node = FindNode(pdiskEntry.GetKey().GetNodeId());
  1389. if (node) {
  1390. node->SysViewPDisks.emplace_back(pdiskEntry);
  1391. node->HasDisks = true;
  1392. }
  1393. auto& disks = disksPerNode[pdiskEntry.GetKey().GetNodeId()];
  1394. ++disks;
  1395. MaximumDisksPerNode = std::max(MaximumDisksPerNode.value_or(0), disks);
  1396. }
  1397. for (TNode* node : NodeView) {
  1398. node->CalcDisks();
  1399. }
  1400. FieldsAvailable.set(+ENodeFields::Missing);
  1401. FieldsAvailable.set(+ENodeFields::DiskSpaceUsage);
  1402. } else {
  1403. AddProblem("bsc-pdisks-no-data");
  1404. }
  1405. PDisksResponse.reset();
  1406. }
  1407. if (TimeToAskWhiteboard() && FieldsAvailable.test(+ENodeFields::NodeInfo)) {
  1408. AddEvent("TimeToAskWhiteboard");
  1409. ApplyEverything();
  1410. if (FilterDatabase) {
  1411. FieldsRequired.set(+ENodeFields::SystemState);
  1412. }
  1413. std::vector<TNodeBatch> batches = BatchNodes();
  1414. SendWhiteboardRequests(batches);
  1415. }
  1416. }
  1417. template<typename TWhiteboardEvent>
  1418. void InitWhiteboardRequest(TWhiteboardEvent* request) {
  1419. if (AllWhiteboardFields) {
  1420. request->AddFieldsRequired(-1);
  1421. }
  1422. }
  1423. void SendWhiteboardSystemAndTabletsBatch(TNodeBatch& batch) {
  1424. TNodeId nodeId = OffloadMerge ? batch.ChooseNodeId() : 0;
  1425. if (batch.HasStaticNodes && (FieldsNeeded(FieldsVDisks) || FieldsNeeded(FieldsPDisks))) {
  1426. nodeId = 0; // we need to ask for all nodes anyway
  1427. }
  1428. if (nodeId) {
  1429. if (FieldsNeeded(FieldsSystemState) && SystemViewerResponse.count(nodeId) == 0) {
  1430. auto viewerRequest = std::make_unique<TEvViewer::TEvViewerRequest>();
  1431. InitWhiteboardRequest(viewerRequest->Record.MutableSystemRequest());
  1432. viewerRequest->Record.SetTimeout(Timeout / 2);
  1433. for (const TNode* node : batch.NodesToAskAbout) {
  1434. viewerRequest->Record.MutableLocation()->AddNodeId(node->GetNodeId());
  1435. }
  1436. SystemViewerResponse.emplace(nodeId, MakeViewerRequest(nodeId, viewerRequest.release()));
  1437. NodeBatches.emplace(nodeId, batch);
  1438. ++WhiteboardStateRequestsInFlight;
  1439. }
  1440. if (FieldsNeeded(FieldsTablets) && TabletViewerResponse.count(nodeId) == 0) {
  1441. auto viewerRequest = std::make_unique<TEvViewer::TEvViewerRequest>();
  1442. viewerRequest->Record.MutableTabletRequest()->SetGroupBy("Type,State");
  1443. viewerRequest->Record.SetTimeout(Timeout / 2);
  1444. for (const TNode* node : batch.NodesToAskAbout) {
  1445. viewerRequest->Record.MutableLocation()->AddNodeId(node->GetNodeId());
  1446. }
  1447. TabletViewerResponse.emplace(nodeId, MakeViewerRequest(nodeId, viewerRequest.release()));
  1448. NodeBatches.emplace(nodeId, batch);
  1449. ++WhiteboardStateRequestsInFlight;
  1450. }
  1451. } else {
  1452. for (const TNode* node : batch.NodesToAskAbout) {
  1453. if (node->Disconnected) {
  1454. continue;
  1455. }
  1456. TNodeId nodeId = node->GetNodeId();
  1457. if (FieldsNeeded(FieldsSystemState)) {
  1458. if (SystemStateResponse.count(nodeId) == 0) {
  1459. auto request = new TEvWhiteboard::TEvSystemStateRequest();
  1460. InitWhiteboardRequest(&request->Record);
  1461. SystemStateResponse.emplace(nodeId, MakeWhiteboardRequest(nodeId, request));
  1462. ++WhiteboardStateRequestsInFlight;
  1463. }
  1464. }
  1465. if (FieldsNeeded(FieldsTablets)) {
  1466. if (TabletStateResponse.count(nodeId) == 0) {
  1467. auto request = std::make_unique<TEvWhiteboard::TEvTabletStateRequest>();
  1468. request->Record.SetGroupBy("Type,State");
  1469. TabletStateResponse.emplace(nodeId, MakeWhiteboardRequest(nodeId, request.release()));
  1470. ++WhiteboardStateRequestsInFlight;
  1471. }
  1472. }
  1473. }
  1474. }
  1475. }
  1476. void SendWhiteboardRequest(TNodeBatch& batch) {
  1477. SendWhiteboardSystemAndTabletsBatch(batch);
  1478. for (const TNode* node : batch.NodesToAskAbout) {
  1479. TNodeId nodeId = node->GetNodeId();
  1480. if (node->IsStatic()) {
  1481. if (FieldsNeeded(FieldsVDisks)) {
  1482. if (VDiskStateResponse.count(nodeId) == 0) {
  1483. auto request = new TEvWhiteboard::TEvVDiskStateRequest();
  1484. InitWhiteboardRequest(&request->Record);
  1485. VDiskStateResponse.emplace(nodeId, MakeWhiteboardRequest(nodeId, request));
  1486. ++WhiteboardStateRequestsInFlight;
  1487. }
  1488. }
  1489. if (FieldsNeeded(FieldsPDisks)) {
  1490. if (PDiskStateResponse.count(nodeId) == 0) {
  1491. auto request = new TEvWhiteboard::TEvPDiskStateRequest();
  1492. InitWhiteboardRequest(&request->Record);
  1493. PDiskStateResponse.emplace(nodeId, MakeWhiteboardRequest(nodeId, request));
  1494. ++WhiteboardStateRequestsInFlight;
  1495. }
  1496. }
  1497. }
  1498. }
  1499. }
  1500. void SendWhiteboardRequests(std::vector<TNodeBatch>& batches) {
  1501. for (TNodeBatch& batch : batches) {
  1502. SendWhiteboardRequest(batch);
  1503. }
  1504. }
  1505. void ProcessWhiteboard() {
  1506. if (FieldsNeeded(FieldsSystemState)) {
  1507. std::unordered_set<TNodeId> removeNodes;
  1508. for (const auto& [responseNodeId, response] : SystemViewerResponse) {
  1509. if (response.IsOk()) {
  1510. const auto& systemResponse(response.Get()->Record.GetSystemResponse());
  1511. for (const auto& systemInfo : systemResponse.GetSystemStateInfo()) {
  1512. TNodeId nodeId = systemInfo.GetNodeId();
  1513. TNode* node = FindNode(nodeId);
  1514. if (node) {
  1515. node->SystemState.MergeFrom(systemInfo);
  1516. node->Cleanup();
  1517. node->CalcDatabase();
  1518. if (Database && node->Database) {
  1519. if (node->Database != Database && (!SharedDatabase || node->Database != SharedDatabase)) {
  1520. removeNodes.insert(nodeId);
  1521. }
  1522. }
  1523. }
  1524. }
  1525. }
  1526. }
  1527. for (const auto& [nodeId, response] : SystemStateResponse) {
  1528. if (response.IsOk()) {
  1529. const auto& systemState(response.Get()->Record);
  1530. if (systemState.SystemStateInfoSize() > 0) {
  1531. TNode* node = FindNode(nodeId);
  1532. if (node) {
  1533. node->SystemState.MergeFrom(systemState.GetSystemStateInfo(0));
  1534. node->Cleanup();
  1535. node->CalcDatabase();
  1536. if (Database && node->Database) {
  1537. if (node->Database != Database && (!SharedDatabase || node->Database != SharedDatabase)) {
  1538. removeNodes.insert(nodeId);
  1539. }
  1540. }
  1541. }
  1542. }
  1543. }
  1544. }
  1545. if (!removeNodes.empty()) {
  1546. NodeView.erase(std::remove_if(NodeView.begin(), NodeView.end(), [&removeNodes](const TNode* node) { return removeNodes.count(node->GetNodeId()) > 0; }), NodeView.end());
  1547. TotalNodes = FoundNodes = NodeView.size();
  1548. InvalidateNodes();
  1549. }
  1550. FieldsAvailable |= FieldsSystemState;
  1551. FieldsAvailable.set(+ENodeFields::Database);
  1552. }
  1553. if (FieldsNeeded(FieldsTablets)) {
  1554. for (auto& [nodeId, response] : TabletViewerResponse) {
  1555. if (response.IsOk()) {
  1556. auto& tabletResponse(*(response.Get()->Record.MutableTabletResponse()));
  1557. if (tabletResponse.TabletStateInfoSize() > 0 && !tabletResponse.GetTabletStateInfo(0).HasCount()) {
  1558. GroupWhiteboardResponses(tabletResponse, "NodeId,Type,State");
  1559. }
  1560. for (const auto& tabletState : tabletResponse.GetTabletStateInfo()) {
  1561. TNode* node = FindNode(tabletState.GetNodeId());
  1562. if (node) {
  1563. if (tabletState.GetState() != NKikimrWhiteboard::TTabletStateInfo::Dead) {
  1564. NKikimrViewer::TTabletStateInfo& viewerTablet(node->Tablets.emplace_back());
  1565. viewerTablet.SetType(NKikimrTabletBase::TTabletTypes::EType_Name(tabletState.GetType()));
  1566. viewerTablet.SetState(GetFlagFromTabletState(tabletState.GetState()));
  1567. viewerTablet.SetCount(tabletState.GetCount());
  1568. }
  1569. }
  1570. }
  1571. }
  1572. }
  1573. for (auto& [nodeId, response] : TabletStateResponse) {
  1574. if (response.IsOk()) {
  1575. const auto& tabletState(response.Get()->Record);
  1576. TNode* node = FindNode(nodeId);
  1577. if (node) {
  1578. for (const auto& protoTabletState : tabletState.GetTabletStateInfo()) {
  1579. if (protoTabletState.GetState() != NKikimrWhiteboard::TTabletStateInfo::Dead) {
  1580. NKikimrViewer::TTabletStateInfo& viewerTablet(node->Tablets.emplace_back());
  1581. viewerTablet.SetType(NKikimrTabletBase::TTabletTypes::EType_Name(protoTabletState.GetType()));
  1582. viewerTablet.SetState(GetFlagFromTabletState(protoTabletState.GetState()));
  1583. viewerTablet.SetCount(protoTabletState.GetCount());
  1584. }
  1585. }
  1586. }
  1587. }
  1588. }
  1589. FieldsAvailable |= FieldsTablets;
  1590. }
  1591. if (FieldsNeeded(FieldsVDisks)) {
  1592. for (const auto& [nodeId, response] : VDiskStateResponse) {
  1593. if (response.IsOk()) {
  1594. const auto& vDiskState(response.Get()->Record);
  1595. TNode* node = FindNode(nodeId);
  1596. if (node) {
  1597. for (const auto& protoVDiskState : vDiskState.GetVDiskStateInfo()) {
  1598. node->VDisks.emplace_back(protoVDiskState);
  1599. }
  1600. }
  1601. }
  1602. }
  1603. FieldsAvailable |= FieldsVDisks;
  1604. }
  1605. if (FieldsNeeded(FieldsPDisks)) {
  1606. for (const auto& [nodeId, response] : PDiskStateResponse) {
  1607. if (response.IsOk()) {
  1608. const auto& pDiskState(response.Get()->Record);
  1609. TNode* node = FindNode(nodeId);
  1610. if (node) {
  1611. for (const auto& protoPDiskState : pDiskState.GetPDiskStateInfo()) {
  1612. node->PDisks.emplace_back(protoPDiskState);
  1613. }
  1614. node->CalcDisks();
  1615. }
  1616. MaximumDisksPerNode = std::max(MaximumDisksPerNode.value_or(0), pDiskState.PDiskStateInfoSize());
  1617. }
  1618. }
  1619. FieldsAvailable |= FieldsPDisks;
  1620. FieldsAvailable.set(+ENodeFields::Missing);
  1621. FieldsAvailable.set(+ENodeFields::DiskSpaceUsage);
  1622. }
  1623. ApplyEverything();
  1624. }
  1625. void Handle(TEvInterconnect::TEvNodesInfo::TPtr& ev) {
  1626. NodesInfoResponse->Set(std::move(ev));
  1627. ProcessResponses();
  1628. RequestDone();
  1629. }
  1630. void Handle(TEvWhiteboard::TEvNodeStateResponse::TPtr& ev) {
  1631. NodeStateResponse->Set(std::move(ev));
  1632. ProcessResponses();
  1633. RequestDone();
  1634. }
  1635. void Handle(TEvTxProxySchemeCache::TEvNavigateKeySetResult::TPtr& ev) {
  1636. if (ev->Cookie == ENavigateRequestDatabase) {
  1637. DatabaseNavigateResponse->Set(std::move(ev));
  1638. } else if (ev->Cookie == ENavigateRequestResource) {
  1639. ResourceNavigateResponse->Set(std::move(ev));
  1640. } else if (ev->Cookie == ENavigateRequestPath) {
  1641. PathNavigateResponse->Set(std::move(ev));
  1642. }
  1643. ProcessResponses();
  1644. RequestDone();
  1645. }
  1646. void Handle(TEvStateStorage::TEvBoardInfo::TPtr& ev) {
  1647. if (ev->Cookie == EBoardInfoRequestDatabase) {
  1648. DatabaseBoardInfoResponse->Set(std::move(ev));
  1649. } else if (ev->Cookie == EBoardInfoRequestResource) {
  1650. ResourceBoardInfoResponse->Set(std::move(ev));
  1651. }
  1652. ProcessResponses();
  1653. RequestDone();
  1654. }
  1655. void Handle(TEvHive::TEvResponseHiveNodeStats::TPtr& ev) {
  1656. HiveNodeStats[ev->Cookie].Set(std::move(ev));
  1657. ProcessResponses();
  1658. RequestDone();
  1659. }
  1660. void WhiteboardRequestDone() {
  1661. --WhiteboardStateRequestsInFlight;
  1662. if (WhiteboardStateRequestsInFlight == 0) {
  1663. ProcessWhiteboard();
  1664. }
  1665. RequestDone();
  1666. }
  1667. void Handle(TEvWhiteboard::TEvSystemStateResponse::TPtr& ev) {
  1668. ui64 nodeId = ev.Get()->Cookie;
  1669. SystemStateResponse[nodeId].Set(std::move(ev));
  1670. WhiteboardRequestDone();
  1671. }
  1672. void Handle(TEvWhiteboard::TEvVDiskStateResponse::TPtr& ev) {
  1673. ui64 nodeId = ev.Get()->Cookie;
  1674. VDiskStateResponse[nodeId].Set(std::move(ev));
  1675. WhiteboardRequestDone();
  1676. }
  1677. void Handle(TEvWhiteboard::TEvPDiskStateResponse::TPtr& ev) {
  1678. ui64 nodeId = ev.Get()->Cookie;
  1679. PDiskStateResponse[nodeId].Set(std::move(ev));
  1680. WhiteboardRequestDone();
  1681. }
  1682. void Handle(TEvWhiteboard::TEvTabletStateResponse::TPtr& ev) {
  1683. ui64 nodeId = ev.Get()->Cookie;
  1684. TabletStateResponse[nodeId].Set(std::move(ev));
  1685. WhiteboardRequestDone();
  1686. }
  1687. void Handle(TEvViewer::TEvViewerResponse::TPtr& ev) {
  1688. ui64 nodeId = ev.Get()->Cookie;
  1689. switch (ev->Get()->Record.Response_case()) {
  1690. case NKikimrViewer::TEvViewerResponse::ResponseCase::kSystemResponse:
  1691. SystemViewerResponse[nodeId].Set(std::move(ev));
  1692. NodeBatches.erase(nodeId);
  1693. WhiteboardRequestDone();
  1694. return;
  1695. case NKikimrViewer::TEvViewerResponse::ResponseCase::kTabletResponse:
  1696. TabletViewerResponse[nodeId].Set(std::move(ev));
  1697. NodeBatches.erase(nodeId);
  1698. WhiteboardRequestDone();
  1699. return;
  1700. default:
  1701. break;
  1702. }
  1703. TString error("WrongResponse");
  1704. {
  1705. auto itSystemViewerResponse = SystemViewerResponse.find(nodeId);
  1706. if (itSystemViewerResponse != SystemViewerResponse.end()) {
  1707. if (itSystemViewerResponse->second.Error(error)) {
  1708. if (NodeBatches.count(nodeId)) {
  1709. SendWhiteboardSystemAndTabletsBatch(NodeBatches[nodeId]);
  1710. NodeBatches.erase(nodeId);
  1711. }
  1712. WhiteboardRequestDone();
  1713. }
  1714. }
  1715. }
  1716. {
  1717. auto itTabletViewerResponse = TabletViewerResponse.find(nodeId);
  1718. if (itTabletViewerResponse != TabletViewerResponse.end()) {
  1719. if (itTabletViewerResponse->second.Error(error)) {
  1720. if (NodeBatches.count(nodeId)) {
  1721. SendWhiteboardSystemAndTabletsBatch(NodeBatches[nodeId]);
  1722. NodeBatches.erase(nodeId);
  1723. }
  1724. WhiteboardRequestDone();
  1725. }
  1726. }
  1727. }
  1728. }
  1729. void Handle(NSysView::TEvSysView::TEvGetStoragePoolsResponse::TPtr& ev) {
  1730. StoragePoolsResponse->Set(std::move(ev));
  1731. ProcessResponses();
  1732. RequestDone();
  1733. }
  1734. void Handle(NSysView::TEvSysView::TEvGetGroupsResponse::TPtr& ev) {
  1735. GroupsResponse->Set(std::move(ev));
  1736. ProcessResponses();
  1737. RequestDone();
  1738. }
  1739. void Handle(NSysView::TEvSysView::TEvGetVSlotsResponse::TPtr& ev) {
  1740. VSlotsResponse->Set(std::move(ev));
  1741. ProcessResponses();
  1742. RequestDone();
  1743. }
  1744. void Handle(NSysView::TEvSysView::TEvGetPDisksResponse::TPtr& ev) {
  1745. PDisksResponse->Set(std::move(ev));
  1746. ProcessResponses();
  1747. RequestDone();
  1748. }
  1749. void Disconnected(TEvInterconnect::TEvNodeDisconnected::TPtr& ev) {
  1750. TNodeId nodeId = ev->Get()->NodeId;
  1751. TNode* node = FindNode(nodeId);
  1752. if (node) {
  1753. node->DisconnectNode();
  1754. if (FieldsRequired.test(+ENodeFields::PDisks) || FieldsRequired.test(+ENodeFields::VDisks)) {
  1755. node->RemapDisks();
  1756. }
  1757. }
  1758. TString error("NodeDisconnected");
  1759. {
  1760. auto itSystemStateResponse = SystemStateResponse.find(nodeId);
  1761. if (itSystemStateResponse != SystemStateResponse.end()) {
  1762. if (itSystemStateResponse->second.Error(error)) {
  1763. WhiteboardRequestDone();
  1764. }
  1765. }
  1766. }
  1767. {
  1768. auto itVDiskStateResponse = VDiskStateResponse.find(nodeId);
  1769. if (itVDiskStateResponse != VDiskStateResponse.end()) {
  1770. if (itVDiskStateResponse->second.Error(error)) {
  1771. WhiteboardRequestDone();
  1772. }
  1773. }
  1774. }
  1775. {
  1776. auto itPDiskStateResponse = PDiskStateResponse.find(nodeId);
  1777. if (itPDiskStateResponse != PDiskStateResponse.end()) {
  1778. if (itPDiskStateResponse->second.Error(error)) {
  1779. WhiteboardRequestDone();
  1780. }
  1781. }
  1782. }
  1783. {
  1784. auto itTabletStateResponse = TabletStateResponse.find(nodeId);
  1785. if (itTabletStateResponse != TabletStateResponse.end()) {
  1786. if (itTabletStateResponse->second.Error(error)) {
  1787. WhiteboardRequestDone();
  1788. }
  1789. }
  1790. }
  1791. {
  1792. auto itSystemViewerResponse = SystemViewerResponse.find(nodeId);
  1793. if (itSystemViewerResponse != SystemViewerResponse.end()) {
  1794. if (itSystemViewerResponse->second.Error(error)) {
  1795. if (NodeBatches.count(nodeId)) {
  1796. SendWhiteboardSystemAndTabletsBatch(NodeBatches[nodeId]);
  1797. NodeBatches.erase(nodeId);
  1798. }
  1799. WhiteboardRequestDone();
  1800. }
  1801. }
  1802. }
  1803. {
  1804. auto itTabletViewerResponse = TabletViewerResponse.find(nodeId);
  1805. if (itTabletViewerResponse != TabletViewerResponse.end()) {
  1806. if (itTabletViewerResponse->second.Error(error)) {
  1807. if (NodeBatches.count(nodeId)) {
  1808. SendWhiteboardSystemAndTabletsBatch(NodeBatches[nodeId]);
  1809. NodeBatches.erase(nodeId);
  1810. }
  1811. WhiteboardRequestDone();
  1812. }
  1813. }
  1814. }
  1815. }
  1816. bool OnBscError(const TString& error) {
  1817. bool result = false;
  1818. if (StoragePoolsResponse && StoragePoolsResponse->Error(error)) {
  1819. ProcessResponses();
  1820. result = true;
  1821. }
  1822. if (GroupsResponse && GroupsResponse->Error(error)) {
  1823. ProcessResponses();
  1824. result = true;
  1825. }
  1826. if (VSlotsResponse && VSlotsResponse->Error(error)) {
  1827. ProcessResponses();
  1828. result = true;
  1829. }
  1830. if (PDisksResponse && PDisksResponse->Error(error)) {
  1831. ProcessResponses();
  1832. result = true;
  1833. }
  1834. return result;
  1835. }
  1836. void Handle(TEvTabletPipe::TEvClientConnected::TPtr& ev) {
  1837. if (ev->Get()->Status != NKikimrProto::OK) {
  1838. TString error = TStringBuilder() << "Failed to establish pipe to " << ev->Get()->TabletId << ": "
  1839. << NKikimrProto::EReplyStatus_Name(ev->Get()->Status);
  1840. auto it = HiveNodeStats.find(ev->Get()->TabletId);
  1841. if (it != HiveNodeStats.end()) {
  1842. if (it->second.Error(error)) {
  1843. AddProblem("hive-error");
  1844. ProcessResponses();
  1845. }
  1846. }
  1847. if (ev->Get()->TabletId == GetBSControllerId()) {
  1848. if (OnBscError(error)) {
  1849. AddProblem("bsc-error");
  1850. }
  1851. }
  1852. }
  1853. TBase::Handle(ev); // all RequestDone() are handled by base handler
  1854. }
  1855. void HandleTimeout(TEvents::TEvWakeup::TPtr& ev) {
  1856. CurrentTimeoutState = static_cast<ETimeoutTag>(ev->Get()->Tag);
  1857. TString error = "Timeout";
  1858. if (ev->Get()->Tag == TimeoutTablets) {
  1859. if (NodesInfoResponse && NodesInfoResponse->Error(error)) {
  1860. ProcessResponses();
  1861. }
  1862. if (NodeStateResponse && NodeStateResponse->Error(error)) {
  1863. ProcessResponses();
  1864. }
  1865. if (DatabaseNavigateResponse && DatabaseNavigateResponse->Error(error)) {
  1866. ProcessResponses();
  1867. }
  1868. if (ResourceNavigateResponse && ResourceNavigateResponse->Error(error)) {
  1869. ProcessResponses();
  1870. }
  1871. if (PathNavigateResponse && PathNavigateResponse->Error(error)) {
  1872. ProcessResponses();
  1873. }
  1874. if (OnBscError(error)) {
  1875. AddProblem("bsc-timeout");
  1876. }
  1877. RequestDone(FailPipeConnect(GetBSControllerId()));
  1878. for (auto& [hiveId, response] : HiveNodeStats) {
  1879. if (response.Error(error)) {
  1880. AddProblem("hive-timeout");
  1881. ProcessResponses();
  1882. RequestDone(FailPipeConnect(hiveId));
  1883. }
  1884. }
  1885. }
  1886. if (ev->Get()->Tag == TimeoutFinal) {
  1887. for (auto& [nodeId, response] : SystemViewerResponse) {
  1888. if (response.Error(error)) {
  1889. AddProblem("wb-incomplete");
  1890. WhiteboardRequestDone();
  1891. }
  1892. }
  1893. for (auto& [nodeId, response] : TabletViewerResponse) {
  1894. if (response.Error(error)) {
  1895. AddProblem("wb-incomplete");
  1896. WhiteboardRequestDone();
  1897. }
  1898. }
  1899. for (auto& [nodeId, response] : SystemStateResponse) {
  1900. if (response.Error(error)) {
  1901. AddProblem("wb-incomplete");
  1902. WhiteboardRequestDone();
  1903. }
  1904. }
  1905. for (auto& [nodeId, response] : VDiskStateResponse) {
  1906. if (response.Error(error)) {
  1907. AddProblem("wb-incomplete");
  1908. WhiteboardRequestDone();
  1909. }
  1910. }
  1911. for (auto& [nodeId, response] : PDiskStateResponse) {
  1912. if (response.Error(error)) {
  1913. AddProblem("wb-incomplete");
  1914. WhiteboardRequestDone();
  1915. }
  1916. }
  1917. for (auto& [nodeId, response] : TabletStateResponse) {
  1918. if (response.Error(error)) {
  1919. AddProblem("wb-incomplete");
  1920. WhiteboardRequestDone();
  1921. }
  1922. }
  1923. if (WaitingForResponse()) {
  1924. ReplyAndPassAway();
  1925. }
  1926. }
  1927. }
  1928. STATEFN(StateWork) {
  1929. switch (ev->GetTypeRewrite()) {
  1930. hFunc(TEvInterconnect::TEvNodesInfo, Handle);
  1931. hFunc(TEvWhiteboard::TEvNodeStateResponse, Handle);
  1932. hFunc(TEvWhiteboard::TEvSystemStateResponse, Handle);
  1933. hFunc(TEvWhiteboard::TEvPDiskStateResponse, Handle);
  1934. hFunc(TEvWhiteboard::TEvVDiskStateResponse, Handle);
  1935. hFunc(TEvWhiteboard::TEvTabletStateResponse, Handle);
  1936. hFunc(TEvTxProxySchemeCache::TEvNavigateKeySetResult, Handle);
  1937. hFunc(TEvStateStorage::TEvBoardInfo, Handle);
  1938. hFunc(TEvHive::TEvResponseHiveNodeStats, Handle);
  1939. hFunc(NSysView::TEvSysView::TEvGetGroupsResponse, Handle);
  1940. hFunc(NSysView::TEvSysView::TEvGetStoragePoolsResponse, Handle);
  1941. hFunc(NSysView::TEvSysView::TEvGetVSlotsResponse, Handle);
  1942. hFunc(NSysView::TEvSysView::TEvGetPDisksResponse, Handle);
  1943. hFunc(TEvViewer::TEvViewerResponse, Handle);
  1944. hFunc(TEvInterconnect::TEvNodeDisconnected, Disconnected);
  1945. hFunc(TEvTabletPipe::TEvClientConnected, Handle);
  1946. hFunc(TEvents::TEvWakeup, HandleTimeout);
  1947. }
  1948. }
  1949. void ReplyAndPassAway() override {
  1950. AddEvent("ReplyAndPassAway");
  1951. ApplyEverything();
  1952. NKikimrViewer::TNodesInfo json;
  1953. json.SetVersion(2);
  1954. json.SetFieldsAvailable(FieldsAvailable.to_string());
  1955. json.SetFieldsRequired(FieldsRequired.to_string());
  1956. if (NeedFilter) {
  1957. json.SetNeedFilter(true);
  1958. }
  1959. if (NeedGroup) {
  1960. json.SetNeedGroup(true);
  1961. }
  1962. if (NeedSort) {
  1963. json.SetNeedSort(true);
  1964. }
  1965. if (NeedLimit) {
  1966. json.SetNeedLimit(true);
  1967. }
  1968. json.SetTotalNodes(TotalNodes);
  1969. json.SetFoundNodes(FoundNodes);
  1970. if (MaximumDisksPerNode.has_value()) {
  1971. json.SetMaximumDisksPerNode(MaximumDisksPerNode.value());
  1972. }
  1973. if (MaximumSlotsPerDisk.has_value()) {
  1974. json.SetMaximumSlotsPerDisk(MaximumSlotsPerDisk.value());
  1975. }
  1976. if (NoDC) {
  1977. json.SetNoDC(true);
  1978. }
  1979. if (NoRack) {
  1980. json.SetNoRack(true);
  1981. }
  1982. for (auto problem : Problems) {
  1983. json.AddProblems(problem);
  1984. }
  1985. if (NodeGroups.empty()) {
  1986. for (TNode* node : NodeView) {
  1987. NKikimrViewer::TNodeInfo& jsonNode = *json.AddNodes();
  1988. if (FieldsAvailable.test(+ENodeFields::NodeInfo)) {
  1989. jsonNode.SetNodeId(node->GetNodeId());
  1990. }
  1991. if (FieldsAvailable.test(+ENodeFields::NodeInfo) || FieldsAvailable.test(+ENodeFields::SystemState)) {
  1992. *jsonNode.MutableSystemState() = std::move(node->SystemState);
  1993. }
  1994. if (FieldsAvailable.test(+ENodeFields::PDisks)) {
  1995. for (NKikimrWhiteboard::TPDiskStateInfo& pDisk : node->PDisks) {
  1996. (*jsonNode.AddPDisks()) = std::move(pDisk);
  1997. }
  1998. std::sort(node->PDisks.begin(), node->PDisks.end(), [](const NKikimrWhiteboard::TPDiskStateInfo& a, const NKikimrWhiteboard::TPDiskStateInfo& b) {
  1999. return a.pdiskid() < b.pdiskid();
  2000. });
  2001. }
  2002. if (FieldsAvailable.test(+ENodeFields::VDisks)) {
  2003. for (NKikimrWhiteboard::TVDiskStateInfo& vDisk : node->VDisks) {
  2004. (*jsonNode.AddVDisks()) = std::move(vDisk);
  2005. }
  2006. std::sort(node->VDisks.begin(), node->VDisks.end(), [](const NKikimrWhiteboard::TVDiskStateInfo& a, const NKikimrWhiteboard::TVDiskStateInfo& b) {
  2007. return VDiskIDFromVDiskID(a.vdiskid()) < VDiskIDFromVDiskID(b.vdiskid());
  2008. });
  2009. }
  2010. if (FieldsAvailable.test(+ENodeFields::Tablets)) {
  2011. for (NKikimrViewer::TTabletStateInfo& tablet : node->Tablets) {
  2012. (*jsonNode.AddTablets()) = std::move(tablet);
  2013. }
  2014. std::sort(node->Tablets.begin(), node->Tablets.end(), [](const NKikimrViewer::TTabletStateInfo& a, const NKikimrViewer::TTabletStateInfo& b) {
  2015. return a.type() < b.type();
  2016. });
  2017. }
  2018. }
  2019. } else {
  2020. for (const TNodeGroup& nodeGroup : NodeGroups) {
  2021. NKikimrViewer::TNodeGroup& jsonNodeGroup = *json.AddNodeGroups();
  2022. jsonNodeGroup.SetGroupName(nodeGroup.Name);
  2023. jsonNodeGroup.SetNodeCount(nodeGroup.Nodes.size());
  2024. }
  2025. }
  2026. TStringStream out;
  2027. Proto2Json(json, out, {
  2028. .EnumMode = TProto2JsonConfig::EnumValueMode::EnumName,
  2029. .StringifyNumbers = TProto2JsonConfig::EStringifyNumbersMode::StringifyInt64Always,
  2030. .WriteNanAsString = true,
  2031. });
  2032. TBase::ReplyAndPassAway(GetHTTPOKJSON(out.Str()));
  2033. }
  2034. static YAML::Node GetSwagger() {
  2035. TSimpleYamlBuilder yaml({
  2036. .Method = "get",
  2037. .Tag = "viewer",
  2038. .Summary = "Nodes info",
  2039. .Description = "Information about nodes",
  2040. });
  2041. yaml.AddParameter({
  2042. .Name = "path",
  2043. .Description = "path to schema object",
  2044. .Type = "string",
  2045. });
  2046. yaml.AddParameter({
  2047. .Name = "with",
  2048. .Description = "filter nodes by missing disks or space",
  2049. .Type = "string",
  2050. });
  2051. yaml.AddParameter({
  2052. .Name = "storage",
  2053. .Description = "return storage info",
  2054. .Type = "boolean",
  2055. });
  2056. yaml.AddParameter({
  2057. .Name = "tablets",
  2058. .Description = "return tablets info",
  2059. .Type = "boolean",
  2060. });
  2061. yaml.AddParameter({
  2062. .Name = "sort",
  2063. .Description = "sort by (NodeId,Host,DC,Rack,Version,Uptime,Missing)",
  2064. .Type = "string",
  2065. });
  2066. yaml.AddParameter({
  2067. .Name = "group",
  2068. .Description = "group by (NodeId,Host,DC,Rack,Version,Uptime,Missing)",
  2069. .Type = "string",
  2070. });
  2071. yaml.AddParameter({
  2072. .Name = "offset",
  2073. .Description = "skip N nodes",
  2074. .Type = "integer",
  2075. });
  2076. yaml.AddParameter({
  2077. .Name = "limit",
  2078. .Description = "limit to N nodes",
  2079. .Type = "integer",
  2080. });
  2081. yaml.AddParameter({
  2082. .Name = "timeout",
  2083. .Description = "timeout in ms",
  2084. .Type = "integer",
  2085. });
  2086. yaml.AddParameter({
  2087. .Name = "uptime",
  2088. .Description = "return only nodes with less uptime in sec.",
  2089. .Type = "integer",
  2090. });
  2091. yaml.AddParameter({
  2092. .Name = "problems_only",
  2093. .Description = "return only problem nodes",
  2094. .Type = "boolean",
  2095. });
  2096. yaml.AddParameter({
  2097. .Name = "filter",
  2098. .Description = "filter nodes by id or host",
  2099. .Type = "string",
  2100. });
  2101. yaml.SetResponseSchema(TProtoToYaml::ProtoToYamlSchema<NKikimrViewer::TNodesInfo>());
  2102. return yaml;
  2103. }
  2104. };
  2105. }