// process_stats.cpp (11 KB)
  1. #include "actorsystem.h"
  2. #include "actor_bootstrapped.h"
  3. #include "hfunc.h"
  4. #include "process_stats.h"
  5. #include <library/cpp/monlib/dynamic_counters/counters.h>
  6. #include <library/cpp/monlib/metrics/metric_registry.h>
  7. #include <util/datetime/uptime.h>
  8. #include <util/system/defaults.h>
  9. #include <util/stream/file.h>
  10. #include <util/string/vector.h>
  11. #include <util/string/split.h>
  12. #ifndef _win_
  13. #include <sys/user.h>
  14. #include <sys/sysctl.h>
  15. #endif
  16. namespace NActors {
  17. #ifdef _linux_
  18. namespace {
  19. template <typename TVal>
  20. static bool ExtractVal(const TString& str, const TString& name, TVal& res) {
  21. if (!str.StartsWith(name))
  22. return false;
  23. size_t pos = name.size();
  24. while (pos < str.size() && (str[pos] == ' ' || str[pos] == '\t')) {
  25. pos++;
  26. }
  27. res = atol(str.data() + pos);
  28. return true;
  29. }
  // Number of clock ticks per millisecond, derived from sysconf(_SC_CLK_TCK)
  // (ticks per second) when that constant is available; 1 otherwise.
  float TicksPerMillisec() {
  #ifdef _SC_CLK_TCK
      const long ticksPerSecond = sysconf(_SC_CLK_TCK);
      return ticksPerSecond / 1000.0;
  #else
      return 1.f;
  #endif
  }
  37. }
  38. bool TProcStat::Fill(pid_t pid) {
  39. try {
  40. TString strPid(ToString(pid));
  41. TFileInput proc("/proc/" + strPid + "/status");
  42. TString str;
  43. while (proc.ReadLine(str)) {
  44. if (ExtractVal(str, "VmRSS:", Rss))
  45. continue;
  46. if (ExtractVal(str, "voluntary_ctxt_switches:", VolCtxSwtch))
  47. continue;
  48. if (ExtractVal(str, "nonvoluntary_ctxt_switches:", NonvolCtxSwtch))
  49. continue;
  50. }
  51. // Convert from kB to bytes
  52. Rss *= 1024;
  53. float tickPerMillisec = TicksPerMillisec();
  54. TFileInput procStat("/proc/" + strPid + "/stat");
  55. procStat.ReadLine(str);
  56. if (!str.empty()) {
  57. sscanf(str.data(),
  58. "%d %*s %c %d %d %d %d %d %u %lu %lu "
  59. "%lu %lu %lu %lu %ld %ld %ld %ld %ld "
  60. "%ld %llu %lu %ld %lu",
  61. &Pid, &State, &Ppid, &Pgrp, &Session, &TtyNr, &TPgid, &Flags, &MinFlt, &CMinFlt,
  62. &MajFlt, &CMajFlt, &Utime, &Stime, &CUtime, &CStime, &Priority, &Nice, &NumThreads,
  63. &ItRealValue, &StartTime, &Vsize, &RssPages, &RssLim);
  64. Utime /= tickPerMillisec;
  65. Stime /= tickPerMillisec;
  66. CUtime /= tickPerMillisec;
  67. CStime /= tickPerMillisec;
  68. SystemUptime = ::Uptime();
  69. Uptime = SystemUptime - TDuration::MilliSeconds(StartTime / TicksPerMillisec());
  70. }
  71. TFileInput statm("/proc/" + strPid + "/statm");
  72. statm.ReadLine(str);
  73. TVector<TString> fields;
  74. StringSplitter(str).Split(' ').SkipEmpty().Collect(&fields);
  75. if (fields.size() >= 7) {
  76. ui64 resident = FromString<ui64>(fields[1]);
  77. ui64 shared = FromString<ui64>(fields[2]);
  78. if (PageSize == 0) {
  79. PageSize = ObtainPageSize();
  80. }
  81. FileRss = shared * PageSize;
  82. AnonRss = (resident - shared) * PageSize;
  83. }
  84. TFileInput cgroup("/proc/" + strPid + "/cgroup");
  85. TString line;
  86. TString memoryCGroup;
  87. while (cgroup.ReadLine(line) > 0) {
  88. StringSplitter(line).Split(':').Collect(&fields);
  89. if (fields.size() > 2 && fields[1] == "memory") {
  90. memoryCGroup = fields[2];
  91. break;
  92. }
  93. }
  94. if (!memoryCGroup.empty()) {
  95. TFileInput limit("/sys/fs/cgroup/memory" + memoryCGroup + "/memory.limit_in_bytes");
  96. if (limit.ReadLine(line) > 0) {
  97. CGroupMemLim = FromString<ui64>(line);
  98. if (CGroupMemLim > (1ULL << 40)) {
  99. CGroupMemLim = 0;
  100. }
  101. }
  102. }
  103. } catch (...) {
  104. return false;
  105. }
  106. return true;
  107. }
  108. long TProcStat::ObtainPageSize() {
  109. long sz = sysconf(_SC_PAGESIZE);
  110. return sz;
  111. }
  112. #else
  113. bool TProcStat::Fill(pid_t pid) {
  114. Y_UNUSED(pid);
  115. return false;
  116. }
  117. long TProcStat::ObtainPageSize() {
  118. return 0;
  119. }
  120. #endif
  121. namespace {
  122. // Periodically collects process stats and exposes them as mon counters
  123. template <typename TDerived>
  124. class TProcStatCollectingActor: public TActorBootstrapped<TProcStatCollectingActor<TDerived>> {
  125. public:
  126. static constexpr IActor::EActivityType ActorActivityType() {
  127. return IActor::ACTORLIB_STATS;
  128. }
  129. TProcStatCollectingActor(TDuration interval)
  130. : Interval(interval)
  131. {
  132. }
  133. void Bootstrap(const TActorContext& ctx) {
  134. ctx.Schedule(Interval, new TEvents::TEvWakeup());
  135. Self()->Become(&TDerived::StateWork);
  136. }
  137. STFUNC(StateWork) {
  138. switch (ev->GetTypeRewrite()) {
  139. CFunc(TEvents::TSystem::Wakeup, Wakeup);
  140. }
  141. }
  142. private:
  143. void Wakeup(const TActorContext& ctx) {
  144. Self()->UpdateCounters(ProcStat);
  145. ctx.Schedule(Interval, new TEvents::TEvWakeup());
  146. }
  147. TDerived* Self() {
  148. ProcStat.Fill(getpid());
  149. return static_cast<TDerived*>(this);
  150. }
  151. private:
  152. const TDuration Interval;
  153. TProcStat ProcStat;
  154. };
  155. // Periodically collects process stats and exposes them as mon counters
  156. class TDynamicCounterCollector: public TProcStatCollectingActor<TDynamicCounterCollector> {
  157. using TBase = TProcStatCollectingActor<TDynamicCounterCollector>;
  158. public:
  159. TDynamicCounterCollector(
  160. ui32 intervalSeconds,
  161. NMonitoring::TDynamicCounterPtr counters)
  162. : TBase{TDuration::Seconds(intervalSeconds)}
  163. {
  164. ProcStatGroup = counters->GetSubgroup("counters", "utils");
  165. VmSize = ProcStatGroup->GetCounter("Process/VmSize", false);
  166. AnonRssSize = ProcStatGroup->GetCounter("Process/AnonRssSize", false);
  167. FileRssSize = ProcStatGroup->GetCounter("Process/FileRssSize", false);
  168. CGroupMemLimit = ProcStatGroup->GetCounter("Process/CGroupMemLimit", false);
  169. UserTime = ProcStatGroup->GetCounter("Process/UserTime", true);
  170. SysTime = ProcStatGroup->GetCounter("Process/SystemTime", true);
  171. MinorPageFaults = ProcStatGroup->GetCounter("Process/MinorPageFaults", true);
  172. MajorPageFaults = ProcStatGroup->GetCounter("Process/MajorPageFaults", true);
  173. UptimeSeconds = ProcStatGroup->GetCounter("Process/UptimeSeconds", false);
  174. NumThreads = ProcStatGroup->GetCounter("Process/NumThreads", false);
  175. SystemUptimeSeconds = ProcStatGroup->GetCounter("System/UptimeSeconds", false);
  176. }
  177. void UpdateCounters(const TProcStat& procStat) {
  178. *VmSize = procStat.Vsize;
  179. *AnonRssSize = procStat.AnonRss;
  180. *FileRssSize = procStat.FileRss;
  181. if (procStat.CGroupMemLim) {
  182. *CGroupMemLimit = procStat.CGroupMemLim;
  183. }
  184. *UserTime = procStat.Utime;
  185. *SysTime = procStat.Stime;
  186. *MinorPageFaults = procStat.MinFlt;
  187. *MajorPageFaults = procStat.MajFlt;
  188. *UptimeSeconds = procStat.Uptime.Seconds();
  189. *NumThreads = procStat.NumThreads;
  190. *SystemUptimeSeconds = procStat.Uptime.Seconds();
  191. }
  192. private:
  193. NMonitoring::TDynamicCounterPtr ProcStatGroup;
  194. NMonitoring::TDynamicCounters::TCounterPtr VmSize;
  195. NMonitoring::TDynamicCounters::TCounterPtr AnonRssSize;
  196. NMonitoring::TDynamicCounters::TCounterPtr FileRssSize;
  197. NMonitoring::TDynamicCounters::TCounterPtr CGroupMemLimit;
  198. NMonitoring::TDynamicCounters::TCounterPtr UserTime;
  199. NMonitoring::TDynamicCounters::TCounterPtr SysTime;
  200. NMonitoring::TDynamicCounters::TCounterPtr MinorPageFaults;
  201. NMonitoring::TDynamicCounters::TCounterPtr MajorPageFaults;
  202. NMonitoring::TDynamicCounters::TCounterPtr UptimeSeconds;
  203. NMonitoring::TDynamicCounters::TCounterPtr NumThreads;
  204. NMonitoring::TDynamicCounters::TCounterPtr SystemUptimeSeconds;
  205. };
  206. class TRegistryCollector: public TProcStatCollectingActor<TRegistryCollector> {
  207. using TBase = TProcStatCollectingActor<TRegistryCollector>;
  208. public:
  209. TRegistryCollector(TDuration interval, NMonitoring::TMetricRegistry& registry)
  210. : TBase{interval}
  211. {
  212. VmSize = registry.IntGauge({{"sensor", "process.VmSize"}});
  213. AnonRssSize = registry.IntGauge({{"sensor", "process.AnonRssSize"}});
  214. FileRssSize = registry.IntGauge({{"sensor", "process.FileRssSize"}});
  215. CGroupMemLimit = registry.IntGauge({{"sensor", "process.CGroupMemLimit"}});
  216. UptimeSeconds = registry.IntGauge({{"sensor", "process.UptimeSeconds"}});
  217. NumThreads = registry.IntGauge({{"sensor", "process.NumThreads"}});
  218. SystemUptimeSeconds = registry.IntGauge({{"sensor", "system.UptimeSeconds"}});
  219. UserTime = registry.Rate({{"sensor", "process.UserTime"}});
  220. SysTime = registry.Rate({{"sensor", "process.SystemTime"}});
  221. MinorPageFaults = registry.Rate({{"sensor", "process.MinorPageFaults"}});
  222. MajorPageFaults = registry.Rate({{"sensor", "process.MajorPageFaults"}});
  223. }
  224. void UpdateCounters(const TProcStat& procStat) {
  225. VmSize->Set(procStat.Vsize);
  226. AnonRssSize->Set(procStat.AnonRss);
  227. FileRssSize->Set(procStat.FileRss);
  228. CGroupMemLimit->Set(procStat.CGroupMemLim);
  229. UptimeSeconds->Set(procStat.Uptime.Seconds());
  230. NumThreads->Set(procStat.NumThreads);
  231. SystemUptimeSeconds->Set(procStat.SystemUptime.Seconds());
  232. // it is ok here to reset and add metric value, because mutation
  233. // is performed in siglethreaded context
  234. UserTime->Reset();
  235. UserTime->Add(procStat.Utime);
  236. SysTime->Reset();
  237. SysTime->Add(procStat.Stime);
  238. MinorPageFaults->Reset();
  239. MinorPageFaults->Add(procStat.MinFlt);
  240. MajorPageFaults->Reset();
  241. MajorPageFaults->Add(procStat.MajFlt);
  242. }
  243. private:
  244. NMonitoring::TIntGauge* VmSize;
  245. NMonitoring::TIntGauge* AnonRssSize;
  246. NMonitoring::TIntGauge* FileRssSize;
  247. NMonitoring::TIntGauge* CGroupMemLimit;
  248. NMonitoring::TRate* UserTime;
  249. NMonitoring::TRate* SysTime;
  250. NMonitoring::TRate* MinorPageFaults;
  251. NMonitoring::TRate* MajorPageFaults;
  252. NMonitoring::TIntGauge* UptimeSeconds;
  253. NMonitoring::TIntGauge* NumThreads;
  254. NMonitoring::TIntGauge* SystemUptimeSeconds;
  255. };
  256. } // namespace
  257. IActor* CreateProcStatCollector(ui32 intervalSec, NMonitoring::TDynamicCounterPtr counters) {
  258. return new TDynamicCounterCollector(intervalSec, counters);
  259. }
  260. IActor* CreateProcStatCollector(TDuration interval, NMonitoring::TMetricRegistry& registry) {
  261. return new TRegistryCollector(interval, registry);
  262. }
  263. }