123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303 |
- #include "actorsystem.h"
- #include "actor_bootstrapped.h"
- #include "hfunc.h"
- #include "process_stats.h"
- #include <library/cpp/monlib/dynamic_counters/counters.h>
- #include <library/cpp/monlib/metrics/metric_registry.h>
- #include <util/datetime/uptime.h>
- #include <util/system/defaults.h>
- #include <util/stream/file.h>
- #include <util/string/vector.h>
- #include <util/string/split.h>
- #ifndef _win_
- #include <sys/user.h>
- #include <sys/sysctl.h>
- #endif
- namespace NActors {
- #ifdef _linux_
- namespace {
- template <typename TVal>
- static bool ExtractVal(const TString& str, const TString& name, TVal& res) {
- if (!str.StartsWith(name))
- return false;
- size_t pos = name.size();
- while (pos < str.size() && (str[pos] == ' ' || str[pos] == '\t')) {
- pos++;
- }
- res = atol(str.data() + pos);
- return true;
- }
// Returns sysconf(_SC_CLK_TCK) divided by 1000, i.e. the number of clock ticks per
// millisecond. Falls back to 1 when _SC_CLK_TCK is not defined at compile time.
float TicksPerMillisec() {
#ifdef _SC_CLK_TCK
    const long ticksPerSecond = sysconf(_SC_CLK_TCK);
    // The division is done in double and only then narrowed to float.
    return static_cast<float>(ticksPerSecond / 1000.0);
#else
    return 1.f;
#endif
}
- }
- bool TProcStat::Fill(pid_t pid) {
- try {
- TString strPid(ToString(pid));
- TFileInput proc("/proc/" + strPid + "/status");
- TString str;
- while (proc.ReadLine(str)) {
- if (ExtractVal(str, "VmRSS:", Rss))
- continue;
- if (ExtractVal(str, "voluntary_ctxt_switches:", VolCtxSwtch))
- continue;
- if (ExtractVal(str, "nonvoluntary_ctxt_switches:", NonvolCtxSwtch))
- continue;
- }
- // Convert from kB to bytes
- Rss *= 1024;
- float tickPerMillisec = TicksPerMillisec();
- TFileInput procStat("/proc/" + strPid + "/stat");
- procStat.ReadLine(str);
- if (!str.empty()) {
- sscanf(str.data(),
- "%d %*s %c %d %d %d %d %d %u %lu %lu "
- "%lu %lu %lu %lu %ld %ld %ld %ld %ld "
- "%ld %llu %lu %ld %lu",
- &Pid, &State, &Ppid, &Pgrp, &Session, &TtyNr, &TPgid, &Flags, &MinFlt, &CMinFlt,
- &MajFlt, &CMajFlt, &Utime, &Stime, &CUtime, &CStime, &Priority, &Nice, &NumThreads,
- &ItRealValue, &StartTime, &Vsize, &RssPages, &RssLim);
- Utime /= tickPerMillisec;
- Stime /= tickPerMillisec;
- CUtime /= tickPerMillisec;
- CStime /= tickPerMillisec;
- SystemUptime = ::Uptime();
- Uptime = SystemUptime - TDuration::MilliSeconds(StartTime / TicksPerMillisec());
- }
- TFileInput statm("/proc/" + strPid + "/statm");
- statm.ReadLine(str);
- TVector<TString> fields;
- StringSplitter(str).Split(' ').SkipEmpty().Collect(&fields);
- if (fields.size() >= 7) {
- ui64 resident = FromString<ui64>(fields[1]);
- ui64 shared = FromString<ui64>(fields[2]);
- if (PageSize == 0) {
- PageSize = ObtainPageSize();
- }
- FileRss = shared * PageSize;
- AnonRss = (resident - shared) * PageSize;
- }
- TFileInput cgroup("/proc/" + strPid + "/cgroup");
- TString line;
- TString memoryCGroup;
- while (cgroup.ReadLine(line) > 0) {
- StringSplitter(line).Split(':').Collect(&fields);
- if (fields.size() > 2 && fields[1] == "memory") {
- memoryCGroup = fields[2];
- break;
- }
- }
- if (!memoryCGroup.empty()) {
- TFileInput limit("/sys/fs/cgroup/memory" + memoryCGroup + "/memory.limit_in_bytes");
- if (limit.ReadLine(line) > 0) {
- CGroupMemLim = FromString<ui64>(line);
- if (CGroupMemLim > (1ULL << 40)) {
- CGroupMemLim = 0;
- }
- }
- }
- } catch (...) {
- return false;
- }
- return true;
- }
- long TProcStat::ObtainPageSize() {
- long sz = sysconf(_SC_PAGESIZE);
- return sz;
- }
- #else
- bool TProcStat::Fill(pid_t pid) {
- Y_UNUSED(pid);
- return false;
- }
- long TProcStat::ObtainPageSize() {
- return 0;
- }
- #endif
- namespace {
- // Periodically collects process stats and exposes them as mon counters
- template <typename TDerived>
- class TProcStatCollectingActor: public TActorBootstrapped<TProcStatCollectingActor<TDerived>> {
- public:
- static constexpr IActor::EActivityType ActorActivityType() {
- return IActor::ACTORLIB_STATS;
- }
- TProcStatCollectingActor(TDuration interval)
- : Interval(interval)
- {
- }
- void Bootstrap(const TActorContext& ctx) {
- ctx.Schedule(Interval, new TEvents::TEvWakeup());
- Self()->Become(&TDerived::StateWork);
- }
- STFUNC(StateWork) {
- switch (ev->GetTypeRewrite()) {
- CFunc(TEvents::TSystem::Wakeup, Wakeup);
- }
- }
- private:
- void Wakeup(const TActorContext& ctx) {
- Self()->UpdateCounters(ProcStat);
- ctx.Schedule(Interval, new TEvents::TEvWakeup());
- }
- TDerived* Self() {
- ProcStat.Fill(getpid());
- return static_cast<TDerived*>(this);
- }
- private:
- const TDuration Interval;
- TProcStat ProcStat;
- };
- // Periodically collects process stats and exposes them as mon counters
- class TDynamicCounterCollector: public TProcStatCollectingActor<TDynamicCounterCollector> {
- using TBase = TProcStatCollectingActor<TDynamicCounterCollector>;
- public:
- TDynamicCounterCollector(
- ui32 intervalSeconds,
- NMonitoring::TDynamicCounterPtr counters)
- : TBase{TDuration::Seconds(intervalSeconds)}
- {
- ProcStatGroup = counters->GetSubgroup("counters", "utils");
- VmSize = ProcStatGroup->GetCounter("Process/VmSize", false);
- AnonRssSize = ProcStatGroup->GetCounter("Process/AnonRssSize", false);
- FileRssSize = ProcStatGroup->GetCounter("Process/FileRssSize", false);
- CGroupMemLimit = ProcStatGroup->GetCounter("Process/CGroupMemLimit", false);
- UserTime = ProcStatGroup->GetCounter("Process/UserTime", true);
- SysTime = ProcStatGroup->GetCounter("Process/SystemTime", true);
- MinorPageFaults = ProcStatGroup->GetCounter("Process/MinorPageFaults", true);
- MajorPageFaults = ProcStatGroup->GetCounter("Process/MajorPageFaults", true);
- UptimeSeconds = ProcStatGroup->GetCounter("Process/UptimeSeconds", false);
- NumThreads = ProcStatGroup->GetCounter("Process/NumThreads", false);
- SystemUptimeSeconds = ProcStatGroup->GetCounter("System/UptimeSeconds", false);
- }
- void UpdateCounters(const TProcStat& procStat) {
- *VmSize = procStat.Vsize;
- *AnonRssSize = procStat.AnonRss;
- *FileRssSize = procStat.FileRss;
- if (procStat.CGroupMemLim) {
- *CGroupMemLimit = procStat.CGroupMemLim;
- }
- *UserTime = procStat.Utime;
- *SysTime = procStat.Stime;
- *MinorPageFaults = procStat.MinFlt;
- *MajorPageFaults = procStat.MajFlt;
- *UptimeSeconds = procStat.Uptime.Seconds();
- *NumThreads = procStat.NumThreads;
- *SystemUptimeSeconds = procStat.Uptime.Seconds();
- }
- private:
- NMonitoring::TDynamicCounterPtr ProcStatGroup;
- NMonitoring::TDynamicCounters::TCounterPtr VmSize;
- NMonitoring::TDynamicCounters::TCounterPtr AnonRssSize;
- NMonitoring::TDynamicCounters::TCounterPtr FileRssSize;
- NMonitoring::TDynamicCounters::TCounterPtr CGroupMemLimit;
- NMonitoring::TDynamicCounters::TCounterPtr UserTime;
- NMonitoring::TDynamicCounters::TCounterPtr SysTime;
- NMonitoring::TDynamicCounters::TCounterPtr MinorPageFaults;
- NMonitoring::TDynamicCounters::TCounterPtr MajorPageFaults;
- NMonitoring::TDynamicCounters::TCounterPtr UptimeSeconds;
- NMonitoring::TDynamicCounters::TCounterPtr NumThreads;
- NMonitoring::TDynamicCounters::TCounterPtr SystemUptimeSeconds;
- };
- class TRegistryCollector: public TProcStatCollectingActor<TRegistryCollector> {
- using TBase = TProcStatCollectingActor<TRegistryCollector>;
- public:
- TRegistryCollector(TDuration interval, NMonitoring::TMetricRegistry& registry)
- : TBase{interval}
- {
- VmSize = registry.IntGauge({{"sensor", "process.VmSize"}});
- AnonRssSize = registry.IntGauge({{"sensor", "process.AnonRssSize"}});
- FileRssSize = registry.IntGauge({{"sensor", "process.FileRssSize"}});
- CGroupMemLimit = registry.IntGauge({{"sensor", "process.CGroupMemLimit"}});
- UptimeSeconds = registry.IntGauge({{"sensor", "process.UptimeSeconds"}});
- NumThreads = registry.IntGauge({{"sensor", "process.NumThreads"}});
- SystemUptimeSeconds = registry.IntGauge({{"sensor", "system.UptimeSeconds"}});
- UserTime = registry.Rate({{"sensor", "process.UserTime"}});
- SysTime = registry.Rate({{"sensor", "process.SystemTime"}});
- MinorPageFaults = registry.Rate({{"sensor", "process.MinorPageFaults"}});
- MajorPageFaults = registry.Rate({{"sensor", "process.MajorPageFaults"}});
- }
- void UpdateCounters(const TProcStat& procStat) {
- VmSize->Set(procStat.Vsize);
- AnonRssSize->Set(procStat.AnonRss);
- FileRssSize->Set(procStat.FileRss);
- CGroupMemLimit->Set(procStat.CGroupMemLim);
- UptimeSeconds->Set(procStat.Uptime.Seconds());
- NumThreads->Set(procStat.NumThreads);
- SystemUptimeSeconds->Set(procStat.SystemUptime.Seconds());
- // it is ok here to reset and add metric value, because mutation
- // is performed in siglethreaded context
- UserTime->Reset();
- UserTime->Add(procStat.Utime);
- SysTime->Reset();
- SysTime->Add(procStat.Stime);
- MinorPageFaults->Reset();
- MinorPageFaults->Add(procStat.MinFlt);
- MajorPageFaults->Reset();
- MajorPageFaults->Add(procStat.MajFlt);
- }
- private:
- NMonitoring::TIntGauge* VmSize;
- NMonitoring::TIntGauge* AnonRssSize;
- NMonitoring::TIntGauge* FileRssSize;
- NMonitoring::TIntGauge* CGroupMemLimit;
- NMonitoring::TRate* UserTime;
- NMonitoring::TRate* SysTime;
- NMonitoring::TRate* MinorPageFaults;
- NMonitoring::TRate* MajorPageFaults;
- NMonitoring::TIntGauge* UptimeSeconds;
- NMonitoring::TIntGauge* NumThreads;
- NMonitoring::TIntGauge* SystemUptimeSeconds;
- };
- } // namespace
- IActor* CreateProcStatCollector(ui32 intervalSec, NMonitoring::TDynamicCounterPtr counters) {
- return new TDynamicCounterCollector(intervalSec, counters);
- }
- IActor* CreateProcStatCollector(TDuration interval, NMonitoring::TMetricRegistry& registry) {
- return new TRegistryCollector(interval, registry);
- }
- }
|