@@ -0,0 +1,229 @@
+// dashboard generator for these metrics can be found at: github.com/ydb-platform/ydb/arcadia/library/go/yandex/monitoring-dashboards
+package collect
+import (
+ "context"
+ "os"
+ "runtime"
+ "runtime/debug"
+ "time"
+ "github.com/prometheus/procfs"
+ "github.com/ydb-platform/ydb/library/go/core/buildinfo"
+ "github.com/ydb-platform/ydb/library/go/core/metrics"
+var _ Func = GoMetrics // Compile-time check that GoMetrics is assignable to Func.
+func GoMetrics(_ context.Context, r metrics.Registry, c metrics.CollectPolicy) { // GoMetrics registers Go runtime metrics (GC, goroutines, threads, memory) on r; the context argument is unused.
+ if r == nil { // Without a registry nothing is registered and no collect callback is added.
+ return
+ }
+ r = r.WithPrefix("go") // Every metric below is registered under the "go" prefix.
+ var stats debug.GCStats // GC snapshot; written by the collect callback, read by the metric closures.
+ stats.PauseQuantiles = make([]time.Duration, 5) // Minimum, 25%, 50%, 75%, and maximum pause times.
+ var numGoroutine, numThread int // Written by the collect callback, read by the goroutine/thread gauges.
+ var ms runtime.MemStats // Memory snapshot; written by the collect callback, read by the metric closures.
+ c.AddCollect(func(context.Context) { // Refreshes all snapshots above; when it runs is decided by c.
+ debug.ReadGCStats(&stats)
+ runtime.ReadMemStats(&ms)
+ numThread, _ = runtime.ThreadCreateProfile(nil) // Only the returned record count is kept; the ok result is discarded.
+ numGoroutine = runtime.NumGoroutine()
+ })
+ gcRegistry := r.WithPrefix("gc") // GC metrics get an additional "gc" prefix.
+ gcRegistry.FuncCounter("num", c.RegisteredCounter(func() int64 {
+ return stats.NumGC
+ }))
+ gcRegistry.FuncCounter(r.ComposeName("pause", "total", "ns"), c.RegisteredCounter(func() int64 { // Name is composed via r, metric is registered on gcRegistry.
+ return stats.PauseTotal.Nanoseconds() // Total pause is reported in nanoseconds.
+ }))
+ gcRegistry.FuncGauge(r.ComposeName("pause", "quantile", "min"), c.RegisteredGauge(func() float64 {
+ return stats.PauseQuantiles[0].Seconds() // Quantile gauges are reported in seconds, unlike the nanosecond pause total.
+ }))
+ gcRegistry.FuncGauge(r.ComposeName("pause", "quantile", "25"), c.RegisteredGauge(func() float64 {
+ return stats.PauseQuantiles[1].Seconds()
+ }))
+ gcRegistry.FuncGauge(r.ComposeName("pause", "quantile", "50"), c.RegisteredGauge(func() float64 {
+ return stats.PauseQuantiles[2].Seconds()
+ }))
+ gcRegistry.FuncGauge(r.ComposeName("pause", "quantile", "75"), c.RegisteredGauge(func() float64 {
+ return stats.PauseQuantiles[3].Seconds()
+ }))
+ gcRegistry.FuncGauge(r.ComposeName("pause", "quantile", "max"), c.RegisteredGauge(func() float64 {
+ return stats.PauseQuantiles[4].Seconds()
+ }))
+ gcRegistry.FuncGauge(r.ComposeName("last", "ts"), c.RegisteredGauge(func() float64 {
+ return float64(ms.LastGC) // Raw MemStats.LastGC value (documented by package runtime as nanoseconds since the UNIX epoch).
+ }))
+ gcRegistry.FuncCounter(r.ComposeName("forced", "num"), c.RegisteredCounter(func() int64 {
+ return int64(ms.NumForcedGC)
+ }))
+ r.FuncGauge(r.ComposeName("goroutine", "num"), c.RegisteredGauge(func() float64 { // Registered directly on r, i.e. under "go" without the "gc" prefix.
+ return float64(numGoroutine)
+ }))
+ r.FuncGauge(r.ComposeName("thread", "num"), c.RegisteredGauge(func() float64 {
+ return float64(numThread) // Count obtained from runtime.ThreadCreateProfile in the collect callback.
+ }))
+ memRegistry := r.WithPrefix("mem") // Memory metrics get an additional "mem" prefix; all values come from the ms snapshot.
+ memRegistry.FuncCounter(r.ComposeName("alloc", "total"), c.RegisteredCounter(func() int64 {
+ return int64(ms.TotalAlloc)
+ }))
+ memRegistry.FuncGauge("sys", c.RegisteredGauge(func() float64 {
+ return float64(ms.Sys)
+ }))
+ memRegistry.FuncCounter("lookups", c.RegisteredCounter(func() int64 {
+ return int64(ms.Lookups)
+ }))
+ memRegistry.FuncCounter("mallocs", c.RegisteredCounter(func() int64 {
+ return int64(ms.Mallocs)
+ }))
+ memRegistry.FuncCounter("frees", c.RegisteredCounter(func() int64 {
+ return int64(ms.Frees)
+ }))
+ memRegistry.FuncGauge(r.ComposeName("heap", "alloc"), c.RegisteredGauge(func() float64 {
+ return float64(ms.HeapAlloc)
+ }))
+ memRegistry.FuncGauge(r.ComposeName("heap", "sys"), c.RegisteredGauge(func() float64 {
+ return float64(ms.HeapSys)
+ }))
+ memRegistry.FuncGauge(r.ComposeName("heap", "idle"), c.RegisteredGauge(func() float64 {
+ return float64(ms.HeapIdle)
+ }))
+ memRegistry.FuncGauge(r.ComposeName("heap", "inuse"), c.RegisteredGauge(func() float64 {
+ return float64(ms.HeapInuse)
+ }))
+ memRegistry.FuncGauge(r.ComposeName("heap", "released"), c.RegisteredGauge(func() float64 {
+ return float64(ms.HeapReleased)
+ }))
+ memRegistry.FuncGauge(r.ComposeName("heap", "objects"), c.RegisteredGauge(func() float64 {
+ return float64(ms.HeapObjects)
+ }))
+ memRegistry.FuncGauge(r.ComposeName("stack", "inuse"), c.RegisteredGauge(func() float64 {
+ return float64(ms.StackInuse)
+ }))
+ memRegistry.FuncGauge(r.ComposeName("stack", "sys"), c.RegisteredGauge(func() float64 {
+ return float64(ms.StackSys)
+ }))
+ memRegistry.FuncGauge(r.ComposeName("span", "inuse"), c.RegisteredGauge(func() float64 {
+ return float64(ms.MSpanInuse)
+ }))
+ memRegistry.FuncGauge(r.ComposeName("span", "sys"), c.RegisteredGauge(func() float64 {
+ return float64(ms.MSpanSys)
+ }))
+ memRegistry.FuncGauge(r.ComposeName("cache", "inuse"), c.RegisteredGauge(func() float64 {
+ return float64(ms.MCacheInuse)
+ }))
+ memRegistry.FuncGauge(r.ComposeName("cache", "sys"), c.RegisteredGauge(func() float64 {
+ return float64(ms.MCacheSys)
+ }))
+ memRegistry.FuncGauge(r.ComposeName("buck", "hash", "sys"), c.RegisteredGauge(func() float64 {
+ return float64(ms.BuckHashSys)
+ }))
+ memRegistry.FuncGauge(r.ComposeName("gc", "sys"), c.RegisteredGauge(func() float64 {
+ return float64(ms.GCSys)
+ }))
+ memRegistry.FuncGauge(r.ComposeName("other", "sys"), c.RegisteredGauge(func() float64 {
+ return float64(ms.OtherSys)
+ }))
+ memRegistry.FuncGauge(r.ComposeName("gc", "next"), c.RegisteredGauge(func() float64 {
+ return float64(ms.NextGC)
+ }))
+ memRegistry.FuncGauge(r.ComposeName("gc", "cpu", "fraction"), c.RegisteredGauge(func() float64 {
+ return ms.GCCPUFraction // Returned as-is, without a float64 conversion.
+ }))
+var _ Func = ProcessMetrics // Compile-time check that ProcessMetrics is assignable to Func.
+func ProcessMetrics(_ context.Context, r metrics.Registry, c metrics.CollectPolicy) { // ProcessMetrics registers a build-revision gauge and per-process metrics read through procfs; the context argument is unused.
+ if r == nil { // Without a registry nothing is registered and no collect callback is added.
+ return
+ }
+ buildVersion := buildinfo.Info.ArcadiaSourceRevision
+ r.WithTags(map[string]string{"revision": buildVersion}).Gauge("build").Set(1.0) // "build" gauge tagged with the revision and set to 1; registered on r before procfs is touched.
+ pid := os.Getpid()
+ proc, err := procfs.NewProc(pid)
+ if err != nil { // No procfs handle: the error is dropped and none of the "proc" metrics below are registered.
+ return
+ }
+ procRegistry := r.WithPrefix("proc") // Process metrics are registered under the "proc" prefix.
+ var ioStat procfs.ProcIO // Snapshots below are written by the collect callback and read by the metric closures.
+ var procStat procfs.ProcStat
+ var fd int
+ var cpuWait uint64
+ const clocksPerSec = 100 // Divisor used to convert UTime/STime to nanoseconds. NOTE(review): hard-coded; presumably the kernel USER_HZ — confirm for target platforms.
+ c.AddCollect(func(ctx context.Context) { // Refreshes the snapshots; a reading that returns an error leaves the previous value in place.
+ if gatheredFD, err := proc.FileDescriptorsLen(); err == nil {
+ fd = gatheredFD
+ }
+ if gatheredIOStat, err := proc.IO(); err == nil { // Only the four fields exported below are copied.
+ ioStat.SyscW = gatheredIOStat.SyscW
+ ioStat.WriteBytes = gatheredIOStat.WriteBytes
+ ioStat.SyscR = gatheredIOStat.SyscR
+ ioStat.ReadBytes = gatheredIOStat.ReadBytes
+ }
+ if gatheredStat, err := proc.Stat(); err == nil { // Only UTime, STime and RSS are copied.
+ procStat.UTime = gatheredStat.UTime
+ procStat.STime = gatheredStat.STime
+ procStat.RSS = gatheredStat.RSS
+ }
+ if gatheredSched, err := proc.Schedstat(); err == nil {
+ cpuWait = gatheredSched.WaitingNanoseconds
+ }
+ })
+ procRegistry.FuncGauge("fd", c.RegisteredGauge(func() float64 {
+ return float64(fd) // Value obtained from proc.FileDescriptorsLen in the collect callback.
+ }))
+ ioRegistry := procRegistry.WithPrefix("io") // I/O counters get an additional "io" prefix.
+ ioRegistry.FuncCounter(r.ComposeName("read", "count"), c.RegisteredCounter(func() int64 {
+ return int64(ioStat.SyscR)
+ }))
+ ioRegistry.FuncCounter(r.ComposeName("read", "bytes"), c.RegisteredCounter(func() int64 {
+ return int64(ioStat.ReadBytes)
+ }))
+ ioRegistry.FuncCounter(r.ComposeName("write", "count"), c.RegisteredCounter(func() int64 {
+ return int64(ioStat.SyscW)
+ }))
+ ioRegistry.FuncCounter(r.ComposeName("write", "bytes"), c.RegisteredCounter(func() int64 {
+ return int64(ioStat.WriteBytes)
+ }))
+ cpuRegistry := procRegistry.WithPrefix("cpu") // CPU counters get an additional "cpu" prefix.
+ cpuRegistry.FuncCounter(r.ComposeName("total", "ns"), c.RegisteredCounter(func() int64 {
+ return int64(procStat.UTime+procStat.STime) * (1_000_000_000 / clocksPerSec) // User plus system time, scaled by 1e9/clocksPerSec to nanoseconds.
+ }))
+ cpuRegistry.FuncCounter(r.ComposeName("user", "ns"), c.RegisteredCounter(func() int64 {
+ return int64(procStat.UTime) * (1_000_000_000 / clocksPerSec)
+ }))
+ cpuRegistry.FuncCounter(r.ComposeName("system", "ns"), c.RegisteredCounter(func() int64 {
+ return int64(procStat.STime) * (1_000_000_000 / clocksPerSec)
+ }))
+ cpuRegistry.FuncCounter(r.ComposeName("wait", "ns"), c.RegisteredCounter(func() int64 {
+ return int64(cpuWait) // Taken from Schedstat's WaitingNanoseconds; no scaling applied.
+ }))
+ procRegistry.FuncGauge(r.ComposeName("mem", "rss"), c.RegisteredGauge(func() float64 {
+ return float64(procStat.RSS) // NOTE(review): exported with no unit conversion; procfs documents ProcStat.RSS in pages — confirm the intended unit.
+ }))