metrics.go 9.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308
  1. package stats
  2. import (
  3. "fmt"
  4. "log"
  5. "net"
  6. "net/http"
  7. "os"
  8. "strconv"
  9. "strings"
  10. "time"
  11. "github.com/prometheus/client_golang/prometheus"
  12. "github.com/prometheus/client_golang/prometheus/collectors"
  13. "github.com/prometheus/client_golang/prometheus/promhttp"
  14. "github.com/prometheus/client_golang/prometheus/push"
  15. "github.com/seaweedfs/seaweedfs/weed/glog"
  16. )
  // Namespace is the Prometheus namespace used by every metric declared in this
  // package.
  const Namespace = "SeaweedFS"

  // Read-only volume types. These strings are the values passed as the "type"
  // label of VolumeServerReadOnlyVolumeGauge (see DeleteCollectionMetrics).
  // NOTE(review): IsReadOnly is capitalised differently from its siblings; the
  // value is emitted at runtime, so it is left exactly as it was.
  const (
  	IsReadOnly       = "IsReadOnly"
  	NoWriteOrDelete  = "noWriteOrDelete"
  	NoWriteCanDelete = "noWriteCanDelete"
  	IsDiskSpaceLow   = "isDiskSpaceLow"
  )

  // readOnlyVolumeTypes lists every read-only volume type so that callers can
  // iterate over all of them.
  var readOnlyVolumeTypes = [...]string{
  	IsReadOnly,
  	NoWriteOrDelete,
  	NoWriteCanDelete,
  	IsDiskSpaceLow,
  }
  26. var (
  27. Gather = prometheus.NewRegistry()
  28. MasterClientConnectCounter = prometheus.NewCounterVec(
  29. prometheus.CounterOpts{
  30. Namespace: Namespace,
  31. Subsystem: "wdclient",
  32. Name: "connect_updates",
  33. Help: "Counter of master client leader updates.",
  34. }, []string{"type"})
  35. MasterRaftIsleader = prometheus.NewGauge(
  36. prometheus.GaugeOpts{
  37. Namespace: Namespace,
  38. Subsystem: "master",
  39. Name: "is_leader",
  40. Help: "is leader",
  41. })
  42. MasterAdminLock = prometheus.NewGaugeVec(
  43. prometheus.GaugeOpts{
  44. Namespace: Namespace,
  45. Subsystem: "master",
  46. Name: "admin_lock",
  47. Help: "admin lock",
  48. }, []string{"client"})
  49. MasterReceivedHeartbeatCounter = prometheus.NewCounterVec(
  50. prometheus.CounterOpts{
  51. Namespace: Namespace,
  52. Subsystem: "master",
  53. Name: "received_heartbeats",
  54. Help: "Counter of master received heartbeat.",
  55. }, []string{"type"})
  56. MasterReplicaPlacementMismatch = prometheus.NewGaugeVec(
  57. prometheus.GaugeOpts{
  58. Namespace: Namespace,
  59. Subsystem: "master",
  60. Name: "replica_placement_mismatch",
  61. Help: "replica placement mismatch",
  62. }, []string{"collection", "id"})
  63. MasterLeaderChangeCounter = prometheus.NewCounterVec(
  64. prometheus.CounterOpts{
  65. Namespace: Namespace,
  66. Subsystem: "master",
  67. Name: "leader_changes",
  68. Help: "Counter of master leader changes.",
  69. }, []string{"type"})
  70. FilerRequestCounter = prometheus.NewCounterVec(
  71. prometheus.CounterOpts{
  72. Namespace: Namespace,
  73. Subsystem: "filer",
  74. Name: "request_total",
  75. Help: "Counter of filer requests.",
  76. }, []string{"type"})
  77. FilerRequestHistogram = prometheus.NewHistogramVec(
  78. prometheus.HistogramOpts{
  79. Namespace: Namespace,
  80. Subsystem: "filer",
  81. Name: "request_seconds",
  82. Help: "Bucketed histogram of filer request processing time.",
  83. Buckets: prometheus.ExponentialBuckets(0.0001, 2, 24),
  84. }, []string{"type"})
  85. FilerServerLastSendTsOfSubscribeGauge = prometheus.NewGaugeVec(
  86. prometheus.GaugeOpts{
  87. Namespace: Namespace,
  88. Subsystem: "filer",
  89. Name: "last_send_timestamp_of_subscribe",
  90. Help: "The last send timestamp of the filer subscription.",
  91. }, []string{"sourceFiler", "clientName", "path"})
  92. FilerStoreCounter = prometheus.NewCounterVec(
  93. prometheus.CounterOpts{
  94. Namespace: Namespace,
  95. Subsystem: "filerStore",
  96. Name: "request_total",
  97. Help: "Counter of filer store requests.",
  98. }, []string{"store", "type"})
  99. FilerStoreHistogram = prometheus.NewHistogramVec(
  100. prometheus.HistogramOpts{
  101. Namespace: Namespace,
  102. Subsystem: "filerStore",
  103. Name: "request_seconds",
  104. Help: "Bucketed histogram of filer store request processing time.",
  105. Buckets: prometheus.ExponentialBuckets(0.0001, 2, 24),
  106. }, []string{"store", "type"})
  107. FilerSyncOffsetGauge = prometheus.NewGaugeVec(
  108. prometheus.GaugeOpts{
  109. Namespace: Namespace,
  110. Subsystem: "filerSync",
  111. Name: "sync_offset",
  112. Help: "The offset of the filer synchronization service.",
  113. }, []string{"sourceFiler", "targetFiler", "clientName", "path"})
  114. VolumeServerRequestCounter = prometheus.NewCounterVec(
  115. prometheus.CounterOpts{
  116. Namespace: Namespace,
  117. Subsystem: "volumeServer",
  118. Name: "request_total",
  119. Help: "Counter of volume server requests.",
  120. }, []string{"type"})
  121. VolumeServerVacuumingCompactCounter = prometheus.NewCounterVec(
  122. prometheus.CounterOpts{
  123. Namespace: Namespace,
  124. Subsystem: "volumeServer",
  125. Name: "vacuuming_compact_count",
  126. Help: "Counter of volume vacuuming Compact counter",
  127. }, []string{"success"})
  128. VolumeServerVacuumingCommitCounter = prometheus.NewCounterVec(
  129. prometheus.CounterOpts{
  130. Namespace: Namespace,
  131. Subsystem: "volumeServer",
  132. Name: "vacuuming_commit_count",
  133. Help: "Counter of volume vacuuming commit counter",
  134. }, []string{"success"})
  135. VolumeServerVacuumingHistogram = prometheus.NewHistogramVec(
  136. prometheus.HistogramOpts{
  137. Namespace: Namespace,
  138. Subsystem: "volumeServer",
  139. Name: "vacuuming_seconds",
  140. Help: "Bucketed histogram of volume server vacuuming processing time.",
  141. Buckets: prometheus.ExponentialBuckets(0.0001, 2, 24),
  142. }, []string{"type"})
  143. VolumeServerRequestHistogram = prometheus.NewHistogramVec(
  144. prometheus.HistogramOpts{
  145. Namespace: Namespace,
  146. Subsystem: "volumeServer",
  147. Name: "request_seconds",
  148. Help: "Bucketed histogram of volume server request processing time.",
  149. Buckets: prometheus.ExponentialBuckets(0.0001, 2, 24),
  150. }, []string{"type"})
  151. VolumeServerVolumeCounter = prometheus.NewGaugeVec(
  152. prometheus.GaugeOpts{
  153. Namespace: Namespace,
  154. Subsystem: "volumeServer",
  155. Name: "volumes",
  156. Help: "Number of volumes or shards.",
  157. }, []string{"collection", "type"})
  158. VolumeServerReadOnlyVolumeGauge = prometheus.NewGaugeVec(
  159. prometheus.GaugeOpts{
  160. Namespace: Namespace,
  161. Subsystem: "volumeServer",
  162. Name: "read_only_volumes",
  163. Help: "Number of read only volumes.",
  164. }, []string{"collection", "type"})
  165. VolumeServerMaxVolumeCounter = prometheus.NewGauge(
  166. prometheus.GaugeOpts{
  167. Namespace: Namespace,
  168. Subsystem: "volumeServer",
  169. Name: "max_volumes",
  170. Help: "Maximum number of volumes.",
  171. })
  172. VolumeServerDiskSizeGauge = prometheus.NewGaugeVec(
  173. prometheus.GaugeOpts{
  174. Namespace: Namespace,
  175. Subsystem: "volumeServer",
  176. Name: "total_disk_size",
  177. Help: "Actual disk size used by volumes.",
  178. }, []string{"collection", "type"})
  179. VolumeServerResourceGauge = prometheus.NewGaugeVec(
  180. prometheus.GaugeOpts{
  181. Namespace: Namespace,
  182. Subsystem: "volumeServer",
  183. Name: "resource",
  184. Help: "Resource usage",
  185. }, []string{"name", "type"})
  186. S3RequestCounter = prometheus.NewCounterVec(
  187. prometheus.CounterOpts{
  188. Namespace: Namespace,
  189. Subsystem: "s3",
  190. Name: "request_total",
  191. Help: "Counter of s3 requests.",
  192. }, []string{"type", "code", "bucket"})
  193. S3RequestHistogram = prometheus.NewHistogramVec(
  194. prometheus.HistogramOpts{
  195. Namespace: Namespace,
  196. Subsystem: "s3",
  197. Name: "request_seconds",
  198. Help: "Bucketed histogram of s3 request processing time.",
  199. Buckets: prometheus.ExponentialBuckets(0.0001, 2, 24),
  200. }, []string{"type", "bucket"})
  201. )
  202. func init() {
  203. Gather.MustRegister(MasterClientConnectCounter)
  204. Gather.MustRegister(MasterRaftIsleader)
  205. Gather.MustRegister(MasterAdminLock)
  206. Gather.MustRegister(MasterReceivedHeartbeatCounter)
  207. Gather.MustRegister(MasterLeaderChangeCounter)
  208. Gather.MustRegister(MasterReplicaPlacementMismatch)
  209. Gather.MustRegister(FilerRequestCounter)
  210. Gather.MustRegister(FilerRequestHistogram)
  211. Gather.MustRegister(FilerStoreCounter)
  212. Gather.MustRegister(FilerStoreHistogram)
  213. Gather.MustRegister(FilerSyncOffsetGauge)
  214. Gather.MustRegister(FilerServerLastSendTsOfSubscribeGauge)
  215. Gather.MustRegister(collectors.NewGoCollector())
  216. Gather.MustRegister(collectors.NewProcessCollector(collectors.ProcessCollectorOpts{}))
  217. Gather.MustRegister(VolumeServerRequestCounter)
  218. Gather.MustRegister(VolumeServerRequestHistogram)
  219. Gather.MustRegister(VolumeServerVacuumingCompactCounter)
  220. Gather.MustRegister(VolumeServerVacuumingCommitCounter)
  221. Gather.MustRegister(VolumeServerVacuumingHistogram)
  222. Gather.MustRegister(VolumeServerVolumeCounter)
  223. Gather.MustRegister(VolumeServerMaxVolumeCounter)
  224. Gather.MustRegister(VolumeServerReadOnlyVolumeGauge)
  225. Gather.MustRegister(VolumeServerDiskSizeGauge)
  226. Gather.MustRegister(VolumeServerResourceGauge)
  227. Gather.MustRegister(S3RequestCounter)
  228. Gather.MustRegister(S3RequestHistogram)
  229. }
  230. func LoopPushingMetric(name, instance, addr string, intervalSeconds int) {
  231. if addr == "" || intervalSeconds == 0 {
  232. return
  233. }
  234. glog.V(0).Infof("%s server sends metrics to %s every %d seconds", name, addr, intervalSeconds)
  235. pusher := push.New(addr, name).Gatherer(Gather).Grouping("instance", instance)
  236. for {
  237. err := pusher.Push()
  238. if err != nil && !strings.HasPrefix(err.Error(), "unexpected status code 200") {
  239. glog.V(0).Infof("could not push metrics to prometheus push gateway %s: %v", addr, err)
  240. }
  241. if intervalSeconds <= 0 {
  242. intervalSeconds = 15
  243. }
  244. time.Sleep(time.Duration(intervalSeconds) * time.Second)
  245. }
  246. }
  247. func StartMetricsServer(ip string, port int) {
  248. if port == 0 {
  249. return
  250. }
  251. http.Handle("/metrics", promhttp.HandlerFor(Gather, promhttp.HandlerOpts{}))
  252. log.Fatal(http.ListenAndServe(fmt.Sprintf("%s:%d", ip, port), nil))
  253. }
  // SourceName returns "<hostname>:<port>" for the local machine, using the
  // hostname reported by the operating system. If the hostname cannot be
  // determined it returns the literal "unknown", without any port.
  func SourceName(port uint32) string {
  	if host, err := os.Hostname(); err == nil {
  		return net.JoinHostPort(host, strconv.Itoa(int(port)))
  	}
  	return "unknown"
  }
  261. // todo - can be changed to DeletePartialMatch when https://github.com/prometheus/client_golang/pull/1013 gets released
  262. func DeleteCollectionMetrics(collection string) {
  263. VolumeServerDiskSizeGauge.DeleteLabelValues(collection, "normal")
  264. for _, volume_type := range readOnlyVolumeTypes {
  265. VolumeServerReadOnlyVolumeGauge.DeleteLabelValues(collection, volume_type)
  266. }
  267. VolumeServerVolumeCounter.DeleteLabelValues(collection, "volume")
  268. }