metrics.go 11 KB


  1. package frankenphp
  2. import (
  3. "errors"
  4. "regexp"
  5. "sync"
  6. "time"
  7. "github.com/dunglas/frankenphp/internal/fastabs"
  8. "github.com/prometheus/client_golang/prometheus"
  9. )
  var (
  	// metricsNameRegex matches every run of one or more non-word characters.
  	metricsNameRegex = regexp.MustCompile(`\W+`)

  	// metricsNameFixRegex matches a run of underscores at the very start or
  	// the very end of a string.
  	metricsNameFixRegex = regexp.MustCompile(`^_+|_+$`)
  )
  // StopReason tells StopWorker why a worker stopped.
  type StopReason int

  // Reasons accepted by StopWorker. The constants are untyped integers, so
  // they can be used wherever a StopReason or a plain int is expected.
  const (
  	// StopReasonCrash marks a worker that stopped because it crashed.
  	StopReasonCrash = iota
  	// StopReasonRestart marks a worker that stopped in order to be restarted.
  	StopReasonRestart
  	// StopReasonShutdown marks a worker that stopped as part of a shutdown.
  	StopReasonShutdown
  )
  18. type Metrics interface {
  19. // StartWorker collects started workers
  20. StartWorker(name string)
  21. // ReadyWorker collects ready workers
  22. ReadyWorker(name string)
  23. // StopWorker collects stopped workers
  24. StopWorker(name string, reason StopReason)
  25. // TotalWorkers collects expected workers
  26. TotalWorkers(name string, num int)
  27. // TotalThreads collects total threads
  28. TotalThreads(num int)
  29. // StartRequest collects started requests
  30. StartRequest()
  31. // StopRequest collects stopped requests
  32. StopRequest()
  33. // StopWorkerRequest collects stopped worker requests
  34. StopWorkerRequest(name string, duration time.Duration)
  35. // StartWorkerRequest collects started worker requests
  36. StartWorkerRequest(name string)
  37. Shutdown()
  38. QueuedWorkerRequest(name string)
  39. DequeuedWorkerRequest(name string)
  40. QueuedRequest()
  41. DequeuedRequest()
  42. }
  43. type nullMetrics struct{}
  44. func (n nullMetrics) StartWorker(string) {
  45. }
  46. func (n nullMetrics) ReadyWorker(string) {
  47. }
  48. func (n nullMetrics) StopWorker(string, StopReason) {
  49. }
  50. func (n nullMetrics) TotalWorkers(string, int) {
  51. }
  52. func (n nullMetrics) TotalThreads(int) {
  53. }
  54. func (n nullMetrics) StartRequest() {
  55. }
  56. func (n nullMetrics) StopRequest() {
  57. }
  58. func (n nullMetrics) StopWorkerRequest(string, time.Duration) {
  59. }
  60. func (n nullMetrics) StartWorkerRequest(string) {
  61. }
  62. func (n nullMetrics) Shutdown() {
  63. }
  64. func (n nullMetrics) QueuedWorkerRequest(name string) {}
  65. func (n nullMetrics) DequeuedWorkerRequest(name string) {}
  66. func (n nullMetrics) QueuedRequest() {}
  67. func (n nullMetrics) DequeuedRequest() {}
  68. type PrometheusMetrics struct {
  69. registry prometheus.Registerer
  70. totalThreads prometheus.Counter
  71. busyThreads prometheus.Gauge
  72. totalWorkers map[string]prometheus.Gauge
  73. busyWorkers map[string]prometheus.Gauge
  74. readyWorkers map[string]prometheus.Gauge
  75. workerCrashes map[string]prometheus.Counter
  76. workerRestarts map[string]prometheus.Counter
  77. workerRequestTime map[string]prometheus.Counter
  78. workerRequestCount map[string]prometheus.Counter
  79. workerQueueDepth map[string]prometheus.Gauge
  80. queueDepth prometheus.Gauge
  81. mu sync.Mutex
  82. }
  83. func (m *PrometheusMetrics) StartWorker(name string) {
  84. m.busyThreads.Inc()
  85. // tests do not register workers before starting them
  86. if _, ok := m.totalWorkers[name]; !ok {
  87. return
  88. }
  89. m.totalWorkers[name].Inc()
  90. }
  91. func (m *PrometheusMetrics) ReadyWorker(name string) {
  92. if _, ok := m.totalWorkers[name]; !ok {
  93. return
  94. }
  95. m.readyWorkers[name].Inc()
  96. }
  97. func (m *PrometheusMetrics) StopWorker(name string, reason StopReason) {
  98. m.busyThreads.Dec()
  99. // tests do not register workers before starting them
  100. if _, ok := m.totalWorkers[name]; !ok {
  101. return
  102. }
  103. m.totalWorkers[name].Dec()
  104. m.readyWorkers[name].Dec()
  105. if reason == StopReasonCrash {
  106. m.workerCrashes[name].Inc()
  107. } else if reason == StopReasonRestart {
  108. m.workerRestarts[name].Inc()
  109. } else if reason == StopReasonShutdown {
  110. m.totalWorkers[name].Dec()
  111. }
  112. }
  113. func (m *PrometheusMetrics) getIdentity(name string) (string, error) {
  114. actualName, err := fastabs.FastAbs(name)
  115. if err != nil {
  116. return name, err
  117. }
  118. return actualName, nil
  119. }
  120. func (m *PrometheusMetrics) TotalWorkers(name string, _ int) {
  121. m.mu.Lock()
  122. defer m.mu.Unlock()
  123. identity, err := m.getIdentity(name)
  124. if err != nil {
  125. // do not create metrics, let error propagate when worker is started
  126. return
  127. }
  128. subsystem := getWorkerNameForMetrics(name)
  129. if _, ok := m.totalWorkers[identity]; !ok {
  130. m.totalWorkers[identity] = prometheus.NewGauge(prometheus.GaugeOpts{
  131. Namespace: "frankenphp",
  132. Subsystem: subsystem,
  133. Name: "total_workers",
  134. Help: "Total number of PHP workers for this worker",
  135. })
  136. if err := m.registry.Register(m.totalWorkers[identity]); err != nil &&
  137. !errors.As(err, &prometheus.AlreadyRegisteredError{}) {
  138. panic(err)
  139. }
  140. }
  141. if _, ok := m.workerCrashes[identity]; !ok {
  142. m.workerCrashes[identity] = prometheus.NewCounter(prometheus.CounterOpts{
  143. Namespace: "frankenphp",
  144. Subsystem: subsystem,
  145. Name: "worker_crashes",
  146. Help: "Number of PHP worker crashes for this worker",
  147. })
  148. if err := m.registry.Register(m.workerCrashes[identity]); err != nil &&
  149. !errors.As(err, &prometheus.AlreadyRegisteredError{}) {
  150. panic(err)
  151. }
  152. }
  153. if _, ok := m.workerRestarts[identity]; !ok {
  154. m.workerRestarts[identity] = prometheus.NewCounter(prometheus.CounterOpts{
  155. Namespace: "frankenphp",
  156. Subsystem: subsystem,
  157. Name: "worker_restarts",
  158. Help: "Number of PHP worker restarts for this worker",
  159. })
  160. if err := m.registry.Register(m.workerRestarts[identity]); err != nil &&
  161. !errors.As(err, &prometheus.AlreadyRegisteredError{}) {
  162. panic(err)
  163. }
  164. }
  165. if _, ok := m.readyWorkers[identity]; !ok {
  166. m.readyWorkers[identity] = prometheus.NewGauge(prometheus.GaugeOpts{
  167. Namespace: "frankenphp",
  168. Subsystem: subsystem,
  169. Name: "ready_workers",
  170. Help: "Running workers that have successfully called frankenphp_handle_request at least once",
  171. })
  172. if err := m.registry.Register(m.readyWorkers[identity]); err != nil &&
  173. !errors.As(err, &prometheus.AlreadyRegisteredError{}) {
  174. panic(err)
  175. }
  176. }
  177. if _, ok := m.busyWorkers[identity]; !ok {
  178. m.busyWorkers[identity] = prometheus.NewGauge(prometheus.GaugeOpts{
  179. Namespace: "frankenphp",
  180. Subsystem: subsystem,
  181. Name: "busy_workers",
  182. Help: "Number of busy PHP workers for this worker",
  183. })
  184. if err := m.registry.Register(m.busyWorkers[identity]); err != nil &&
  185. !errors.As(err, &prometheus.AlreadyRegisteredError{}) {
  186. panic(err)
  187. }
  188. }
  189. if _, ok := m.workerRequestTime[identity]; !ok {
  190. m.workerRequestTime[identity] = prometheus.NewCounter(prometheus.CounterOpts{
  191. Namespace: "frankenphp",
  192. Subsystem: subsystem,
  193. Name: "worker_request_time",
  194. })
  195. if err := m.registry.Register(m.workerRequestTime[identity]); err != nil &&
  196. !errors.As(err, &prometheus.AlreadyRegisteredError{}) {
  197. panic(err)
  198. }
  199. }
  200. if _, ok := m.workerRequestCount[identity]; !ok {
  201. m.workerRequestCount[identity] = prometheus.NewCounter(prometheus.CounterOpts{
  202. Namespace: "frankenphp",
  203. Subsystem: subsystem,
  204. Name: "worker_request_count",
  205. })
  206. if err := m.registry.Register(m.workerRequestCount[identity]); err != nil &&
  207. !errors.As(err, &prometheus.AlreadyRegisteredError{}) {
  208. panic(err)
  209. }
  210. }
  211. if _, ok := m.workerQueueDepth[identity]; !ok {
  212. m.workerQueueDepth[identity] = prometheus.NewGauge(prometheus.GaugeOpts{
  213. Namespace: "frankenphp",
  214. Subsystem: subsystem,
  215. Name: "worker_queue_depth",
  216. })
  217. m.registry.MustRegister(m.workerQueueDepth[identity])
  218. }
  219. }
  220. func (m *PrometheusMetrics) TotalThreads(num int) {
  221. m.totalThreads.Add(float64(num))
  222. }
  223. func (m *PrometheusMetrics) StartRequest() {
  224. m.busyThreads.Inc()
  225. }
  226. func (m *PrometheusMetrics) StopRequest() {
  227. m.busyThreads.Dec()
  228. }
  229. func (m *PrometheusMetrics) StopWorkerRequest(name string, duration time.Duration) {
  230. if _, ok := m.workerRequestTime[name]; !ok {
  231. return
  232. }
  233. m.workerRequestCount[name].Inc()
  234. m.busyWorkers[name].Dec()
  235. m.workerRequestTime[name].Add(duration.Seconds())
  236. }
  237. func (m *PrometheusMetrics) StartWorkerRequest(name string) {
  238. if _, ok := m.busyWorkers[name]; !ok {
  239. return
  240. }
  241. m.busyWorkers[name].Inc()
  242. }
  243. func (m *PrometheusMetrics) QueuedWorkerRequest(name string) {
  244. if _, ok := m.workerQueueDepth[name]; !ok {
  245. return
  246. }
  247. m.workerQueueDepth[name].Inc()
  248. }
  249. func (m *PrometheusMetrics) DequeuedWorkerRequest(name string) {
  250. if _, ok := m.workerQueueDepth[name]; !ok {
  251. return
  252. }
  253. m.workerQueueDepth[name].Dec()
  254. }
  255. func (m *PrometheusMetrics) QueuedRequest() {
  256. m.queueDepth.Inc()
  257. }
  258. func (m *PrometheusMetrics) DequeuedRequest() {
  259. m.queueDepth.Dec()
  260. }
  261. func (m *PrometheusMetrics) Shutdown() {
  262. m.registry.Unregister(m.totalThreads)
  263. m.registry.Unregister(m.busyThreads)
  264. m.registry.Unregister(m.queueDepth)
  265. for _, g := range m.totalWorkers {
  266. m.registry.Unregister(g)
  267. }
  268. for _, g := range m.busyWorkers {
  269. m.registry.Unregister(g)
  270. }
  271. for _, c := range m.workerRequestTime {
  272. m.registry.Unregister(c)
  273. }
  274. for _, c := range m.workerRequestCount {
  275. m.registry.Unregister(c)
  276. }
  277. for _, c := range m.workerCrashes {
  278. m.registry.Unregister(c)
  279. }
  280. for _, c := range m.workerRestarts {
  281. m.registry.Unregister(c)
  282. }
  283. for _, g := range m.readyWorkers {
  284. m.registry.Unregister(g)
  285. }
  286. for _, g := range m.workerQueueDepth {
  287. m.registry.Unregister(g)
  288. }
  289. m.totalThreads = prometheus.NewCounter(prometheus.CounterOpts{
  290. Name: "frankenphp_total_threads",
  291. Help: "Total number of PHP threads",
  292. })
  293. m.busyThreads = prometheus.NewGauge(prometheus.GaugeOpts{
  294. Name: "frankenphp_busy_threads",
  295. Help: "Number of busy PHP threads",
  296. })
  297. m.totalWorkers = map[string]prometheus.Gauge{}
  298. m.busyWorkers = map[string]prometheus.Gauge{}
  299. m.workerRequestTime = map[string]prometheus.Counter{}
  300. m.workerRequestCount = map[string]prometheus.Counter{}
  301. m.workerRestarts = map[string]prometheus.Counter{}
  302. m.workerCrashes = map[string]prometheus.Counter{}
  303. m.readyWorkers = map[string]prometheus.Gauge{}
  304. m.workerQueueDepth = map[string]prometheus.Gauge{}
  305. m.queueDepth = prometheus.NewGauge(prometheus.GaugeOpts{
  306. Name: "frankenphp_queue_depth",
  307. Help: "Number of regular queued requests",
  308. })
  309. if err := m.registry.Register(m.totalThreads); err != nil &&
  310. !errors.As(err, &prometheus.AlreadyRegisteredError{}) {
  311. panic(err)
  312. }
  313. if err := m.registry.Register(m.busyThreads); err != nil &&
  314. !errors.As(err, &prometheus.AlreadyRegisteredError{}) {
  315. panic(err)
  316. }
  317. if err := m.registry.Register(m.queueDepth); err != nil &&
  318. !errors.As(err, &prometheus.AlreadyRegisteredError{}) {
  319. panic(err)
  320. }
  321. }
  322. func getWorkerNameForMetrics(name string) string {
  323. name = metricsNameRegex.ReplaceAllString(name, "_")
  324. name = metricsNameFixRegex.ReplaceAllString(name, "")
  325. return name
  326. }
  327. func NewPrometheusMetrics(registry prometheus.Registerer) *PrometheusMetrics {
  328. if registry == nil {
  329. registry = prometheus.NewRegistry()
  330. }
  331. m := &PrometheusMetrics{
  332. registry: registry,
  333. totalThreads: prometheus.NewCounter(prometheus.CounterOpts{
  334. Name: "frankenphp_total_threads",
  335. Help: "Total number of PHP threads",
  336. }),
  337. busyThreads: prometheus.NewGauge(prometheus.GaugeOpts{
  338. Name: "frankenphp_busy_threads",
  339. Help: "Number of busy PHP threads",
  340. }),
  341. totalWorkers: map[string]prometheus.Gauge{},
  342. busyWorkers: map[string]prometheus.Gauge{},
  343. workerRequestTime: map[string]prometheus.Counter{},
  344. workerRequestCount: map[string]prometheus.Counter{},
  345. workerRestarts: map[string]prometheus.Counter{},
  346. workerCrashes: map[string]prometheus.Counter{},
  347. readyWorkers: map[string]prometheus.Gauge{},
  348. workerQueueDepth: map[string]prometheus.Gauge{},
  349. queueDepth: prometheus.NewGauge(prometheus.GaugeOpts{
  350. Name: "frankenphp_queue_depth",
  351. Help: "Number of regular queued requests",
  352. }),
  353. }
  354. if err := m.registry.Register(m.totalThreads); err != nil &&
  355. !errors.As(err, &prometheus.AlreadyRegisteredError{}) {
  356. panic(err)
  357. }
  358. if err := m.registry.Register(m.busyThreads); err != nil &&
  359. !errors.As(err, &prometheus.AlreadyRegisteredError{}) {
  360. panic(err)
  361. }
  362. if err := m.registry.Register(m.queueDepth); err != nil &&
  363. !errors.As(err, &prometheus.AlreadyRegisteredError{}) {
  364. panic(err)
  365. }
  366. return m
  367. }