# httpcheck.conf
  # Fast-reacting alarm without notifications (to: silent), meant for custom
  # dashboards or badges. The 1-minute average success percentage is reported
  # as-is when it is at least 75 and forced to 0 otherwise (see calc).
  template: httpcheck_web_service_up
  families: *
        on: httpcheck.status
     class: Utilization
      type: Web Server
 component: HTTP endpoint
    lookup: average -1m unaligned percentage of success
      calc: ($this < 75) ? (0) : ($this)
     every: 5s
     units: up/down
      info: average ratio of successful HTTP requests over the last minute (at least 75%)
        to: silent
  # Percentage of bad_content results on httpcheck.status, averaged over the
  # last 5 minutes. Warning band: 10 <= value < 40; critical: value >= 40.
  # Notifications are addressed to webmaster.
  template: httpcheck_web_service_bad_content
  families: *
        on: httpcheck.status
     class: Workload
      type: Web Server
 component: HTTP endpoint
    lookup: average -5m unaligned percentage of bad_content
     every: 10s
     units: %
      warn: $this >= 10 AND $this < 40
      crit: $this >= 40
     delay: down 5m multiplier 1.5 max 1h
      info: average ratio of HTTP responses with unexpected content over the last 5 minutes
   options: no-clear-notification
        to: webmaster
  # Percentage of bad_status results on httpcheck.status, averaged over the
  # last 5 minutes. Warning band: 10 <= value < 40; critical: value >= 40.
  # Notifications are addressed to webmaster.
  template: httpcheck_web_service_bad_status
  families: *
        on: httpcheck.status
     class: Workload
      type: Web Server
 component: HTTP endpoint
    lookup: average -5m unaligned percentage of bad_status
     every: 10s
     units: %
      warn: $this >= 10 AND $this < 40
      crit: $this >= 40
     delay: down 5m multiplier 1.5 max 1h
      info: average ratio of HTTP responses with unexpected status over the last 5 minutes
   options: no-clear-notification
        to: webmaster
  # Value-only template: 5-minute average percentage of the timeout dimension.
  # It defines no warn/crit thresholds and no recipient; its value is read by
  # httpcheck_web_service_unreachable as $httpcheck_web_service_timeouts.
  template: httpcheck_web_service_timeouts
  families: *
        on: httpcheck.status
     class: Latency
      type: Web Server
 component: HTTP endpoint
    lookup: average -5m unaligned percentage of timeout
     every: 10s
     units: %
      info: average ratio of HTTP request timeouts over the last 5 minutes
  # Value-only template: 5-minute average percentage of the no_connection
  # dimension. It defines no warn/crit thresholds and no recipient; its value
  # is read by httpcheck_web_service_unreachable as
  # $httpcheck_no_web_service_connections.
  template: httpcheck_no_web_service_connections
  families: *
        on: httpcheck.status
     class: Errors
      type: Other
 component: HTTP endpoint
    lookup: average -5m unaligned percentage of no_connection
     every: 10s
     units: %
      info: average ratio of failed requests during the last 5 minutes
  # Combined timeout & no-connection alarm.
  # calc reports the larger of the two 5-minute failure percentages produced by
  # httpcheck_no_web_service_connections and httpcheck_web_service_timeouts.
  # Warning band: at least one percentage is >= 10 while BOTH are still < 40.
  # Critical: either percentage is >= 40.
  # The upper bound of the warning band must hold for both values (AND); with
  # OR it stayed true even when one value had already reached the critical
  # threshold, so the two bands overlapped.
  template: httpcheck_web_service_unreachable
  families: *
        on: httpcheck.status
     class: Errors
      type: Web Server
 component: HTTP endpoint
      calc: ($httpcheck_no_web_service_connections >= $httpcheck_web_service_timeouts) ? ($httpcheck_no_web_service_connections) : ($httpcheck_web_service_timeouts)
     units: %
     every: 10s
      warn: ($httpcheck_no_web_service_connections >= 10 OR $httpcheck_web_service_timeouts >= 10) AND ($httpcheck_no_web_service_connections < 40 AND $httpcheck_web_service_timeouts < 40)
      crit: $httpcheck_no_web_service_connections >= 40 OR $httpcheck_web_service_timeouts >= 40
     delay: down 5m multiplier 1.5 max 1h
      info: ratio of failed requests either due to timeouts or no connection over the last 5 minutes
   options: no-clear-notification
        to: webmaster
  # Value-only template: 1-hour average of the time dimension on
  # httpcheck.responsetime, in ms. It defines no warn/crit thresholds and no
  # recipient; httpcheck_web_service_slow compares against it as
  # $httpcheck_1h_web_service_response_time.
  template: httpcheck_1h_web_service_response_time
  families: *
        on: httpcheck.responsetime
     class: Latency
      type: Other
 component: HTTP endpoint
    lookup: average -1h unaligned of time
     every: 30s
     units: ms
      info: average HTTP response time over the last hour
  # Compares the 3-minute average response time with the 1-hour average held in
  # $httpcheck_1h_web_service_response_time: warning above 2x that baseline,
  # critical above 3x. Notifications are addressed to webmaster.
  template: httpcheck_web_service_slow
  families: *
        on: httpcheck.responsetime
     class: Latency
      type: Web Server
 component: HTTP endpoint
    lookup: average -3m unaligned of time
     units: ms
     every: 10s
      warn: $this > ($httpcheck_1h_web_service_response_time * 2)
      crit: $this > ($httpcheck_1h_web_service_response_time * 3)
     delay: down 5m multiplier 1.5 max 1h
      info: average HTTP response time over the last 3 minutes, compared to the average over the last hour
   options: no-clear-notification
        to: webmaster