postgres.conf 7.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228
  1. # you can disable an alarm notification by setting the 'to' line to: silent
  # Alert on the percentage of PostgreSQL connections in use.
  # Value: average of the `used` dimension over the last minute, evaluated every minute.
  # Hysteresis: warning raises above 80% and is held while the value stays above 70%;
  # critical raises above 90% and is held while the value stays above 80%.
  # `delay: down ...` postpones notifications for status decreases
  # (15m base, x1.5 multiplier, capped at 1h).
  2. template: postgres_total_connection_utilization
  3. on: postgres.connections_utilization
  4. class: Utilization
  5. type: Database
  6. component: PostgreSQL
  7. hosts: *
  8. lookup: average -1m unaligned of used
  9. units: %
  10. every: 1m
  11. warn: $this > (($status >= $WARNING) ? (70) : (80))
  12. crit: $this > (($status == $CRITICAL) ? (80) : (90))
  13. delay: down 15m multiplier 1.5 max 1h
  14. summary: PostgreSQL connection utilization
  15. info: Average total connection utilization over the last minute
  16. to: dba
  # Alert on the percentage of acquired locks.
  # Value: average of the `used` dimension over the last minute, evaluated every minute.
  # Warning only (no `crit` line): raises above 20% and is held while the value stays above 15%.
  # `delay: down ...` postpones notifications for status decreases
  # (15m base, x1.5 multiplier, capped at 1h).
  17. template: postgres_acquired_locks_utilization
  18. on: postgres.locks_utilization
  19. class: Utilization
  20. type: Database
  21. component: PostgreSQL
  22. hosts: *
  23. lookup: average -1m unaligned of used
  24. units: %
  25. every: 1m
  26. warn: $this > (($status >= $WARNING) ? (15) : (20))
  27. delay: down 15m multiplier 1.5 max 1h
  28. summary: PostgreSQL acquired locks utilization
  29. info: Average acquired locks utilization over the last minute
  30. to: dba
  # Alert on progress towards transaction ID (TXID) wraparound.
  # Value: the chart's `txid_exhaustion` value taken directly via `calc` (no `lookup` window),
  # evaluated every minute.
  # Warning only, with a fixed threshold: raises when the value exceeds 90% (no hysteresis).
  # `delay: down ...` postpones notifications for status decreases
  # (15m base, x1.5 multiplier, capped at 1h).
  31. template: postgres_txid_exhaustion_perc
  32. on: postgres.txid_exhaustion_perc
  33. class: Utilization
  34. type: Database
  35. component: PostgreSQL
  36. hosts: *
  37. calc: $txid_exhaustion
  38. units: %
  39. every: 1m
  40. warn: $this > 90
  41. delay: down 15m multiplier 1.5 max 1h
  42. summary: PostgreSQL TXID exhaustion
  43. info: Percent towards TXID wraparound
  44. to: dba
  45. # Database alarms
  # Alert on a low per-database cache hit ratio.
  # Value: `lookup` averages the `miss` dimension over the last minute, then
  # `calc: 100 - $this` turns the miss percentage into a hit percentage.
  # Hysteresis (lower is worse): warning raises below 60% and is held while the value stays below 70%;
  # critical raises below 50% and is held while the value stays below 60%.
  # `delay: down ...` postpones notifications for status decreases
  # (15m base, x1.5 multiplier, capped at 1h).
  46. template: postgres_db_cache_io_ratio
  47. on: postgres.db_cache_io_ratio
  48. class: Workload
  49. type: Database
  50. component: PostgreSQL
  51. hosts: *
  52. lookup: average -1m unaligned of miss
  53. calc: 100 - $this
  54. units: %
  55. every: 1m
  56. warn: $this < (($status >= $WARNING) ? (70) : (60))
  57. crit: $this < (($status == $CRITICAL) ? (60) : (50))
  58. delay: down 15m multiplier 1.5 max 1h
  59. summary: PostgreSQL DB ${label:database} cache hit ratio
  60. info: Average cache hit ratio in db ${label:database} over the last minute
  61. to: dba
  # Alert on the percentage of rolled-back (aborted) transactions per database.
  # Value: average of the `rollback` dimension over the last five minutes, evaluated every minute.
  # Warning only: raises above 2% and, once raised, is held while the value stays above 0%.
  # `delay: down ...` postpones notifications for status decreases
  # (15m base, x1.5 multiplier, capped at 1h).
  62. template: postgres_db_transactions_rollback_ratio
  63. on: postgres.db_transactions_ratio
  64. class: Workload
  65. type: Database
  66. component: PostgreSQL
  67. hosts: *
  68. lookup: average -5m unaligned of rollback
  69. units: %
  70. every: 1m
  71. warn: $this > (($status >= $WARNING) ? (0) : (2))
  72. delay: down 15m multiplier 1.5 max 1h
  73. summary: PostgreSQL DB ${label:database} aborted transactions
  74. info: Average aborted transactions percentage in db ${label:database} over the last five minutes
  75. to: dba
  # Alert on deadlocks detected per database.
  # Value: sum of the `deadlocks` dimension over the last minute, evaluated every minute.
  # Warning only: raises above 10 deadlocks and, once raised, is held while the sum stays above 0.
  # `delay: down ...` postpones notifications for status decreases
  # (15m base, x1.5 multiplier, capped at 1h).
  76. template: postgres_db_deadlocks_rate
  77. on: postgres.db_deadlocks_rate
  78. class: Errors
  79. type: Database
  80. component: PostgreSQL
  81. hosts: *
  82. lookup: sum -1m unaligned of deadlocks
  83. units: deadlocks
  84. every: 1m
  85. warn: $this > (($status >= $WARNING) ? (0) : (10))
  86. delay: down 15m multiplier 1.5 max 1h
  87. summary: PostgreSQL DB ${label:database} deadlocks rate
  88. info: Number of deadlocks detected in db ${label:database} in the last minute
  89. to: dba
  90. # Table alarms
  # Alert on a low per-table cache hit ratio.
  # Value: `lookup` averages the `miss` dimension over the last minute, then
  # `calc: 100 - $this` turns the miss percentage into a hit percentage.
  # Hysteresis (lower is worse): warning raises below 60% and is held while the value stays below 70%;
  # critical raises below 50% and is held while the value stays below 60%.
  # `delay: down ...` postpones notifications for status decreases
  # (15m base, x1.5 multiplier, capped at 1h).
  91. template: postgres_table_cache_io_ratio
  92. on: postgres.table_cache_io_ratio
  93. class: Workload
  94. type: Database
  95. component: PostgreSQL
  96. hosts: *
  97. lookup: average -1m unaligned of miss
  98. calc: 100 - $this
  99. units: %
  100. every: 1m
  101. warn: $this < (($status >= $WARNING) ? (70) : (60))
  102. crit: $this < (($status == $CRITICAL) ? (60) : (50))
  103. delay: down 15m multiplier 1.5 max 1h
  104. summary: PostgreSQL table ${label:table} db ${label:database} cache hit ratio
  105. info: Average cache hit ratio in db ${label:database} table ${label:table} over the last minute
  106. to: dba
  # Alert on a low per-table index cache hit ratio.
  # Value: `lookup` averages the `miss` dimension over the last minute, then
  # `calc: 100 - $this` turns the miss percentage into a hit percentage.
  # Hysteresis (lower is worse): warning raises below 60% and is held while the value stays below 70%;
  # critical raises below 50% and is held while the value stays below 60%.
  # `delay: down ...` postpones notifications for status decreases
  # (15m base, x1.5 multiplier, capped at 1h).
  107. template: postgres_table_index_cache_io_ratio
  108. on: postgres.table_index_cache_io_ratio
  109. class: Workload
  110. type: Database
  111. component: PostgreSQL
  112. hosts: *
  113. lookup: average -1m unaligned of miss
  114. calc: 100 - $this
  115. units: %
  116. every: 1m
  117. warn: $this < (($status >= $WARNING) ? (70) : (60))
  118. crit: $this < (($status == $CRITICAL) ? (60) : (50))
  119. delay: down 15m multiplier 1.5 max 1h
  120. summary: PostgreSQL table ${label:table} db ${label:database} index cache hit ratio
  121. info: Average index cache hit ratio in db ${label:database} table ${label:table} over the last minute
  122. to: dba
  # Alert on a low per-table TOAST cache hit ratio.
  # Value: `lookup` averages the `miss` dimension over the last minute, then
  # `calc: 100 - $this` turns the miss percentage into a hit percentage.
  # Hysteresis (lower is worse): warning raises below 60% and is held while the value stays below 70%;
  # critical raises below 50% and is held while the value stays below 60%.
  # `delay: down ...` postpones notifications for status decreases
  # (15m base, x1.5 multiplier, capped at 1h).
  123. template: postgres_table_toast_cache_io_ratio
  124. on: postgres.table_toast_cache_io_ratio
  125. class: Workload
  126. type: Database
  127. component: PostgreSQL
  128. hosts: *
  129. lookup: average -1m unaligned of miss
  130. calc: 100 - $this
  131. units: %
  132. every: 1m
  133. warn: $this < (($status >= $WARNING) ? (70) : (60))
  134. crit: $this < (($status == $CRITICAL) ? (60) : (50))
  135. delay: down 15m multiplier 1.5 max 1h
  136. summary: PostgreSQL table ${label:table} db ${label:database} toast cache hit ratio
  137. info: Average TOAST cache hit ratio in db ${label:database} table ${label:table} over the last minute
  138. to: dba
  # Alert on a low per-table TOAST index cache hit ratio.
  # Value: `lookup` averages the `miss` dimension over the last minute, then
  # `calc: 100 - $this` turns the miss percentage into a hit percentage.
  # Hysteresis (lower is worse): warning raises below 60% and is held while the value stays below 70%;
  # critical raises below 50% and is held while the value stays below 60%.
  # `delay: down ...` postpones notifications for status decreases
  # (15m base, x1.5 multiplier, capped at 1h).
  139. template: postgres_table_toast_index_cache_io_ratio
  140. on: postgres.table_toast_index_cache_io_ratio
  141. class: Workload
  142. type: Database
  143. component: PostgreSQL
  144. hosts: *
  145. lookup: average -1m unaligned of miss
  146. calc: 100 - $this
  147. units: %
  148. every: 1m
  149. warn: $this < (($status >= $WARNING) ? (70) : (60))
  150. crit: $this < (($status == $CRITICAL) ? (60) : (50))
  151. delay: down 15m multiplier 1.5 max 1h
  152. summary: PostgreSQL table ${label:table} db ${label:database} toast index cache hit ratio
  153. info: Average TOAST index cache hit ratio in db ${label:database} table ${label:table} over the last minute
  154. to: dba
  # Alert on per-table bloat percentage.
  # Value: `calc` yields `$bloat` only when `$table_size` exceeds 1024 * 1024 * 100
  # (100 MiB, assuming the size is reported in bytes - TODO confirm); otherwise it yields 0,
  # so smaller tables never trigger this alert.
  # Hysteresis: warning raises above 70% and is held while the value stays above 60%;
  # critical raises above 80% and is held while the value stays above 70%.
  # `delay: down ...` postpones notifications for status decreases
  # (15m base, x1.5 multiplier, capped at 1h).
  155. template: postgres_table_bloat_size_perc
  156. on: postgres.table_bloat_size_perc
  157. class: Errors
  158. type: Database
  159. component: PostgreSQL
  160. hosts: *
  161. calc: ($table_size > (1024 * 1024 * 100)) ? ($bloat) : (0)
  162. units: %
  163. every: 1m
  164. warn: $this > (($status >= $WARNING) ? (60) : (70))
  165. crit: $this > (($status == $CRITICAL) ? (70) : (80))
  166. delay: down 15m multiplier 1.5 max 1h
  167. summary: PostgreSQL table ${label:table} db ${label:database} bloat size
  168. info: Bloat size percentage in db ${label:database} table ${label:table}
  169. to: dba
  # Alert when a table has not been autovacuumed for more than a week.
  # NOTE: `hosts: !*` is a negated match-all pattern, so this template applies to no host
  # until the pattern is changed (i.e. it appears to be shipped disabled - confirm before relying on it).
  # Value: the chart's `time` value taken directly via `calc`, in seconds, evaluated every minute.
  # Warning only: raises when the value is not NaN and exceeds 60 * 60 * 24 * 7 seconds (7 days).
  # There is no `delay` line, so no notification delay is configured here.
  170. template: postgres_table_last_autovacuum_time
  171. on: postgres.table_autovacuum_since_time
  172. class: Errors
  173. type: Database
  174. component: PostgreSQL
  175. hosts: !*
  176. calc: $time
  177. units: seconds
  178. every: 1m
  179. warn: $this != nan AND $this > (60 * 60 * 24 * 7)
  180. summary: PostgreSQL table ${label:table} db ${label:database} last autovacuum
  181. info: Time elapsed since db ${label:database} table ${label:table} was vacuumed by the autovacuum daemon
  182. to: dba
  # Alert when a table has not been auto-analyzed for more than a week.
  # NOTE: `hosts: !*` is a negated match-all pattern, so this template applies to no host
  # until the pattern is changed (i.e. it appears to be shipped disabled - confirm before relying on it).
  # Value: the chart's `time` value taken directly via `calc`, in seconds, evaluated every minute.
  # Warning only: raises when the value is not NaN and exceeds 60 * 60 * 24 * 7 seconds (7 days).
  # There is no `delay` line, so no notification delay is configured here.
  183. template: postgres_table_last_autoanalyze_time
  184. on: postgres.table_autoanalyze_since_time
  185. class: Errors
  186. type: Database
  187. component: PostgreSQL
  188. hosts: !*
  189. calc: $time
  190. units: seconds
  191. every: 1m
  192. warn: $this != nan AND $this > (60 * 60 * 24 * 7)
  193. summary: PostgreSQL table ${label:table} db ${label:database} last autoanalyze
  194. info: Time elapsed since db ${label:database} table ${label:table} was analyzed by the autovacuum daemon
  195. to: dba
  196. # Index alarms
  # Alert on per-index bloat percentage.
  # Value: `calc` yields `$bloat` only when `$index_size` exceeds 1024 * 1024 * 10
  # (10 MiB, assuming the size is reported in bytes - TODO confirm); otherwise it yields 0,
  # so smaller indexes never trigger this alert.
  # Hysteresis: warning raises above 70% and is held while the value stays above 60%;
  # critical raises above 80% and is held while the value stays above 70%.
  # `delay: down ...` postpones notifications for status decreases
  # (15m base, x1.5 multiplier, capped at 1h).
  # The summary names the index (`${label:index}`) so that alerts for different indexes
  # of the same table can be told apart.
  197. template: postgres_index_bloat_size_perc
  198. on: postgres.index_bloat_size_perc
  199. class: Errors
  200. type: Database
  201. component: PostgreSQL
  202. hosts: *
  203. calc: ($index_size > (1024 * 1024 * 10)) ? ($bloat) : (0)
  204. units: %
  205. every: 1m
  206. warn: $this > (($status >= $WARNING) ? (60) : (70))
  207. crit: $this > (($status == $CRITICAL) ? (70) : (80))
  208. delay: down 15m multiplier 1.5 max 1h
  209. summary: PostgreSQL index ${label:index} table ${label:table} db ${label:database} bloat size
  210. info: Bloat size percentage in db ${label:database} table ${label:table} index ${label:index}
  211. to: dba