disks.conf 5.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172
  1. # you can disable an alarm notification by setting the 'to' line to: silent
  2. # -----------------------------------------------------------------------------
  3. # low disk space
  4. # checking the latest collected values
  5. # raise an alarm if the disk is low on
  6. # available disk space
  # disk_space_usage: percentage of used space, evaluated every minute on each
  # disk.space chart. The mount_point label pattern negates /dev, /dev/*, /run
  # and /run/* and then matches everything else.
  7. template: disk_space_usage
  8. on: disk.space
  9. class: Utilization
  10. type: System
  11. component: Disk
  12. os: linux freebsd
  13. hosts: *
  14. chart labels: mount_point=!/dev !/dev/* !/run !/run/* *
  # used space as a percentage of (available + used)
  15. calc: $used * 100 / ($avail + $used)
  16. units: %
  17. every: 1m
  # hysteresis: the threshold is 90 while the alert is below WARNING, and 80
  # once it is already WARNING or higher, so the alert does not flap near 90
  18. warn: $this > (($status >= $WARNING ) ? (80) : (90))
  # hysteresis: threshold 98 to enter CRITICAL, 90 while already CRITICAL;
  # additionally requires $avail < 5
  # NOTE(review): $avail is presumably in the chart's space units (GiB?) - confirm
  19. crit: ($this > (($status == $CRITICAL) ? (90) : (98))) && $avail < 5
  20. delay: up 1m down 15m multiplier 1.5 max 1h
  21. summary: Disk ${label:mount_point} space usage
  22. info: Total space utilization of disk ${label:mount_point}
  23. to: sysadmin
  # disk_inode_usage: percentage of used inodes, evaluated every minute on each
  # disk.inodes chart. The mount_point label pattern negates /dev, /dev/*, /run
  # and /run/* and then matches everything else.
  24. template: disk_inode_usage
  25. on: disk.inodes
  26. class: Utilization
  27. type: System
  28. component: Disk
  29. os: linux freebsd
  30. hosts: *
  31. chart labels: mount_point=!/dev !/dev/* !/run !/run/* *
  # used inodes as a percentage of (available + used)
  32. calc: $used * 100 / ($avail + $used)
  33. units: %
  34. every: 1m
  # hysteresis: the threshold is 90 while the alert is below WARNING, and 80
  # once it is already WARNING or higher
  35. warn: $this > (($status >= $WARNING) ? (80) : (90))
  # hysteresis: threshold 98 to enter CRITICAL, 90 while already CRITICAL
  36. crit: $this > (($status == $CRITICAL) ? (90) : (98))
  37. delay: up 1m down 15m multiplier 1.5 max 1h
  38. summary: Disk ${label:mount_point} inode usage
  39. info: Total inode utilization of disk ${label:mount_point}
  40. to: sysadmin
  41. # -----------------------------------------------------------------------------
  42. # disk fill rate
  43. # calculate the rate at which the disk fills,
  44. # based on the change in available space
  45. # during the last hour
  46. # this is just a calculation - it has no alarm
  47. # we will use it in the next template to find
  48. # the hours remaining
  # disk_fill_rate: calculation-only template (it has no warn, crit or to line).
  # Its value is referenced as $disk_fill_rate by out_of_disk_space_time.
  49. template: disk_fill_rate
  50. on: disk.space
  51. os: linux freebsd
  52. hosts: *
  # $this = minimum of the 'avail' dimension over a 10-minute window that ends
  # 50 minutes ago
  53. lookup: min -10m at -50m unaligned of avail
  # (past avail - current avail) divided by the elapsed time in hours;
  # a positive result means available space is shrinking
  # NOTE(review): $after is presumably the start timestamp of the lookup window - confirm
  54. calc: ($this - $avail) / (($now - $after) / 3600)
  55. every: 1m
  56. units: GB/hour
  57. info: average rate the disk fills up (positive), or frees up (negative) space, for the last hour
  58. # calculate the hours remaining
  59. # if the disk continues to fill
  60. # at this rate
  # out_of_disk_space_time: estimated hours until the disk is full, derived
  # from the current available space and the disk_fill_rate template's value.
  61. template: out_of_disk_space_time
  62. on: disk.space
  63. os: linux freebsd
  64. hosts: *
  # hours remaining = avail / fill rate; when the disk is not filling
  # (rate <= 0) the result is inf, so the warn/crit comparisons below are false
  65. calc: ($disk_fill_rate > 0) ? ($avail / $disk_fill_rate) : (inf)
  66. units: hours
  67. every: 10s
  # hysteresis: enters WARNING below 8 hours; once WARNING or higher, stays
  # raised while below 48 hours
  68. warn: $this > 0 and $this < (($status >= $WARNING) ? (48) : (8))
  # hysteresis: enters CRITICAL below 2 hours; once CRITICAL, stays raised
  # while below 24 hours
  69. crit: $this > 0 and $this < (($status == $CRITICAL) ? (24) : (2))
  70. delay: down 15m multiplier 1.2 max 1h
  71. summary: Disk ${label:mount_point} estimation of lack of space
  72. info: Estimated time the disk ${label:mount_point} will run out of space, if the system continues to add data with the rate of the last hour
  # 'silent' disables the notification for this alert
  73. to: silent
  74. # -----------------------------------------------------------------------------
  75. # disk inode fill rate
  76. # calculate the rate at which disk inodes are allocated,
  77. # based on the change in available inodes
  78. # during the last hour
  79. # this is just a calculation - it has no alarm
  80. # we will use it in the next template to find
  81. # the hours remaining
  # disk_inode_rate: calculation-only template (it has no warn, crit or to line).
  # Its value is referenced as $disk_inode_rate by out_of_disk_inodes_time.
  82. template: disk_inode_rate
  83. on: disk.inodes
  84. os: linux freebsd
  85. hosts: *
  # $this = minimum of the 'avail' dimension over a 10-minute window that ends
  # 50 minutes ago
  86. lookup: min -10m at -50m unaligned of avail
  # (past avail - current avail) divided by the elapsed time in hours;
  # a positive result means available inodes are shrinking
  # NOTE(review): $after is presumably the start timestamp of the lookup window - confirm
  87. calc: ($this - $avail) / (($now - $after) / 3600)
  88. every: 1m
  89. units: inodes/hour
  90. info: average rate at which disk inodes are allocated (positive), or freed (negative), for the last hour
  91. # calculate the hours remaining
  92. # if disk inodes continue to be allocated
  93. # at this rate
  # out_of_disk_inodes_time: estimated hours until the disk runs out of inodes,
  # derived from the current available inodes and the disk_inode_rate
  # template's value.
  94. template: out_of_disk_inodes_time
  95. on: disk.inodes
  96. os: linux freebsd
  97. hosts: *
  # hours remaining = avail / allocation rate; when inodes are not being
  # consumed (rate <= 0) the result is inf, so the comparisons below are false
  98. calc: ($disk_inode_rate > 0) ? ($avail / $disk_inode_rate) : (inf)
  99. units: hours
  100. every: 10s
  # hysteresis: enters WARNING below 8 hours; once WARNING or higher, stays
  # raised while below 48 hours
  101. warn: $this > 0 and $this < (($status >= $WARNING) ? (48) : (8))
  # hysteresis: enters CRITICAL below 2 hours; once CRITICAL, stays raised
  # while below 24 hours
  102. crit: $this > 0 and $this < (($status == $CRITICAL) ? (24) : (2))
  103. delay: down 15m multiplier 1.2 max 1h
  104. summary: Disk ${label:mount_point} estimation of lack of inodes
  105. info: Estimated time the disk ${label:mount_point} will run out of inodes, if the system continues to allocate inodes with the rate of the last hour
  # 'silent' disables the notification for this alert
  106. to: silent
  107. # -----------------------------------------------------------------------------
  108. # disk congestion
  109. # raise an alarm if the disk is congested
  110. # by calculating the average disk utilization
  111. # for the last 10 minutes
  # 10min_disk_utilization: average of the disk.util chart over the last
  # 10 minutes, evaluated every minute. Warning only; there is no crit line.
  112. template: 10min_disk_utilization
  113. on: disk.util
  114. class: Utilization
  115. type: System
  116. component: Disk
  117. os: linux freebsd
  118. hosts: *
  119. lookup: average -10m unaligned
  120. units: %
  121. every: 1m
  # hysteresis: enters WARNING above 98; once WARNING or higher, the threshold
  # is scaled by 0.7 (98 * 0.7 = 68.6), so the alert stays raised until below that
  122. warn: $this > 98 * (($status >= $WARNING) ? (0.7) : (1))
  123. delay: down 15m multiplier 1.2 max 1h
  124. summary: Disk ${label:device} utilization
  125. info: Average percentage of time ${label:device} disk was busy over the last 10 minutes
  # 'silent' disables the notification for this alert
  126. to: silent
  127. # raise an alarm if the disk backlog
  128. # is above 5000ms (5s) per second
  129. # for 10 minutes
  130. # (i.e. the disk cannot catch up)
  # 10min_disk_backlog: average of the disk.backlog chart over the last
  # 10 minutes, evaluated every minute. Linux only (see the os line).
  # Warning only; there is no crit line.
  131. template: 10min_disk_backlog
  132. on: disk.backlog
  133. class: Latency
  134. type: System
  135. component: Disk
  136. os: linux
  137. hosts: *
  138. lookup: average -10m unaligned
  139. units: ms
  140. every: 1m
  # hysteresis: enters WARNING above 5000; once WARNING or higher, the threshold
  # is scaled by 0.7 (5000 * 0.7 = 3500), so the alert stays raised until below that
  141. warn: $this > 5000 * (($status >= $WARNING) ? (0.7) : (1))
  142. delay: down 15m multiplier 1.2 max 1h
  143. summary: Disk ${label:device} backlog
  144. info: Average backlog size of the ${label:device} disk over the last 10 minutes
  # 'silent' disables the notification for this alert
  145. to: silent