disks.conf 4.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167
  1. # you can disable an alarm notification by setting the 'to' line to: silent
  2. # -----------------------------------------------------------------------------
  3. # low disk space
  4. # checking the latest collected values
  5. # raise an alarm if the disk is low on
  6. # available disk space
  7. template: disk_space_usage
  8. on: disk.space
  9. class: Utilization
  10. type: System
  11. component: Disk
  12. os: linux freebsd
  13. hosts: *
  14. chart labels: mount_point=!/dev !/dev/* !/run !/run/* *
  15. calc: $used * 100 / ($avail + $used)
  16. units: %
  17. every: 1m
  18. warn: $this > (($status >= $WARNING ) ? (80) : (90))
  19. crit: $this > (($status == $CRITICAL) ? (90) : (98))
  20. delay: up 1m down 15m multiplier 1.5 max 1h
  21. info: disk ${label:mount_point} space utilization
  22. to: sysadmin
  23. template: disk_inode_usage
  24. on: disk.inodes
  25. class: Utilization
  26. type: System
  27. component: Disk
  28. os: linux freebsd
  29. hosts: *
  30. chart labels: mount_point=!/dev !/dev/* !/run !/run/* *
  31. calc: $used * 100 / ($avail + $used)
  32. units: %
  33. every: 1m
  34. warn: $this > (($status >= $WARNING) ? (80) : (90))
  35. crit: $this > (($status == $CRITICAL) ? (90) : (98))
  36. delay: up 1m down 15m multiplier 1.5 max 1h
  37. info: disk ${label:mount_point} inode utilization
  38. to: sysadmin
  39. # -----------------------------------------------------------------------------
  40. # disk fill rate
  41. # calculate the rate at which the disk fills,
  42. # based on the change in available space
  43. # during the last hour
  44. # this is just a calculation - it has no alarm
  45. # we will use it in the next template to find
  46. # the hours remaining
  47. # template: disk_fill_rate
  48. # on: disk.space
  49. # os: linux freebsd
  50. # hosts: *
  51. # lookup: min -10m at -50m unaligned of avail
  52. # calc: ($this - $avail) / (($now - $after) / 3600)
  53. # every: 1m
  54. # units: GB/hour
  55. # info: average rate the disk fills up (positive), or frees up (negative) space, for the last hour
  56. # calculate the hours remaining
  57. # if the disk continues to fill
  58. # at this rate
  59. # template: out_of_disk_space_time
  60. # on: disk.space
  61. # os: linux freebsd
  62. # hosts: *
  63. # calc: ($disk_fill_rate > 0) ? ($avail / $disk_fill_rate) : (inf)
  64. # units: hours
  65. # every: 10s
  66. # warn: $this > 0 and $this < (($status >= $WARNING) ? (48) : (8))
  67. # crit: $this > 0 and $this < (($status == $CRITICAL) ? (24) : (2))
  68. # delay: down 15m multiplier 1.2 max 1h
  69. # info: estimated time the disk will run out of space, if the system continues to add data with the rate of the last hour
  70. # to: sysadmin
  71. # -----------------------------------------------------------------------------
  72. # disk inode fill rate
  73. # calculate the rate at which disk inodes are allocated,
  74. # based on the change in available inodes
  75. # during the last hour
  76. # this is just a calculation - it has no alarm
  77. # we will use it in the next template to find
  78. # the hours remaining
  79. # template: disk_inode_rate
  80. # on: disk.inodes
  81. # os: linux freebsd
  82. # hosts: *
  83. # lookup: min -10m at -50m unaligned of avail
  84. # calc: ($this - $avail) / (($now - $after) / 3600)
  85. # every: 1m
  86. # units: inodes/hour
  87. # info: average rate at which disk inodes are allocated (positive), or freed (negative), for the last hour
  88. # calculate the hours remaining
  89. # if the disk inodes are allocated
  90. # at this rate
  91. # template: out_of_disk_inodes_time
  92. # on: disk.inodes
  93. # os: linux freebsd
  94. # hosts: *
  95. # calc: ($disk_inode_rate > 0) ? ($avail / $disk_inode_rate) : (inf)
  96. # units: hours
  97. # every: 10s
  98. # warn: $this > 0 and $this < (($status >= $WARNING) ? (48) : (8))
  99. # crit: $this > 0 and $this < (($status == $CRITICAL) ? (24) : (2))
  100. # delay: down 15m multiplier 1.2 max 1h
  101. # info: estimated time the disk will run out of inodes, if the system continues to allocate inodes with the rate of the last hour
  102. # to: sysadmin
  103. # -----------------------------------------------------------------------------
  104. # disk congestion
  105. # raise an alarm if the disk is congested
  106. # by calculating the average disk utilization
  107. # for the last 10 minutes
  108. template: 10min_disk_utilization
  109. on: disk.util
  110. class: Utilization
  111. type: System
  112. component: Disk
  113. os: linux freebsd
  114. hosts: *
  115. lookup: average -10m unaligned
  116. units: %
  117. every: 1m
  118. warn: $this > 98 * (($status >= $WARNING) ? (0.7) : (1))
  119. delay: down 15m multiplier 1.2 max 1h
  120. info: average percentage of time ${label:device} disk was busy over the last 10 minutes
  121. to: silent
  122. # raise an alarm if the disk backlog
  123. # is above 5000ms (5s) per second
  124. # on average over the last 10 minutes
  125. # (i.e. the disk cannot catch up)
  126. template: 10min_disk_backlog
  127. on: disk.backlog
  128. class: Latency
  129. type: System
  130. component: Disk
  131. os: linux
  132. hosts: *
  133. lookup: average -10m unaligned
  134. units: ms
  135. every: 1m
  136. warn: $this > 5000 * (($status >= $WARNING) ? (0.7) : (1))
  137. delay: down 15m multiplier 1.2 max 1h
  138. info: average backlog size of the ${label:device} disk over the last 10 minutes
  139. to: silent