123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166 |
- # you can disable an alarm notification by setting the 'to' line to: silent
- # -----------------------------------------------------------------------------
- # low disk space
- # checking the latest collected values
- # raise an alarm if the disk is low on
- # available disk space
- template: disk_space_usage
- on: disk.space
- class: Utilization
- type: System
- component: Disk
- os: linux freebsd
- hosts: *
- chart labels: mount_point=!/dev !/dev/* !/run !/run/* *
- calc: $used * 100 / ($avail + $used)
- units: %
- every: 1m
- warn: $this > (($status >= $WARNING ) ? (80) : (90))
- crit: ($this > (($status == $CRITICAL) ? (90) : (98))) && $avail < 5
- delay: up 1m down 15m multiplier 1.5 max 1h
- info: disk ${label:mount_point} space utilization
- to: sysadmin
- template: disk_inode_usage
- on: disk.inodes
- class: Utilization
- type: System
- component: Disk
- os: linux freebsd
- hosts: *
- chart labels: mount_point=!/dev !/dev/* !/run !/run/* *
- calc: $used * 100 / ($avail + $used)
- units: %
- every: 1m
- warn: $this > (($status >= $WARNING) ? (80) : (90))
- crit: $this > (($status == $CRITICAL) ? (90) : (98))
- delay: up 1m down 15m multiplier 1.5 max 1h
- info: disk ${label:mount_point} inode utilization
- to: sysadmin
- # -----------------------------------------------------------------------------
- # disk fill rate
- # calculate the rate at which the disk fills up,
- # using the change in available space
- # during the last hour
- # this is just a calculation - it has no alarm
- # we will use it in the next template to find
- # the hours remaining
- template: disk_fill_rate
- on: disk.space
- os: linux freebsd
- hosts: *
- lookup: min -10m at -50m unaligned of avail
- calc: ($this - $avail) / (($now - $after) / 3600)
- every: 1m
- units: GB/hour
- info: average rate the disk fills up (positive), or frees up (negative) space, for the last hour
- # calculate the hours remaining
- # if the disk continues to fill
- # at this rate
- template: out_of_disk_space_time
- on: disk.space
- os: linux freebsd
- hosts: *
- calc: ($disk_fill_rate > 0) ? ($avail / $disk_fill_rate) : (inf)
- units: hours
- every: 10s
- warn: $this > 0 and $this < (($status >= $WARNING) ? (48) : (8))
- crit: $this > 0 and $this < (($status == $CRITICAL) ? (24) : (2))
- delay: down 15m multiplier 1.2 max 1h
- info: estimated time the disk will run out of space, if the system continues to add data with the rate of the last hour
- to: silent
- # -----------------------------------------------------------------------------
- # disk inode fill rate
- # calculate the rate at which disk inodes are allocated,
- # using the change in available inodes
- # during the last hour
- # this is just a calculation - it has no alarm
- # we will use it in the next template to find
- # the hours remaining
- template: disk_inode_rate
- on: disk.inodes
- os: linux freebsd
- hosts: *
- lookup: min -10m at -50m unaligned of avail
- calc: ($this - $avail) / (($now - $after) / 3600)
- every: 1m
- units: inodes/hour
- info: average rate at which disk inodes are allocated (positive), or freed (negative), for the last hour
- # calculate the hours remaining
- # if the disk inodes are allocated
- # at this rate
- template: out_of_disk_inodes_time
- on: disk.inodes
- os: linux freebsd
- hosts: *
- calc: ($disk_inode_rate > 0) ? ($avail / $disk_inode_rate) : (inf)
- units: hours
- every: 10s
- warn: $this > 0 and $this < (($status >= $WARNING) ? (48) : (8))
- crit: $this > 0 and $this < (($status == $CRITICAL) ? (24) : (2))
- delay: down 15m multiplier 1.2 max 1h
- info: estimated time the disk will run out of inodes, if the system continues to allocate inodes with the rate of the last hour
- to: silent
- # -----------------------------------------------------------------------------
- # disk congestion
- # raise an alarm if the disk is congested
- # by calculating the average disk utilization
- # for the last 10 minutes
- template: 10min_disk_utilization
- on: disk.util
- class: Utilization
- type: System
- component: Disk
- os: linux freebsd
- hosts: *
- lookup: average -10m unaligned
- units: %
- every: 1m
- warn: $this > 98 * (($status >= $WARNING) ? (0.7) : (1))
- delay: down 15m multiplier 1.2 max 1h
- info: average percentage of time ${label:device} disk was busy over the last 10 minutes
- to: silent
- # raise an alarm if the disk backlog
- # is above 5000ms (5s) per second
- # for 10 minutes (3500ms once already raised)
- # (i.e. the disk cannot catch up)
- template: 10min_disk_backlog
- on: disk.backlog
- class: Latency
- type: System
- component: Disk
- os: linux
- hosts: *
- lookup: average -10m unaligned
- units: ms
- every: 1m
- warn: $this > 5000 * (($status >= $WARNING) ? (0.7) : (1))
- delay: down 15m multiplier 1.2 max 1h
- info: average backlog size of the ${label:device} disk over the last 10 minutes
- to: silent
|