123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399 |
- # Availability
- template: vernemq_last_collected_secs
- on: vernemq.node_uptime
- calc: $now - $last_collected_t
- units: seconds ago
- every: 10s
- warn: $this > (($status >= $WARNING) ? ($update_every) : ( 5 * $update_every))
- crit: $this > (($status == $CRITICAL) ? ($update_every) : (60 * $update_every))
- delay: down 5m multiplier 1.5 max 1h
- info: number of seconds since the last successful data collection
- to: sysadmin
- # Socket errors
- template: vernemq_socket_errors
- on: vernemq.socket_errors
- lookup: sum -1m unaligned absolute of socket_error
- units: errors
- every: 10s
- warn: $this > (($status == $WARNING) ? (0) : (5))
- delay: down 5m multiplier 1.5 max 2h
- info: socket errors in the last minute
- to: sysadmin
- # Queues dropped/expired/unhandled PUBLISH messages
- template: vernemq_queue_message_drop
- on: vernemq.queue_undelivered_messages
- lookup: sum -1m unaligned absolute of queue_message_drop
- units: dropped messages
- every: 10s
- warn: $this > (($status == $WARNING) ? (0) : (5))
- delay: down 5m multiplier 1.5 max 2h
- info: dropped messages due to full queues in the last minute
- to: sysadmin
- template: vernemq_queue_message_expired
- on: vernemq.queue_undelivered_messages
- lookup: sum -1m unaligned absolute of queue_message_expired
- units: expired messages
- every: 10s
- warn: $this > (($status == $WARNING) ? (0) : (15))
- delay: down 5m multiplier 1.5 max 2h
- info: messages which expired before delivery in the last minute
- to: sysadmin
- template: vernemq_queue_message_unhandled
- on: vernemq.queue_undelivered_messages
- lookup: sum -1m unaligned absolute of queue_message_unhandled
- units: unhandled messages
- every: 10s
- warn: $this > (($status == $WARNING) ? (0) : (5))
- delay: down 5m multiplier 1.5 max 2h
- info: unhandled messages (connections with clean session=true) in the last minute
- to: sysadmin
- # Erlang VM
- template: vernemq_average_scheduler_utilization
- on: vernemq.average_scheduler_utilization
- lookup: average -10m unaligned
- units: %
- every: 1m
- warn: $this > (($status >= $WARNING) ? (75) : (85))
- crit: $this > (($status == $CRITICAL) ? (85) : (95))
- delay: down 15m multiplier 1.5 max 1h
- info: average scheduler utilization for the last 10 minutes
- to: sysadmin
- # Cluster communication and netsplits
- template: vernemq_cluster_dropped
- on: vernemq.cluster_dropped
- lookup: average -1m unaligned
- units: KiB/s
- every: 10s
- warn: $this > 0
- delay: down 5m multiplier 1.5 max 1h
- info: the amount of traffic dropped during communication with the cluster nodes in the last minute
- to: sysadmin
- template: vernemq_netsplits
- on: vernemq.netsplits
- lookup: sum -1m unaligned absolute of netsplit_detected
- units: netsplits
- every: 10s
- warn: $this > 0
- delay: down 5m multiplier 1.5 max 2h
- info: detected netsplits in the last minute
- to: sysadmin
- # Unsuccessful CONNACK
- template: vernemq_mqtt_connack_sent_reason_success
- on: vernemq.mqtt_connack_sent_reason
- lookup: sum -1m unaligned absolute match-names of success
- units: packets
- every: 10s
- info: successful v3/v5 CONNACK sent in the last minute
- to: sysadmin
- template: vernemq_mqtt_connack_sent_reason_unsuccessful
- on: vernemq.mqtt_connack_sent_reason
- lookup: sum -1m unaligned absolute
- calc: $this - $vernemq_mqtt_connack_sent_reason_success
- units: packets
- every: 10s
- warn: $this > 0
- delay: down 5m multiplier 1.5 max 2h
- info: unsuccessful v3/v5 CONNACK sent in the last minute
- to: sysadmin
- # Not normal DISCONNECT
- template: vernemq_mqtt_disconnect_received_reason_normal_disconnect
- on: vernemq.mqtt_disconnect_received_reason
- lookup: sum -1m unaligned absolute match-names of normal_disconnect
- units: packets
- every: 10s
- info: normal v5 DISCONNECT received in the last minute
- to: sysadmin
- template: vernemq_mqtt_disconnect_sent_reason_normal_disconnect
- on: vernemq.mqtt_disconnect_sent_reason
- lookup: sum -1m unaligned absolute match-names of normal_disconnect
- units: packets
- every: 10s
- info: normal v5 DISCONNECT sent in the last minute
- to: sysadmin
- template: vernemq_mqtt_disconnect_received_reason_not_normal
- on: vernemq.mqtt_disconnect_received_reason
- lookup: sum -1m unaligned absolute
- calc: $this - $vernemq_mqtt_disconnect_received_reason_normal_disconnect
- units: packets
- every: 10s
- warn: $this > 0
- delay: down 5m multiplier 1.5 max 2h
- info: not normal v5 DISCONNECT received in the last minute
- to: sysadmin
- template: vernemq_mqtt_disconnect_sent_reason_not_normal
- on: vernemq.mqtt_disconnect_sent_reason
- lookup: sum -1m unaligned absolute
- calc: $this - $vernemq_mqtt_disconnect_sent_reason_normal_disconnect
- units: packets
- every: 10s
- warn: $this > 0
- delay: down 5m multiplier 1.5 max 2h
- info: not normal v5 DISCONNECT sent in the last minute
- to: sysadmin
- # SUBSCRIBE errors and unauthorized attempts
- template: vernemq_mqtt_subscribe_error
- on: vernemq.mqtt_subscribe_error
- lookup: sum -1m unaligned absolute
- units: failed ops
- every: 10s
- warn: $this > 0
- delay: down 5m multiplier 1.5 max 2h
- info: failed v3/v5 SUBSCRIBE operations in the last minute
- to: sysadmin
- template: vernemq_mqtt_subscribe_auth_error
- on: vernemq.mqtt_subscribe_auth_error
- lookup: sum -1m unaligned absolute
- units: attempts
- every: 10s
- warn: $this > 0
- delay: down 5m multiplier 1.5 max 2h
- info: unauthorized v3/v5 SUBSCRIBE attempts in the last minute
- to: sysadmin
- # UNSUBSCRIBE errors
- template: vernemq_mqtt_unsubscribe_error
- on: vernemq.mqtt_unsubscribe_error
- lookup: sum -1m unaligned absolute
- units: failed ops
- every: 10s
- warn: $this > 0
- delay: down 5m multiplier 1.5 max 2h
- info: failed v3/v5 UNSUBSCRIBE operations in the last minute
- to: sysadmin
- # PUBLISH errors and unauthorized attempts
- template: vernemq_mqtt_publish_errors
- on: vernemq.mqtt_publish_errors
- lookup: sum -1m unaligned absolute
- units: failed ops
- every: 10s
- warn: $this > 0
- delay: down 5m multiplier 1.5 max 2h
- info: failed v3/v5 PUBLISH operations in the last minute
- to: sysadmin
- template: vernemq_mqtt_publish_auth_errors
- on: vernemq.mqtt_publish_auth_errors
- lookup: sum -1m unaligned absolute
- units: attempts
- every: 10s
- warn: $this > 0
- delay: down 5m multiplier 1.5 max 2h
- info: unauthorized v3/v5 PUBLISH attempts in the last minute
- to: sysadmin
- # Unsuccessful and unexpected PUBACK
- template: vernemq_mqtt_puback_received_reason_success
- on: vernemq.mqtt_puback_received_reason
- lookup: sum -1m unaligned absolute match-names of success
- units: packets
- every: 10s
- info: successful v5 PUBACK received in the last minute
- to: sysadmin
- template: vernemq_mqtt_puback_sent_reason_success
- on: vernemq.mqtt_puback_sent_reason
- lookup: sum -1m unaligned absolute match-names of success
- units: packets
- every: 10s
- info: successful v5 PUBACK sent in the last minute
- to: sysadmin
- template: vernemq_mqtt_puback_received_reason_unsuccessful
- on: vernemq.mqtt_puback_received_reason
- lookup: sum -1m unaligned absolute
- calc: $this - $vernemq_mqtt_puback_received_reason_success
- units: packets
- every: 10s
- warn: $this > 0
- delay: down 5m multiplier 1.5 max 2h
- info: unsuccessful v5 PUBACK received in the last minute
- to: sysadmin
- template: vernemq_mqtt_puback_sent_reason_unsuccessful
- on: vernemq.mqtt_puback_sent_reason
- lookup: sum -1m unaligned absolute
- calc: $this - $vernemq_mqtt_puback_sent_reason_success
- units: packets
- every: 10s
- warn: $this > 0
- delay: down 5m multiplier 1.5 max 2h
- info: unsuccessful v5 PUBACK sent in the last minute
- to: sysadmin
- template: vernemq_mqtt_puback_unexpected
- on: vernemq.mqtt_puback_invalid_error
- lookup: sum -1m unaligned absolute
- units: messages
- every: 10s
- warn: $this > 0
- delay: down 5m multiplier 1.5 max 2h
- info: unexpected v3/v5 PUBACK received in the last minute
- to: sysadmin
- # Unsuccessful and unexpected PUBREC
- template: vernemq_mqtt_pubrec_received_reason_success
- on: vernemq.mqtt_pubrec_received_reason
- lookup: sum -1m unaligned absolute match-names of success
- units: packets
- every: 10s
- info: successful v5 PUBREC received in the last minute
- to: sysadmin
- template: vernemq_mqtt_pubrec_sent_reason_success
- on: vernemq.mqtt_pubrec_sent_reason
- lookup: sum -1m unaligned absolute match-names of success
- units: packets
- every: 10s
- info: successful v5 PUBREC sent in the last minute
- to: sysadmin
- template: vernemq_mqtt_pubrec_received_reason_unsuccessful
- on: vernemq.mqtt_pubrec_received_reason
- lookup: sum -1m unaligned absolute
- calc: $this - $vernemq_mqtt_pubrec_received_reason_success
- units: packets
- every: 10s
- warn: $this > 0
- delay: down 5m multiplier 1.5 max 2h
- info: unsuccessful v5 PUBREC received in the last minute
- to: sysadmin
- template: vernemq_mqtt_pubrec_sent_reason_unsuccessful
- on: vernemq.mqtt_pubrec_sent_reason
- lookup: sum -1m unaligned absolute
- calc: $this - $vernemq_mqtt_pubrec_sent_reason_success
- units: packets
- every: 10s
- warn: $this > 0
- delay: down 5m multiplier 1.5 max 2h
- info: unsuccessful v5 PUBREC sent in the last minute
- to: sysadmin
- template: vernemq_mqtt_pubrec_invalid_error
- on: vernemq.mqtt_pubrec_invalid_error
- lookup: sum -1m unaligned absolute
- units: messages
- every: 10s
- warn: $this > 0
- delay: down 5m multiplier 1.5 max 2h
- info: unexpected v3 PUBREC received in the last minute
- to: sysadmin
- # Unsuccessful PUBREL
- template: vernemq_mqtt_pubrel_received_reason_success
- on: vernemq.mqtt_pubrel_received_reason
- lookup: sum -1m unaligned absolute match-names of success
- units: packets
- every: 10s
- info: successful v5 PUBREL received in the last minute
- to: sysadmin
- template: vernemq_mqtt_pubrel_sent_reason_success
- on: vernemq.mqtt_pubrel_sent_reason
- lookup: sum -1m unaligned absolute match-names of success
- units: packets
- every: 10s
- info: successful v5 PUBREL sent in the last minute
- to: sysadmin
- template: vernemq_mqtt_pubrel_received_reason_unsuccessful
- on: vernemq.mqtt_pubrel_received_reason
- lookup: sum -1m unaligned absolute
- calc: $this - $vernemq_mqtt_pubrel_received_reason_success
- units: packets
- every: 10s
- warn: $this > 0
- delay: down 5m multiplier 1.5 max 2h
- info: unsuccessful v5 PUBREL received in the last minute
- to: sysadmin
- template: vernemq_mqtt_pubrel_sent_reason_unsuccessful
- on: vernemq.mqtt_pubrel_sent_reason
- lookup: sum -1m unaligned absolute
- calc: $this - $vernemq_mqtt_pubrel_sent_reason_success
- units: packets
- every: 10s
- warn: $this > 0
- delay: down 5m multiplier 1.5 max 2h
- info: unsuccessful v5 PUBREL sent in the last minute
- to: sysadmin
- # Unsuccessful and unexpected PUBCOMP
- template: vernemq_mqtt_pubcomp_received_reason_success
- on: vernemq.mqtt_pubcomp_received_reason
- lookup: sum -1m unaligned absolute match-names of success
- units: packets
- every: 10s
- info: successful v5 PUBCOMP received in the last minute
- to: sysadmin
- template: vernemq_mqtt_pubcomp_sent_reason_success
- on: vernemq.mqtt_pubcomp_sent_reason
- lookup: sum -1m unaligned absolute match-names of success
- units: packets
- every: 10s
- info: successful v5 PUBCOMP sent in the last minute
- to: sysadmin
- template: vernemq_mqtt_pubcomp_received_reason_unsuccessful
- on: vernemq.mqtt_pubcomp_received_reason
- lookup: sum -1m unaligned absolute
- calc: $this - $vernemq_mqtt_pubcomp_received_reason_success
- units: packets
- every: 10s
- warn: $this > 0
- delay: down 5m multiplier 1.5 max 2h
- info: unsuccessful v5 PUBCOMP received in the last minute
- to: sysadmin
- template: vernemq_mqtt_pubcomp_sent_reason_unsuccessful
- on: vernemq.mqtt_pubcomp_sent_reason
- lookup: sum -1m unaligned absolute
- calc: $this - $vernemq_mqtt_pubcomp_sent_reason_success
- units: packets
- every: 10s
- warn: $this > 0
- delay: down 5m multiplier 1.5 max 2h
- info: unsuccessful v5 PUBCOMP sent in the last minute
- to: sysadmin
- template: vernemq_mqtt_pubcomp_unexpected
- on: vernemq.mqtt_pubcomp_invalid_error
- lookup: sum -1m unaligned absolute
- units: messages
- every: 10s
- warn: $this > 0
- delay: down 5m multiplier 1.5 max 2h
- info: unexpected v3/v5 PUBCOMP received in the last minute
- to: sysadmin
|