|
@@ -0,0 +1,399 @@
|
|
|
|
+
|
|
|
|
+# Availability
|
|
|
|
+
|
|
|
|
+template: vernemq_last_collected_secs
|
|
|
|
+ on: vernemq.node_uptime
|
|
|
|
+ calc: $now - $last_collected_t
|
|
|
|
+ units: seconds ago
|
|
|
|
+ every: 10s
|
|
|
|
+ warn: $this > (($status >= $WARNING) ? ($update_every) : ( 5 * $update_every))
|
|
|
|
+ crit: $this > (($status == $CRITICAL) ? ($update_every) : (60 * $update_every))
|
|
|
|
+ delay: down 5m multiplier 1.5 max 1h
|
|
|
|
+ info: number of seconds since the last successful data collection
|
|
|
|
+ to: sysadmin
|
|
|
|
+
|
|
|
|
+# Socket errors
|
|
|
|
+
|
|
|
|
+template: vernemq_socket_errors
|
|
|
|
+ on: vernemq.socket_errors
|
|
|
|
+ lookup: sum -1m unaligned absolute of socket_error
|
|
|
|
+ units: errors
|
|
|
|
+ every: 10s
|
|
|
|
+ warn: $this > (($status == $WARNING) ? (0) : (5))
|
|
|
|
+ delay: down 5m multiplier 1.5 max 2h
|
|
|
|
+ info: socket errors in the last minute
|
|
|
|
+ to: sysadmin
|
|
|
|
+
|
|
|
|
+# Queues dropped/expired/unhandled PUBLISH messages
|
|
|
|
+
|
|
|
|
+template: vernemq_queue_message_drop
|
|
|
|
+ on: vernemq.queue_undelivered_messages
|
|
|
|
+ lookup: sum -1m unaligned absolute of queue_message_drop
|
|
|
|
+ units: dropped messages
|
|
|
|
+ every: 10s
|
|
|
|
+ warn: $this > (($status == $WARNING) ? (0) : (5))
|
|
|
|
+ delay: down 5m multiplier 1.5 max 2h
|
|
|
|
+     info: dropped messages due to full queues in the last minute
|
|
|
|
+ to: sysadmin
|
|
|
|
+
|
|
|
|
+template: vernemq_queue_message_expired
|
|
|
|
+ on: vernemq.queue_undelivered_messages
|
|
|
|
+ lookup: sum -1m unaligned absolute of queue_message_expired
|
|
|
|
+ units: expired messages
|
|
|
|
+ every: 10s
|
|
|
|
+ warn: $this > (($status == $WARNING) ? (0) : (15))
|
|
|
|
+ delay: down 5m multiplier 1.5 max 2h
|
|
|
|
+ info: messages which expired before delivery in the last minute
|
|
|
|
+ to: sysadmin
|
|
|
|
+
|
|
|
|
+template: vernemq_queue_message_unhandled
|
|
|
|
+ on: vernemq.queue_undelivered_messages
|
|
|
|
+ lookup: sum -1m unaligned absolute of queue_message_unhandled
|
|
|
|
+ units: unhandled messages
|
|
|
|
+ every: 10s
|
|
|
|
+ warn: $this > (($status == $WARNING) ? (0) : (5))
|
|
|
|
+ delay: down 5m multiplier 1.5 max 2h
|
|
|
|
+ info: unhandled messages (connections with clean session=true) in the last minute
|
|
|
|
+ to: sysadmin
|
|
|
|
+
|
|
|
|
+# Erlang VM
|
|
|
|
+
|
|
|
|
+template: vernemq_average_scheduler_utilization
|
|
|
|
+ on: vernemq.average_scheduler_utilization
|
|
|
|
+ lookup: average -10m unaligned
|
|
|
|
+ units: %
|
|
|
|
+ every: 1m
|
|
|
|
+ warn: $this > (($status >= $WARNING) ? (75) : (85))
|
|
|
|
+ crit: $this > (($status == $CRITICAL) ? (85) : (95))
|
|
|
|
+ delay: down 15m multiplier 1.5 max 1h
|
|
|
|
+ info: average scheduler utilization for the last 10 minutes
|
|
|
|
+ to: sysadmin
|
|
|
|
+
|
|
|
|
+# Cluster communication and netsplits
|
|
|
|
+
|
|
|
|
+template: vernemq_cluster_dropped
|
|
|
|
+ on: vernemq.cluster_dropped
|
|
|
|
+ lookup: average -1m unaligned
|
|
|
|
+ units: KiB/s
|
|
|
|
+ every: 10s
|
|
|
|
+ warn: $this > 0
|
|
|
|
+ delay: down 5m multiplier 1.5 max 1h
|
|
|
|
+ info: the amount of traffic dropped during communication with the cluster nodes in the last minute
|
|
|
|
+ to: sysadmin
|
|
|
|
+
|
|
|
|
+template: vernemq_netsplits
|
|
|
|
+ on: vernemq.netsplits
|
|
|
|
+ lookup: sum -1m unaligned absolute of netsplit_detected
|
|
|
|
+ units: netsplits
|
|
|
|
+ every: 10s
|
|
|
|
+ warn: $this > 0
|
|
|
|
+ delay: down 5m multiplier 1.5 max 2h
|
|
|
|
+ info: detected netsplits in the last minute
|
|
|
|
+ to: sysadmin
|
|
|
|
+
|
|
|
|
+# Unsuccessful CONNACK
|
|
|
|
+
|
|
|
|
+template: vernemq_mqtt_connack_sent_reason_success
|
|
|
|
+ on: vernemq.mqtt_connack_sent_reason
|
|
|
|
+ lookup: sum -1m unaligned absolute match-names of success
|
|
|
|
+ units: packets
|
|
|
|
+ every: 10s
|
|
|
|
+ info: successful v3/v5 CONNACK sent in the last minute
|
|
|
|
+ to: sysadmin
|
|
|
|
+
|
|
|
|
+template: vernemq_mqtt_connack_sent_reason_unsuccessful
|
|
|
|
+ on: vernemq.mqtt_connack_sent_reason
|
|
|
|
+ lookup: sum -1m unaligned absolute
|
|
|
|
+ calc: $this - $vernemq_mqtt_connack_sent_reason_success
|
|
|
|
+ units: packets
|
|
|
|
+ every: 10s
|
|
|
|
+ warn: $this > 0
|
|
|
|
+ delay: down 5m multiplier 1.5 max 2h
|
|
|
|
+ info: unsuccessful v3/v5 CONNACK sent in the last minute
|
|
|
|
+ to: sysadmin
|
|
|
|
+
|
|
|
|
+# Not normal DISCONNECT
|
|
|
|
+
|
|
|
|
+template: vernemq_mqtt_disconnect_received_reason_normal_disconnect
|
|
|
|
+ on: vernemq.mqtt_disconnect_received_reason
|
|
|
|
+ lookup: sum -1m unaligned absolute match-names of normal_disconnect
|
|
|
|
+ units: packets
|
|
|
|
+ every: 10s
|
|
|
|
+ info: normal v5 DISCONNECT received in the last minute
|
|
|
|
+ to: sysadmin
|
|
|
|
+
|
|
|
|
+template: vernemq_mqtt_disconnect_sent_reason_normal_disconnect
|
|
|
|
+ on: vernemq.mqtt_disconnect_sent_reason
|
|
|
|
+ lookup: sum -1m unaligned absolute match-names of normal_disconnect
|
|
|
|
+ units: packets
|
|
|
|
+ every: 10s
|
|
|
|
+ info: normal v5 DISCONNECT sent in the last minute
|
|
|
|
+ to: sysadmin
|
|
|
|
+
|
|
|
|
+template: vernemq_mqtt_disconnect_received_reason_not_normal
|
|
|
|
+ on: vernemq.mqtt_disconnect_received_reason
|
|
|
|
+ lookup: sum -1m unaligned absolute
|
|
|
|
+ calc: $this - $vernemq_mqtt_disconnect_received_reason_normal_disconnect
|
|
|
|
+ units: packets
|
|
|
|
+ every: 10s
|
|
|
|
+ warn: $this > 0
|
|
|
|
+ delay: down 5m multiplier 1.5 max 2h
|
|
|
|
+ info: not normal v5 DISCONNECT received in the last minute
|
|
|
|
+ to: sysadmin
|
|
|
|
+
|
|
|
|
+template: vernemq_mqtt_disconnect_sent_reason_not_normal
|
|
|
|
+ on: vernemq.mqtt_disconnect_sent_reason
|
|
|
|
+ lookup: sum -1m unaligned absolute
|
|
|
|
+ calc: $this - $vernemq_mqtt_disconnect_sent_reason_normal_disconnect
|
|
|
|
+ units: packets
|
|
|
|
+ every: 10s
|
|
|
|
+ warn: $this > 0
|
|
|
|
+ delay: down 5m multiplier 1.5 max 2h
|
|
|
|
+ info: not normal v5 DISCONNECT sent in the last minute
|
|
|
|
+ to: sysadmin
|
|
|
|
+
|
|
|
|
+# SUBSCRIBE errors and unauthorized attempts
|
|
|
|
+
|
|
|
|
+template: vernemq_mqtt_subscribe_error
|
|
|
|
+ on: vernemq.mqtt_subscribe_error
|
|
|
|
+ lookup: sum -1m unaligned absolute
|
|
|
|
+ units: failed ops
|
|
|
|
+ every: 10s
|
|
|
|
+ warn: $this > 0
|
|
|
|
+ delay: down 5m multiplier 1.5 max 2h
|
|
|
|
+ info: failed v3/v5 SUBSCRIBE operations in the last minute
|
|
|
|
+ to: sysadmin
|
|
|
|
+
|
|
|
|
+template: vernemq_mqtt_subscribe_auth_error
|
|
|
|
+ on: vernemq.mqtt_subscribe_auth_error
|
|
|
|
+ lookup: sum -1m unaligned absolute
|
|
|
|
+ units: attempts
|
|
|
|
+ every: 10s
|
|
|
|
+ warn: $this > 0
|
|
|
|
+ delay: down 5m multiplier 1.5 max 2h
|
|
|
|
+ info: unauthorized v3/v5 SUBSCRIBE attempts in the last minute
|
|
|
|
+ to: sysadmin
|
|
|
|
+
|
|
|
|
+# UNSUBSCRIBE errors
|
|
|
|
+
|
|
|
|
+template: vernemq_mqtt_unsubscribe_error
|
|
|
|
+ on: vernemq.mqtt_unsubscribe_error
|
|
|
|
+ lookup: sum -1m unaligned absolute
|
|
|
|
+ units: failed ops
|
|
|
|
+ every: 10s
|
|
|
|
+ warn: $this > 0
|
|
|
|
+ delay: down 5m multiplier 1.5 max 2h
|
|
|
|
+ info: failed v3/v5 UNSUBSCRIBE operations in the last minute
|
|
|
|
+ to: sysadmin
|
|
|
|
+
|
|
|
|
+# PUBLISH errors and unauthorized attempts
|
|
|
|
+
|
|
|
|
+template: vernemq_mqtt_publish_errors
|
|
|
|
+ on: vernemq.mqtt_publish_errors
|
|
|
|
+ lookup: sum -1m unaligned absolute
|
|
|
|
+ units: failed ops
|
|
|
|
+ every: 10s
|
|
|
|
+ warn: $this > 0
|
|
|
|
+ delay: down 5m multiplier 1.5 max 2h
|
|
|
|
+ info: failed v3/v5 PUBLISH operations in the last minute
|
|
|
|
+ to: sysadmin
|
|
|
|
+
|
|
|
|
+template: vernemq_mqtt_publish_auth_errors
|
|
|
|
+ on: vernemq.mqtt_publish_auth_errors
|
|
|
|
+ lookup: sum -1m unaligned absolute
|
|
|
|
+ units: attempts
|
|
|
|
+ every: 10s
|
|
|
|
+ warn: $this > 0
|
|
|
|
+ delay: down 5m multiplier 1.5 max 2h
|
|
|
|
+ info: unauthorized v3/v5 PUBLISH attempts in the last minute
|
|
|
|
+ to: sysadmin
|
|
|
|
+
|
|
|
|
+# Unsuccessful and unexpected PUBACK
|
|
|
|
+
|
|
|
|
+template: vernemq_mqtt_puback_received_reason_success
|
|
|
|
+ on: vernemq.mqtt_puback_received_reason
|
|
|
|
+ lookup: sum -1m unaligned absolute match-names of success
|
|
|
|
+ units: packets
|
|
|
|
+ every: 10s
|
|
|
|
+ info: successful v5 PUBACK received in the last minute
|
|
|
|
+ to: sysadmin
|
|
|
|
+
|
|
|
|
+template: vernemq_mqtt_puback_sent_reason_success
|
|
|
|
+ on: vernemq.mqtt_puback_sent_reason
|
|
|
|
+ lookup: sum -1m unaligned absolute match-names of success
|
|
|
|
+ units: packets
|
|
|
|
+ every: 10s
|
|
|
|
+ info: successful v5 PUBACK sent in the last minute
|
|
|
|
+ to: sysadmin
|
|
|
|
+
|
|
|
|
+template: vernemq_mqtt_puback_received_reason_unsuccessful
|
|
|
|
+ on: vernemq.mqtt_puback_received_reason
|
|
|
|
+ lookup: sum -1m unaligned absolute
|
|
|
|
+ calc: $this - $vernemq_mqtt_puback_received_reason_success
|
|
|
|
+ units: packets
|
|
|
|
+ every: 10s
|
|
|
|
+ warn: $this > 0
|
|
|
|
+ delay: down 5m multiplier 1.5 max 2h
|
|
|
|
+ info: unsuccessful v5 PUBACK received in the last minute
|
|
|
|
+ to: sysadmin
|
|
|
|
+
|
|
|
|
+template: vernemq_mqtt_puback_sent_reason_unsuccessful
|
|
|
|
+ on: vernemq.mqtt_puback_sent_reason
|
|
|
|
+ lookup: sum -1m unaligned absolute
|
|
|
|
+ calc: $this - $vernemq_mqtt_puback_sent_reason_success
|
|
|
|
+ units: packets
|
|
|
|
+ every: 10s
|
|
|
|
+ warn: $this > 0
|
|
|
|
+ delay: down 5m multiplier 1.5 max 2h
|
|
|
|
+ info: unsuccessful v5 PUBACK sent in the last minute
|
|
|
|
+ to: sysadmin
|
|
|
|
+
|
|
|
|
+template: vernemq_mqtt_puback_unexpected
|
|
|
|
+ on: vernemq.mqtt_puback_invalid_error
|
|
|
|
+ lookup: sum -1m unaligned absolute
|
|
|
|
+ units: messages
|
|
|
|
+ every: 10s
|
|
|
|
+ warn: $this > 0
|
|
|
|
+ delay: down 5m multiplier 1.5 max 2h
|
|
|
|
+ info: unexpected v3/v5 PUBACK received in the last minute
|
|
|
|
+ to: sysadmin
|
|
|
|
+
|
|
|
|
+# Unsuccessful and unexpected PUBREC
|
|
|
|
+
|
|
|
|
+template: vernemq_mqtt_pubrec_received_reason_success
|
|
|
|
+ on: vernemq.mqtt_pubrec_received_reason
|
|
|
|
+ lookup: sum -1m unaligned absolute match-names of success
|
|
|
|
+ units: packets
|
|
|
|
+ every: 10s
|
|
|
|
+ info: successful v5 PUBREC received in the last minute
|
|
|
|
+ to: sysadmin
|
|
|
|
+
|
|
|
|
+template: vernemq_mqtt_pubrec_sent_reason_success
|
|
|
|
+ on: vernemq.mqtt_pubrec_sent_reason
|
|
|
|
+ lookup: sum -1m unaligned absolute match-names of success
|
|
|
|
+ units: packets
|
|
|
|
+ every: 10s
|
|
|
|
+ info: successful v5 PUBREC sent in the last minute
|
|
|
|
+ to: sysadmin
|
|
|
|
+
|
|
|
|
+template: vernemq_mqtt_pubrec_received_reason_unsuccessful
|
|
|
|
+ on: vernemq.mqtt_pubrec_received_reason
|
|
|
|
+ lookup: sum -1m unaligned absolute
|
|
|
|
+ calc: $this - $vernemq_mqtt_pubrec_received_reason_success
|
|
|
|
+ units: packets
|
|
|
|
+ every: 10s
|
|
|
|
+ warn: $this > 0
|
|
|
|
+ delay: down 5m multiplier 1.5 max 2h
|
|
|
|
+ info: unsuccessful v5 PUBREC received in the last minute
|
|
|
|
+ to: sysadmin
|
|
|
|
+
|
|
|
|
+template: vernemq_mqtt_pubrec_sent_reason_unsuccessful
|
|
|
|
+ on: vernemq.mqtt_pubrec_sent_reason
|
|
|
|
+ lookup: sum -1m unaligned absolute
|
|
|
|
+ calc: $this - $vernemq_mqtt_pubrec_sent_reason_success
|
|
|
|
+ units: packets
|
|
|
|
+ every: 10s
|
|
|
|
+ warn: $this > 0
|
|
|
|
+ delay: down 5m multiplier 1.5 max 2h
|
|
|
|
+ info: unsuccessful v5 PUBREC sent in the last minute
|
|
|
|
+ to: sysadmin
|
|
|
|
+
|
|
|
|
+template: vernemq_mqtt_pubrec_invalid_error
|
|
|
|
+ on: vernemq.mqtt_pubrec_invalid_error
|
|
|
|
+ lookup: sum -1m unaligned absolute
|
|
|
|
+ units: messages
|
|
|
|
+ every: 10s
|
|
|
|
+ warn: $this > 0
|
|
|
|
+ delay: down 5m multiplier 1.5 max 2h
|
|
|
|
+ info: unexpected v3 PUBREC received in the last minute
|
|
|
|
+ to: sysadmin
|
|
|
|
+
|
|
|
|
+# Unsuccessful PUBREL
|
|
|
|
+
|
|
|
|
+template: vernemq_mqtt_pubrel_received_reason_success
|
|
|
|
+ on: vernemq.mqtt_pubrel_received_reason
|
|
|
|
+ lookup: sum -1m unaligned absolute match-names of success
|
|
|
|
+ units: packets
|
|
|
|
+ every: 10s
|
|
|
|
+ info: successful v5 PUBREL received in the last minute
|
|
|
|
+ to: sysadmin
|
|
|
|
+
|
|
|
|
+template: vernemq_mqtt_pubrel_sent_reason_success
|
|
|
|
+ on: vernemq.mqtt_pubrel_sent_reason
|
|
|
|
+ lookup: sum -1m unaligned absolute match-names of success
|
|
|
|
+ units: packets
|
|
|
|
+ every: 10s
|
|
|
|
+ info: successful v5 PUBREL sent in the last minute
|
|
|
|
+ to: sysadmin
|
|
|
|
+
|
|
|
|
+template: vernemq_mqtt_pubrel_received_reason_unsuccessful
|
|
|
|
+ on: vernemq.mqtt_pubrel_received_reason
|
|
|
|
+ lookup: sum -1m unaligned absolute
|
|
|
|
+ calc: $this - $vernemq_mqtt_pubrel_received_reason_success
|
|
|
|
+ units: packets
|
|
|
|
+ every: 10s
|
|
|
|
+ warn: $this > 0
|
|
|
|
+ delay: down 5m multiplier 1.5 max 2h
|
|
|
|
+ info: unsuccessful v5 PUBREL received in the last minute
|
|
|
|
+ to: sysadmin
|
|
|
|
+
|
|
|
|
+template: vernemq_mqtt_pubrel_sent_reason_unsuccessful
|
|
|
|
+ on: vernemq.mqtt_pubrel_sent_reason
|
|
|
|
+ lookup: sum -1m unaligned absolute
|
|
|
|
+ calc: $this - $vernemq_mqtt_pubrel_sent_reason_success
|
|
|
|
+ units: packets
|
|
|
|
+ every: 10s
|
|
|
|
+ warn: $this > 0
|
|
|
|
+ delay: down 5m multiplier 1.5 max 2h
|
|
|
|
+ info: unsuccessful v5 PUBREL sent in the last minute
|
|
|
|
+ to: sysadmin
|
|
|
|
+
|
|
|
|
+# Unsuccessful and unexpected PUBCOMP
|
|
|
|
+
|
|
|
|
+template: vernemq_mqtt_pubcomp_received_reason_success
|
|
|
|
+ on: vernemq.mqtt_pubcomp_received_reason
|
|
|
|
+ lookup: sum -1m unaligned absolute match-names of success
|
|
|
|
+ units: packets
|
|
|
|
+ every: 10s
|
|
|
|
+ info: successful v5 PUBCOMP received in the last minute
|
|
|
|
+ to: sysadmin
|
|
|
|
+
|
|
|
|
+template: vernemq_mqtt_pubcomp_sent_reason_success
|
|
|
|
+ on: vernemq.mqtt_pubcomp_sent_reason
|
|
|
|
+ lookup: sum -1m unaligned absolute match-names of success
|
|
|
|
+ units: packets
|
|
|
|
+ every: 10s
|
|
|
|
+ info: successful v5 PUBCOMP sent in the last minute
|
|
|
|
+ to: sysadmin
|
|
|
|
+
|
|
|
|
+template: vernemq_mqtt_pubcomp_received_reason_unsuccessful
|
|
|
|
+ on: vernemq.mqtt_pubcomp_received_reason
|
|
|
|
+ lookup: sum -1m unaligned absolute
|
|
|
|
+ calc: $this - $vernemq_mqtt_pubcomp_received_reason_success
|
|
|
|
+ units: packets
|
|
|
|
+ every: 10s
|
|
|
|
+ warn: $this > 0
|
|
|
|
+ delay: down 5m multiplier 1.5 max 2h
|
|
|
|
+ info: unsuccessful v5 PUBCOMP received in the last minute
|
|
|
|
+ to: sysadmin
|
|
|
|
+
|
|
|
|
+template: vernemq_mqtt_pubcomp_sent_reason_unsuccessful
|
|
|
|
+ on: vernemq.mqtt_pubcomp_sent_reason
|
|
|
|
+ lookup: sum -1m unaligned absolute
|
|
|
|
+ calc: $this - $vernemq_mqtt_pubcomp_sent_reason_success
|
|
|
|
+ units: packets
|
|
|
|
+ every: 10s
|
|
|
|
+ warn: $this > 0
|
|
|
|
+ delay: down 5m multiplier 1.5 max 2h
|
|
|
|
+ info: unsuccessful v5 PUBCOMP sent in the last minute
|
|
|
|
+ to: sysadmin
|
|
|
|
+
|
|
|
|
+template: vernemq_mqtt_pubcomp_unexpected
|
|
|
|
+ on: vernemq.mqtt_pubcomp_invalid_error
|
|
|
|
+ lookup: sum -1m unaligned absolute
|
|
|
|
+ units: messages
|
|
|
|
+ every: 10s
|
|
|
|
+ warn: $this > 0
|
|
|
|
+ delay: down 5m multiplier 1.5 max 2h
|
|
|
|
+ info: unexpected v3/v5 PUBCOMP received in the last minute
|
|
|
|
+ to: sysadmin
|