Browse Source

add rabbitmq alerts (#18972)

Ilya Mashchenko 4 months ago
parent
commit
572b54d4fc

+ 17 - 1
src/go/plugin/go.d/modules/rabbitmq/metadata.yaml

@@ -169,7 +169,23 @@ modules:
     troubleshooting:
     troubleshooting:
       problems:
       problems:
         list: []
         list: []
-    alerts: []
+    alerts:
+      - name: rabbitmq_node_avail_status_down
+        metric: rabbitmq.node_avail_status
+        info: RabbitMQ node is down (node ${label:node} cluster ${label:cluster_id})
+        link: https://github.com/netdata/netdata/blob/master/src/health/health.d/rabbitmq.conf
+      - name: rabbitmq_node_mem_alarm_status_triggered
+        metric: rabbitmq.node_mem_alarm_status
+        info: RabbitMQ mem alarm triggered (node ${label:node} cluster ${label:cluster_id})
+        link: https://github.com/netdata/netdata/blob/master/src/health/health.d/rabbitmq.conf
+      - name: rabbitmq_node_disk_free_alarm_status_triggered
+        metric: rabbitmq.node_disk_free_alarm_status
+        info: RabbitMQ disk free alarm triggered (node ${label:node} cluster ${label:cluster_id})
+        link: https://github.com/netdata/netdata/blob/master/src/health/health.d/rabbitmq.conf
+      - name: rabbitmq_vhost_status_unhealthy
+        metric: rabbitmq.vhost_status
+        info: RabbitMQ vhost is not healthy (vhost ${label:vhost} cluster ${label:cluster_id})
+        link: https://github.com/netdata/netdata/blob/master/src/health/health.d/rabbitmq.conf
     metrics:
     metrics:
       folding:
       folding:
         title: Metrics
         title: Metrics

+ 57 - 0
src/health/health.d/rabbitmq.conf

@@ -0,0 +1,57 @@
+# you can disable an alarm notification by setting the 'to' line to: silent
+
+ template: rabbitmq_node_avail_status_down
+       on: rabbitmq.node_avail_status
+    class: Errors
+     type: Messaging
+component: RabbitMQ
+     calc: $down
+    every: 10s
+    units: status
+     warn: $this > 0
+  summary: RabbitMQ node is down (node ${label:node} cluster ${label:cluster_id})
+     info: RabbitMQ node is down (node ${label:node} cluster ${label:cluster_id})
+    delay: down 1m
+       to: sysadmin
+
+ template: rabbitmq_node_mem_alarm_status_triggered
+       on: rabbitmq.node_mem_alarm_status
+    class: Errors
+     type: Messaging
+component: RabbitMQ
+     calc: $triggered
+    every: 10s
+    units: status
+     warn: $this > 0
+  summary: RabbitMQ mem alarm triggered (node ${label:node} cluster ${label:cluster_id})
+     info: RabbitMQ mem alarm triggered (node ${label:node} cluster ${label:cluster_id})
+    delay: down 1m
+       to: sysadmin
+
+ template: rabbitmq_node_disk_free_alarm_status_triggered
+       on: rabbitmq.node_disk_free_alarm_status
+    class: Errors
+     type: Messaging
+component: RabbitMQ
+     calc: $triggered
+    every: 10s
+    units: status
+     warn: $this > 0
+  summary: RabbitMQ disk free alarm triggered (node ${label:node} cluster ${label:cluster_id})
+     info: RabbitMQ disk free alarm triggered (node ${label:node} cluster ${label:cluster_id})
+    delay: down 1m
+       to: sysadmin
+
+ template: rabbitmq_vhost_status_unhealthy
+       on: rabbitmq.vhost_status
+    class: Errors
+     type: Messaging
+component: RabbitMQ
+     calc: $stopped + $partial
+    every: 10s
+    units: status
+     warn: $this > 0
+  summary: RabbitMQ vhost is not healthy (vhost ${label:vhost} cluster ${label:cluster_id})
+     info: RabbitMQ vhost is not healthy (vhost ${label:vhost} cluster ${label:cluster_id})
+    delay: down 1m
+       to: sysadmin