Ilya Mashchenko 4 месяцев назад
Родитель
Сommit
572b54d4fc
2 измененных файлов с 74 добавлено и 1 удалено
  1. 17 1
      src/go/plugin/go.d/modules/rabbitmq/metadata.yaml
  2. 57 0
      src/health/health.d/rabbitmq.conf

+ 17 - 1
src/go/plugin/go.d/modules/rabbitmq/metadata.yaml

@@ -169,7 +169,23 @@ modules:
     troubleshooting:
       problems:
         list: []
-    alerts: []
+    # Alerts shipped for this module. Each entry gives the alert name, the chart
+    # context it is attached to (metric), a human-readable description (info),
+    # and a link to the health configuration file that defines it.
+    alerts:
+      - name: rabbitmq_node_avail_status_down
+        metric: rabbitmq.node_avail_status
+        info: RabbitMQ node is down (node ${label:node} cluster ${label:cluster_id})
+        link: https://github.com/netdata/netdata/blob/master/src/health/health.d/rabbitmq.conf
+      - name: rabbitmq_node_mem_alarm_status_triggered
+        metric: rabbitmq.node_mem_alarm_status
+        info: RabbitMQ mem alarm triggered (node ${label:node} cluster ${label:cluster_id})
+        link: https://github.com/netdata/netdata/blob/master/src/health/health.d/rabbitmq.conf
+      # NOTE(review): this name starts with "rabbitmq." (dot) while the other
+      # three entries start with "rabbitmq_" (underscore) - confirm whether this
+      # is intentional. It must stay identical to the template name used in
+      # src/health/health.d/rabbitmq.conf, so rename both together or neither.
+      - name: rabbitmq.node_disk_free_alarm_status_triggered
+        metric: rabbitmq.node_disk_free_alarm_status
+        info: RabbitMQ disk free alarm triggered (node ${label:node} cluster ${label:cluster_id})
+        link: https://github.com/netdata/netdata/blob/master/src/health/health.d/rabbitmq.conf
+      - name: rabbitmq_vhost_status_unhealthy
+        metric: rabbitmq.vhost_status
+        info: RabbitMQ vhost is not healthy (vhost ${label:vhost} cluster ${label:cluster_id})
+        link: https://github.com/netdata/netdata/blob/master/src/health/health.d/rabbitmq.conf
     metrics:
       folding:
         title: Metrics

+ 57 - 0
src/health/health.d/rabbitmq.conf

@@ -0,0 +1,57 @@
+# you can disable an alarm notification by setting the 'to' line to: silent
+
+# Node availability alert, attached to the rabbitmq.node_avail_status context.
+# Evaluated every 10s: the value is taken from $down (presumably the chart's
+# "down" dimension - confirm against the collector) and a warning is raised
+# whenever it is above 0. Notifications are addressed to 'sysadmin'.
+# NOTE(review): 'delay: down 1m' presumably holds back notifications for one
+# minute when the alert status drops - confirm against the health reference.
+ template: rabbitmq_node_avail_status_down
+       on: rabbitmq.node_avail_status
+    class: Errors
+     type: Messaging
+component: RabbitMQ
+     calc: $down
+    every: 10s
+    units: status
+     warn: $this > 0
+  summary: RabbitMQ node is down (node ${label:node} cluster ${label:cluster_id})
+     info: RabbitMQ node is down (node ${label:node} cluster ${label:cluster_id})
+    delay: down 1m
+       to: sysadmin
+
+# Memory alarm alert, attached to the rabbitmq.node_mem_alarm_status context.
+# Evaluated every 10s: the value is taken from $triggered (presumably the
+# chart's "triggered" dimension - confirm against the collector) and a warning
+# is raised whenever it is above 0. Notifications are addressed to 'sysadmin'.
+ template: rabbitmq_node_mem_alarm_status_triggered
+       on: rabbitmq.node_mem_alarm_status
+    class: Errors
+     type: Messaging
+component: RabbitMQ
+     calc: $triggered
+    every: 10s
+    units: status
+     warn: $this > 0
+  summary: RabbitMQ mem alarm triggered (node ${label:node} cluster ${label:cluster_id})
+     info: RabbitMQ mem alarm triggered (node ${label:node} cluster ${label:cluster_id})
+    delay: down 1m
+       to: sysadmin
+
+# Disk-free alarm alert, attached to the rabbitmq.node_disk_free_alarm_status
+# context. Evaluated every 10s: the value is taken from $triggered (presumably
+# the chart's "triggered" dimension - confirm against the collector) and a
+# warning is raised whenever it is above 0. Notifications go to 'sysadmin'.
+# NOTE(review): the template name starts with "rabbitmq." (dot) while every
+# other template in this file starts with "rabbitmq_" (underscore) - confirm
+# whether this is intentional. The same name is listed in the rabbitmq
+# module's metadata.yaml alerts section, so rename both together or neither.
+ template: rabbitmq.node_disk_free_alarm_status_triggered
+       on: rabbitmq.node_disk_free_alarm_status
+    class: Errors
+     type: Messaging
+component: RabbitMQ
+     calc: $triggered
+    every: 10s
+    units: status
+     warn: $this > 0
+  summary: RabbitMQ disk free alarm triggered (node ${label:node} cluster ${label:cluster_id})
+     info: RabbitMQ disk free alarm triggered (node ${label:node} cluster ${label:cluster_id})
+    delay: down 1m
+       to: sysadmin
+
+# Virtual host health alert, attached to the rabbitmq.vhost_status context.
+# Evaluated every 10s: the value is the sum of $stopped and $partial
+# (presumably the chart's "stopped" and "partial" dimensions - confirm against
+# the collector) and a warning is raised whenever that sum is above 0.
+# Notifications are addressed to 'sysadmin'.
+ template: rabbitmq_vhost_status_unhealthy
+       on: rabbitmq.vhost_status
+    class: Errors
+     type: Messaging
+component: RabbitMQ
+     calc: $stopped + $partial
+    every: 10s
+    units: status
+     warn: $this > 0
+  summary: RabbitMQ vhost is not healthy (vhost ${label:vhost} cluster ${label:cluster_id})
+     info: RabbitMQ vhost is not healthy (vhost ${label:vhost} cluster ${label:cluster_id})
+    delay: down 1m
+       to: sysadmin