vernemq.conf 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399
  1. # Availability
  2. template: vernemq_last_collected_secs
  3. on: vernemq.node_uptime
  4. calc: $now - $last_collected_t
  5. units: seconds ago
  6. every: 10s
  7. warn: $this > (($status >= $WARNING) ? ($update_every) : ( 5 * $update_every))
  8. crit: $this > (($status == $CRITICAL) ? ($update_every) : (60 * $update_every))
  9. delay: down 5m multiplier 1.5 max 1h
  10. info: number of seconds since the last successful data collection
  11. to: sysadmin
  12. # Socket errors
  13. template: vernemq_socket_errors
  14. on: vernemq.socket_errors
  15. lookup: sum -1m unaligned absolute of socket_error
  16. units: errors
  17. every: 10s
  18. warn: $this > (($status == $WARNING) ? (0) : (5))
  19. delay: down 5m multiplier 1.5 max 2h
  20. info: socket errors in the last minute
  21. to: sysadmin
  22. # Queues dropped/expired/unhandled PUBLISH messages
  23. template: vernemq_queue_message_drop
  24. on: vernemq.queue_undelivered_messages
  25. lookup: sum -1m unaligned absolute of queue_message_drop
  26. units: dropped messages
  27. every: 10s
  28. warn: $this > (($status == $WARNING) ? (0) : (5))
  29. delay: down 5m multiplier 1.5 max 2h
  30. info: dropped messages due to full queues in the last minute
  31. to: sysadmin
  32. template: vernemq_queue_message_expired
  33. on: vernemq.queue_undelivered_messages
  34. lookup: sum -1m unaligned absolute of queue_message_expired
  35. units: expired messages
  36. every: 10s
  37. warn: $this > (($status == $WARNING) ? (0) : (15))
  38. delay: down 5m multiplier 1.5 max 2h
  39. info: messages which expired before delivery in the last minute
  40. to: sysadmin
  41. template: vernemq_queue_message_unhandled
  42. on: vernemq.queue_undelivered_messages
  43. lookup: sum -1m unaligned absolute of queue_message_unhandled
  44. units: unhandled messages
  45. every: 10s
  46. warn: $this > (($status == $WARNING) ? (0) : (5))
  47. delay: down 5m multiplier 1.5 max 2h
  48. info: unhandled messages (connections with clean session=true) in the last minute
  49. to: sysadmin
  50. # Erlang VM
  51. template: vernemq_average_scheduler_utilization
  52. on: vernemq.average_scheduler_utilization
  53. lookup: average -10m unaligned
  54. units: %
  55. every: 1m
  56. warn: $this > (($status >= $WARNING) ? (75) : (85))
  57. crit: $this > (($status == $CRITICAL) ? (85) : (95))
  58. delay: down 15m multiplier 1.5 max 1h
  59. info: average scheduler utilization for the last 10 minutes
  60. to: sysadmin
  61. # Cluster communication and netsplits
  62. template: vernemq_cluster_dropped
  63. on: vernemq.cluster_dropped
  64. lookup: average -1m unaligned
  65. units: KiB/s
  66. every: 10s
  67. warn: $this > 0
  68. delay: down 5m multiplier 1.5 max 1h
  69. info: average rate of traffic dropped during communication with the cluster nodes over the last minute
  70. to: sysadmin
  71. template: vernemq_netsplits
  72. on: vernemq.netsplits
  73. lookup: sum -1m unaligned absolute of netsplit_detected
  74. units: netsplits
  75. every: 10s
  76. warn: $this > 0
  77. delay: down 5m multiplier 1.5 max 2h
  78. info: detected netsplits in the last minute
  79. to: sysadmin
  80. # Unsuccessful CONNACK
  81. template: vernemq_mqtt_connack_sent_reason_success
  82. on: vernemq.mqtt_connack_sent_reason
  83. lookup: sum -1m unaligned absolute match-names of success
  84. units: packets
  85. every: 10s
  86. info: successful v3/v5 CONNACK sent in the last minute
  87. to: sysadmin
  88. template: vernemq_mqtt_connack_sent_reason_unsuccessful
  89. on: vernemq.mqtt_connack_sent_reason
  90. lookup: sum -1m unaligned absolute
  91. calc: $this - $vernemq_mqtt_connack_sent_reason_success
  92. units: packets
  93. every: 10s
  94. warn: $this > 0
  95. delay: down 5m multiplier 1.5 max 2h
  96. info: unsuccessful v3/v5 CONNACK sent in the last minute
  97. to: sysadmin
  98. # Not normal DISCONNECT
  99. template: vernemq_mqtt_disconnect_received_reason_normal_disconnect
  100. on: vernemq.mqtt_disconnect_received_reason
  101. lookup: sum -1m unaligned absolute match-names of normal_disconnect
  102. units: packets
  103. every: 10s
  104. info: normal v5 DISCONNECT received in the last minute
  105. to: sysadmin
  106. template: vernemq_mqtt_disconnect_sent_reason_normal_disconnect
  107. on: vernemq.mqtt_disconnect_sent_reason
  108. lookup: sum -1m unaligned absolute match-names of normal_disconnect
  109. units: packets
  110. every: 10s
  111. info: normal v5 DISCONNECT sent in the last minute
  112. to: sysadmin
  113. template: vernemq_mqtt_disconnect_received_reason_not_normal
  114. on: vernemq.mqtt_disconnect_received_reason
  115. lookup: sum -1m unaligned absolute
  116. calc: $this - $vernemq_mqtt_disconnect_received_reason_normal_disconnect
  117. units: packets
  118. every: 10s
  119. warn: $this > 0
  120. delay: down 5m multiplier 1.5 max 2h
  121. info: not normal v5 DISCONNECT received in the last minute
  122. to: sysadmin
  123. template: vernemq_mqtt_disconnect_sent_reason_not_normal
  124. on: vernemq.mqtt_disconnect_sent_reason
  125. lookup: sum -1m unaligned absolute
  126. calc: $this - $vernemq_mqtt_disconnect_sent_reason_normal_disconnect
  127. units: packets
  128. every: 10s
  129. warn: $this > 0
  130. delay: down 5m multiplier 1.5 max 2h
  131. info: not normal v5 DISCONNECT sent in the last minute
  132. to: sysadmin
  133. # SUBSCRIBE errors and unauthorized attempts
  134. template: vernemq_mqtt_subscribe_error
  135. on: vernemq.mqtt_subscribe_error
  136. lookup: sum -1m unaligned absolute
  137. units: failed ops
  138. every: 10s
  139. warn: $this > 0
  140. delay: down 5m multiplier 1.5 max 2h
  141. info: failed v3/v5 SUBSCRIBE operations in the last minute
  142. to: sysadmin
  143. template: vernemq_mqtt_subscribe_auth_error
  144. on: vernemq.mqtt_subscribe_auth_error
  145. lookup: sum -1m unaligned absolute
  146. units: attempts
  147. every: 10s
  148. warn: $this > 0
  149. delay: down 5m multiplier 1.5 max 2h
  150. info: unauthorized v3/v5 SUBSCRIBE attempts in the last minute
  151. to: sysadmin
  152. # UNSUBSCRIBE errors
  153. template: vernemq_mqtt_unsubscribe_error
  154. on: vernemq.mqtt_unsubscribe_error
  155. lookup: sum -1m unaligned absolute
  156. units: failed ops
  157. every: 10s
  158. warn: $this > 0
  159. delay: down 5m multiplier 1.5 max 2h
  160. info: failed v3/v5 UNSUBSCRIBE operations in the last minute
  161. to: sysadmin
  162. # PUBLISH errors and unauthorized attempts
  163. template: vernemq_mqtt_publish_errors
  164. on: vernemq.mqtt_publish_errors
  165. lookup: sum -1m unaligned absolute
  166. units: failed ops
  167. every: 10s
  168. warn: $this > 0
  169. delay: down 5m multiplier 1.5 max 2h
  170. info: failed v3/v5 PUBLISH operations in the last minute
  171. to: sysadmin
  172. template: vernemq_mqtt_publish_auth_errors
  173. on: vernemq.mqtt_publish_auth_errors
  174. lookup: sum -1m unaligned absolute
  175. units: attempts
  176. every: 10s
  177. warn: $this > 0
  178. delay: down 5m multiplier 1.5 max 2h
  179. info: unauthorized v3/v5 PUBLISH attempts in the last minute
  180. to: sysadmin
  181. # Unsuccessful and unexpected PUBACK
  182. template: vernemq_mqtt_puback_received_reason_success
  183. on: vernemq.mqtt_puback_received_reason
  184. lookup: sum -1m unaligned absolute match-names of success
  185. units: packets
  186. every: 10s
  187. info: successful v5 PUBACK received in the last minute
  188. to: sysadmin
  189. template: vernemq_mqtt_puback_sent_reason_success
  190. on: vernemq.mqtt_puback_sent_reason
  191. lookup: sum -1m unaligned absolute match-names of success
  192. units: packets
  193. every: 10s
  194. info: successful v5 PUBACK sent in the last minute
  195. to: sysadmin
  196. template: vernemq_mqtt_puback_received_reason_unsuccessful
  197. on: vernemq.mqtt_puback_received_reason
  198. lookup: sum -1m unaligned absolute
  199. calc: $this - $vernemq_mqtt_puback_received_reason_success
  200. units: packets
  201. every: 10s
  202. warn: $this > 0
  203. delay: down 5m multiplier 1.5 max 2h
  204. info: unsuccessful v5 PUBACK received in the last minute
  205. to: sysadmin
  206. template: vernemq_mqtt_puback_sent_reason_unsuccessful
  207. on: vernemq.mqtt_puback_sent_reason
  208. lookup: sum -1m unaligned absolute
  209. calc: $this - $vernemq_mqtt_puback_sent_reason_success
  210. units: packets
  211. every: 10s
  212. warn: $this > 0
  213. delay: down 5m multiplier 1.5 max 2h
  214. info: unsuccessful v5 PUBACK sent in the last minute
  215. to: sysadmin
  216. template: vernemq_mqtt_puback_unexpected
  217. on: vernemq.mqtt_puback_invalid_error
  218. lookup: sum -1m unaligned absolute
  219. units: messages
  220. every: 10s
  221. warn: $this > 0
  222. delay: down 5m multiplier 1.5 max 2h
  223. info: unexpected v3/v5 PUBACK received in the last minute
  224. to: sysadmin
  225. # Unsuccessful and unexpected PUBREC
  226. template: vernemq_mqtt_pubrec_received_reason_success
  227. on: vernemq.mqtt_pubrec_received_reason
  228. lookup: sum -1m unaligned absolute match-names of success
  229. units: packets
  230. every: 10s
  231. info: successful v5 PUBREC received in the last minute
  232. to: sysadmin
  233. template: vernemq_mqtt_pubrec_sent_reason_success
  234. on: vernemq.mqtt_pubrec_sent_reason
  235. lookup: sum -1m unaligned absolute match-names of success
  236. units: packets
  237. every: 10s
  238. info: successful v5 PUBREC sent in the last minute
  239. to: sysadmin
  240. template: vernemq_mqtt_pubrec_received_reason_unsuccessful
  241. on: vernemq.mqtt_pubrec_received_reason
  242. lookup: sum -1m unaligned absolute
  243. calc: $this - $vernemq_mqtt_pubrec_received_reason_success
  244. units: packets
  245. every: 10s
  246. warn: $this > 0
  247. delay: down 5m multiplier 1.5 max 2h
  248. info: unsuccessful v5 PUBREC received in the last minute
  249. to: sysadmin
  250. template: vernemq_mqtt_pubrec_sent_reason_unsuccessful
  251. on: vernemq.mqtt_pubrec_sent_reason
  252. lookup: sum -1m unaligned absolute
  253. calc: $this - $vernemq_mqtt_pubrec_sent_reason_success
  254. units: packets
  255. every: 10s
  256. warn: $this > 0
  257. delay: down 5m multiplier 1.5 max 2h
  258. info: unsuccessful v5 PUBREC sent in the last minute
  259. to: sysadmin
  260. template: vernemq_mqtt_pubrec_invalid_error
  261. on: vernemq.mqtt_pubrec_invalid_error
  262. lookup: sum -1m unaligned absolute
  263. units: messages
  264. every: 10s
  265. warn: $this > 0
  266. delay: down 5m multiplier 1.5 max 2h
  267. info: unexpected v3 PUBREC received in the last minute
  268. to: sysadmin
  269. # Unsuccessful PUBREL
  270. template: vernemq_mqtt_pubrel_received_reason_success
  271. on: vernemq.mqtt_pubrel_received_reason
  272. lookup: sum -1m unaligned absolute match-names of success
  273. units: packets
  274. every: 10s
  275. info: successful v5 PUBREL received in the last minute
  276. to: sysadmin
  277. template: vernemq_mqtt_pubrel_sent_reason_success
  278. on: vernemq.mqtt_pubrel_sent_reason
  279. lookup: sum -1m unaligned absolute match-names of success
  280. units: packets
  281. every: 10s
  282. info: successful v5 PUBREL sent in the last minute
  283. to: sysadmin
  284. template: vernemq_mqtt_pubrel_received_reason_unsuccessful
  285. on: vernemq.mqtt_pubrel_received_reason
  286. lookup: sum -1m unaligned absolute
  287. calc: $this - $vernemq_mqtt_pubrel_received_reason_success
  288. units: packets
  289. every: 10s
  290. warn: $this > 0
  291. delay: down 5m multiplier 1.5 max 2h
  292. info: unsuccessful v5 PUBREL received in the last minute
  293. to: sysadmin
  294. template: vernemq_mqtt_pubrel_sent_reason_unsuccessful
  295. on: vernemq.mqtt_pubrel_sent_reason
  296. lookup: sum -1m unaligned absolute
  297. calc: $this - $vernemq_mqtt_pubrel_sent_reason_success
  298. units: packets
  299. every: 10s
  300. warn: $this > 0
  301. delay: down 5m multiplier 1.5 max 2h
  302. info: unsuccessful v5 PUBREL sent in the last minute
  303. to: sysadmin
  304. # Unsuccessful and unexpected PUBCOMP
  305. template: vernemq_mqtt_pubcomp_received_reason_success
  306. on: vernemq.mqtt_pubcomp_received_reason
  307. lookup: sum -1m unaligned absolute match-names of success
  308. units: packets
  309. every: 10s
  310. info: successful v5 PUBCOMP received in the last minute
  311. to: sysadmin
  312. template: vernemq_mqtt_pubcomp_sent_reason_success
  313. on: vernemq.mqtt_pubcomp_sent_reason
  314. lookup: sum -1m unaligned absolute match-names of success
  315. units: packets
  316. every: 10s
  317. info: successful v5 PUBCOMP sent in the last minute
  318. to: sysadmin
  319. template: vernemq_mqtt_pubcomp_received_reason_unsuccessful
  320. on: vernemq.mqtt_pubcomp_received_reason
  321. lookup: sum -1m unaligned absolute
  322. calc: $this - $vernemq_mqtt_pubcomp_received_reason_success
  323. units: packets
  324. every: 10s
  325. warn: $this > 0
  326. delay: down 5m multiplier 1.5 max 2h
  327. info: unsuccessful v5 PUBCOMP received in the last minute
  328. to: sysadmin
  329. template: vernemq_mqtt_pubcomp_sent_reason_unsuccessful
  330. on: vernemq.mqtt_pubcomp_sent_reason
  331. lookup: sum -1m unaligned absolute
  332. calc: $this - $vernemq_mqtt_pubcomp_sent_reason_success
  333. units: packets
  334. every: 10s
  335. warn: $this > 0
  336. delay: down 5m multiplier 1.5 max 2h
  337. info: unsuccessful v5 PUBCOMP sent in the last minute
  338. to: sysadmin
  339. template: vernemq_mqtt_pubcomp_unexpected
  340. on: vernemq.mqtt_pubcomp_invalid_error
  341. lookup: sum -1m unaligned absolute
  342. units: messages
  343. every: 10s
  344. warn: $this > 0
  345. delay: down 5m multiplier 1.5 max 2h
  346. info: unexpected v3/v5 PUBCOMP received in the last minute
  347. to: sysadmin