ml.conf 2.3 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253
  1. # below are some examples of using the `anomaly-bit` option to define alerts based on anomaly
  2. # rates as opposed to raw metric values. You can read more about the anomaly-bit and Netdata's
  3. # native anomaly detection here:
  4. # https://learn.netdata.cloud/docs/agent/ml#anomaly-bit---100--anomalous-0--normal
  5. # examples below are commented, you would need to uncomment and adjust as desired to enable them.
  6. # node level anomaly rate example
  7. # https://learn.netdata.cloud/docs/agent/ml#node-anomaly-rate
  8. # if node level anomaly rate is between 1-5% then warning (pick your own threshold that works best via trial and error).
  9. # if node level anomaly rate is above 5% then critical (pick your own threshold that works best via trial and error).
  10. # template: ml_1min_node_ar
  11. # on: anomaly_detection.anomaly_rate
  12. # os: linux
  13. # hosts: *
  14. # lookup: average -1m foreach anomaly_rate
  15. # calc: $this
  16. # units: %
  17. # every: 30s
  18. # warn: $this > (($status >= $WARNING) ? (1) : (5))
  19. # crit: $this > (($status == $CRITICAL) ? (5) : (100))
  20. # info: rolling 1min node level anomaly rate
  21. # alert per dimension example
  22. # if anomaly rate is between 5-20% then warning (pick your own threshold that works best via trial and error).
  23. # if anomaly rate is above 20% then critical (pick your own threshold that works best via trial and error).
  24. # template: ml_5min_cpu_dims
  25. # on: system.cpu
  26. # os: linux
  27. # hosts: *
  28. # lookup: average -5m anomaly-bit foreach *
  29. # calc: $this
  30. # units: %
  31. # every: 30s
  32. # warn: $this > (($status >= $WARNING) ? (5) : (20))
  33. # crit: $this > (($status == $CRITICAL) ? (20) : (100))
  34. # info: rolling 5min anomaly rate for each system.cpu dimension
  35. # alert per chart example
  36. # if anomaly rate is between 5-20% then warning (pick your own threshold that works best via trial and error).
  37. # if anomaly rate is above 20% then critical (pick your own threshold that works best via trial and error).
  38. # template: ml_5min_cpu_chart
  39. # on: system.cpu
  40. # os: linux
  41. # hosts: *
  42. # lookup: average -5m anomaly-bit of *
  43. # calc: $this
  44. # units: %
  45. # every: 30s
  46. # warn: $this > (($status >= $WARNING) ? (5) : (20))
  47. # crit: $this > (($status == $CRITICAL) ? (20) : (100))
  48. # info: rolling 5min anomaly rate for system.cpu chart