SMusatov
/
netdata
mirror of https://github.com/netdata/netdata.git


			
				
					
						
						
							1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556
							# below are some examples of using the `anomaly-bit` option to define alerts based on anomaly 
# rates as opposed to raw metric values. You can read more about the anomaly-bit and Netdata's 
# native anomaly detection here: 
# https://learn.netdata.cloud/docs/agent/ml#anomaly-bit---100--anomalous-0--normal

# some examples below are commented, you would need to uncomment and adjust as desired to enable them.

# node level anomaly rate
# https://learn.netdata.cloud/docs/agent/ml#node-anomaly-rate
# if node level anomaly rate is above 1% then warning (pick your own threshold that works best via trial and error).
 template: ml_1min_node_ar
       on: anomaly_detection.anomaly_rate
    class: Workload
     type: System
component: ML
       os: *
    hosts: *
   lookup: average -1m of anomaly_rate
     calc: $this
    units: %
    every: 30s
     warn: $this > 1
  summary: ML node anomaly rate
     info: Rolling 1min node level anomaly rate
       to: silent

# alert per dimension example
# if anomaly rate is between 5-20% then warning (pick your own threshold that works best via tial and error).
# if anomaly rate is above 20% then critical (pick your own threshold that works best via tial and error).
# template: ml_5min_cpu_dims
#       on: system.cpu
#       os: linux
#    hosts: *
#   lookup: average -5m anomaly-bit foreach *
#     calc: $this
#    units: %
#    every: 30s
#     warn: $this > (($status >= $WARNING)  ? (5) : (20))
#     crit: $this > (($status == $CRITICAL) ? (20) : (100))
#     info: rolling 5min anomaly rate for each system.cpu dimension

# alert per chart example
# if anomaly rate is between 5-20% then warning (pick your own threshold that works best via tial and error).
# if anomaly rate is above 20% then critical (pick your own threshold that works best via tial and error).
# template: ml_5min_cpu_chart
#       on: system.cpu
#       os: linux
#    hosts: *
#   lookup: average -5m anomaly-bit of *
#     calc: $this
#    units: %
#    every: 30s
#     warn: $this > (($status >= $WARNING)  ? (5) : (20))
#     crit: $this > (($status == $CRITICAL) ? (20) : (100))
#     info: rolling 5min anomaly rate for system.cpu chart