# netdata python.d.plugin configuration for anomalies
#
# This file is in YAML format. Generally the format is:
#
# name: value
#
# There are 2 sections:
#  - global variables
#  - one or more JOBS
#
# JOBS allow you to collect values from multiple sources.
# Each source will have its own set of charts.
#
# JOB parameters have to be indented (using spaces only, example below).

# ----------------------------------------------------------------------
# Global Variables
# These variables set the defaults for all JOBs, however each JOB
# may define its own, overriding the defaults.

# update_every sets the default data collection frequency.
# If unset, the python.d.plugin default is used.
# update_every: 2

# priority controls the order of charts at the netdata dashboard.
# Lower numbers move the charts towards the top of the page.
# If unset, the default for python.d.plugin is used.
# priority: 60000

# ----------------------------------------------------------------------
# JOBS (data collection sources)

# Pull data from local Netdata node.
anomalies:
    name: 'Anomalies'

    # Host to pull data from.
    host: '127.0.0.1:19999'

    # Username and password for Netdata if using basic auth.
    # username: '???'
    # password: '???'

    # Use http or https to pull data.
    protocol: 'http'

    # SSL verify parameter for requests.get() calls.
    tls_verify: true
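
    # As an illustration only (not part of the default setup), if your node is served
    # over https with basic auth and a self-signed certificate, you might use something
    # like the lines below; the username and password values are just placeholders.
    # protocol: 'https'
    # tls_verify: false
    # username: 'some_user'
    # password: 'some_password'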

    # What charts to pull data for - a regex like 'system\..*' or 'system\..*|apps.cpu|apps.mem' etc.
    charts_regex: 'system\..*'

    # Charts to exclude, useful if you would like to exclude some specific charts.
    # Note: should be a ',' separated string like 'chart.name,chart.name'.
    charts_to_exclude: 'system.uptime,system.entropy'
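
    # For example, to also include the per-application cpu and memory charts you could
    # (illustratively) broaden the regex like so:
    # charts_regex: 'system\..*|apps.cpu|apps.mem'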

    # What model to use - can be one of 'pca', 'hbos', 'iforest', 'cblof', 'loda', 'copod' or 'feature_bagging'.
    # More details here: https://pyod.readthedocs.io/en/latest/pyod.models.html.
    model: 'pca'

    # Max number of observations to train on, to help cap the compute cost of training the model if you set a very large train_n_secs.
    train_max_n: 100000

    # How often to re-train the model (assuming update_every=1, train_every_n=1800 represents (re)training every 30 minutes).
    # Note: if you want to turn off re-training, set train_every_n=0 and after the initial training the models will not be retrained.
    train_every_n: 1800

    # The length of the window of data to train on (14400 = last 4 hours).
    train_n_secs: 14400

    # For how many prediction steps after a training event to just reuse the previous prediction value.
    # This is used to reduce the chance of the training step itself appearing as an anomaly on the charts.
    train_no_prediction_n: 10
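
    # For example, to retrain roughly every hour on the last 8 hours of data instead
    # (again assuming update_every=1), you could set something like:
    # train_every_n: 3600
    # train_n_secs: 28800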

    # If you would like to train the model for the first time on a specific window then you can define it using the two variables below.
    # Start of training data for initial model.
    # initial_train_data_after: 1604578857
    # End of training data for initial model.
    # initial_train_data_before: 1604593257

    # If you would like to ignore recent data in training then you can offset the training window by offset_n_secs.
    offset_n_secs: 0

    # How many lagged values of each dimension to include in the 'feature vector' each model is trained on.
    lags_n: 5

    # How much smoothing to apply to each dimension in the 'feature vector' each model is trained on.
    smooth_n: 3

    # How many differences to take in preprocessing your data.
    # More info on differencing here: https://en.wikipedia.org/wiki/Autoregressive_integrated_moving_average#Differencing
    # diffs_n=0 would mean training models on the raw values of each dimension.
    # diffs_n=1 means everything is done in terms of differences.
    diffs_n: 1
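
    # As a rough sketch of what these three settings imply (assuming a typical
    # difference -> smooth -> lag preprocessing order, which may not match the
    # collector's exact implementation): with diffs_n=1, smooth_n=3 and lags_n=5,
    # each dimension is first differenced, then averaged over a 3 step rolling window,
    # and the feature vector at each time step is that value plus its 5 most recent lags.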

    # What is the typical proportion of anomalies in your data on average?
    # This parameter can control the sensitivity of your models to anomalies.
    # Some discussion here: https://github.com/yzhao062/pyod/issues/144
    contamination: 0.001
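
    # For example, raising this to something like the value below would typically make the
    # models more sensitive, i.e. flag a larger share of observations as anomalous:
    # contamination: 0.01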

    # Set to true to include an "average_prob" dimension on the anomalies probability chart, which is
    # just the average of all anomaly probabilities at each time step.
    include_average_prob: true

    # Define any custom models you would like to create anomaly probabilities for; some examples below show how.
    # The example below creates two custom models: one that runs anomaly detection on user and system cpu for our demo servers,
    # and one on the cpu and mem apps metrics for the python.d.plugin.
    # custom_models:
    #   - name: 'demos_cpu'
    #     dimensions: 'london.my-netdata.io::system.cpu|user,london.my-netdata.io::system.cpu|system,newyork.my-netdata.io::system.cpu|user,newyork.my-netdata.io::system.cpu|system'
    #   - name: 'apps_python_d_plugin'
    #     dimensions: 'apps.cpu|python.d.plugin,apps.mem|python.d.plugin'

    # Set to true to normalize, using min-max standardization, the features used for the custom models.
    # Useful if your custom models contain dimensions on very different scales and the model you use does
    # not internally do its own normalization. Usually best to leave as false.
    # custom_models_normalize: false

# Standalone custom models example as an additional collector job.
# custom:
#     name: 'custom'
#     host: '127.0.0.1:19999'
#     protocol: 'http'
#     charts_regex: 'None'
#     charts_to_exclude: 'None'
#     model: 'pca'
#     train_max_n: 100000
#     train_every_n: 1800
#     train_n_secs: 14400
#     offset_n_secs: 0
#     lags_n: 5
#     smooth_n: 3
#     diffs_n: 1
#     contamination: 0.001
#     custom_models:
#       - name: 'user_netdata'
#         dimensions: 'users.cpu|netdata,users.mem|netdata,users.threads|netdata,users.processes|netdata,users.sockets|netdata'
#       - name: 'apps_python_d_plugin'
#         dimensions: 'apps.cpu|python.d.plugin,apps.mem|python.d.plugin,apps.threads|python.d.plugin,apps.processes|python.d.plugin,apps.sockets|python.d.plugin'

# Pull data from some demo nodes for cross-node custom models.
# demos:
#     name: 'demos'
#     host: '127.0.0.1:19999'
#     protocol: 'http'
#     charts_regex: 'None'
#     charts_to_exclude: 'None'
#     model: 'pca'
#     train_max_n: 100000
#     train_every_n: 1800
#     train_n_secs: 14400
#     offset_n_secs: 0
#     lags_n: 5
#     smooth_n: 3
#     diffs_n: 1
#     contamination: 0.001
#     custom_models:
#       - name: 'system.cpu'
#         dimensions: 'london.my-netdata.io::system.cpu|user,london.my-netdata.io::system.cpu|system,newyork.my-netdata.io::system.cpu|user,newyork.my-netdata.io::system.cpu|system'
#       - name: 'system.ip'
#         dimensions: 'london.my-netdata.io::system.ip|received,london.my-netdata.io::system.ip|sent,newyork.my-netdata.io::system.ip|received,newyork.my-netdata.io::system.ip|sent'
#       - name: 'system.net'
#         dimensions: 'london.my-netdata.io::system.net|received,london.my-netdata.io::system.net|sent,newyork.my-netdata.io::system.net|received,newyork.my-netdata.io::system.net|sent'
#       - name: 'system.io'
#         dimensions: 'london.my-netdata.io::system.io|in,london.my-netdata.io::system.io|out,newyork.my-netdata.io::system.io|in,newyork.my-netdata.io::system.io|out'

# Example additional job if you want to also pull data from a child node streaming to your
# local parent, or even from a remote node, so long as the Netdata REST API is accessible.
# mychildnode1:
#     name: 'mychildnode1'
#     host: '127.0.0.1:19999/host/mychildnode1'
#     protocol: 'http'
#     charts_regex: 'system\..*'
#     charts_to_exclude: 'None'
#     model: 'pca'
#     train_max_n: 100000
#     train_every_n: 1800
#     train_n_secs: 14400
#     offset_n_secs: 0
#     lags_n: 5
#     smooth_n: 3
#     diffs_n: 1
#     contamination: 0.001
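
# Similarly, assuming the remote node's REST API is reachable from this host, a job could
# point directly at it; the hostname below is just a placeholder.
# myremotenode1:
#     name: 'myremotenode1'
#     host: 'my.remote.node:19999'
#     protocol: 'http'
#     charts_regex: 'system\..*'
#     charts_to_exclude: 'None'
#     model: 'pca'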