# netdata python.d.plugin configuration for anomalies
#
# This file is in YAML format. Generally the format is:
#
# name: value
#
# There are 2 sections:
#  - global variables
#  - one or more JOBS
#
# JOBS allow you to collect values from multiple sources.
# Each source will have its own set of charts.
#
# JOB parameters have to be indented (using spaces only, example below).

# ----------------------------------------------------------------------
# Global Variables
# These variables set the defaults for all JOBs, however each JOB
# may define its own, overriding the defaults.

# update_every sets the default data collection frequency.
# If unset, the python.d.plugin default is used.
# update_every: 2

# priority controls the order of charts at the netdata dashboard.
# Lower numbers move the charts towards the top of the page.
# If unset, the default for python.d.plugin is used.
# priority: 60000

# ----------------------------------------------------------------------
# JOBS (data collection sources)

# Pull data from local Netdata node.
anomalies:
    name: 'Anomalies'

    # Host to pull data from.
    host: '127.0.0.1:19999'

    # Username and password for Netdata if using basic auth.
    # username: '???'
    # password: '???'

    # Use http or https to pull data.
    protocol: 'http'

    # SSL verify parameter for requests.get() calls.
    tls_verify: true
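
    # As an illustration only (not part of the default setup), if your node is served
    # over https with basic auth and a self-signed certificate, you might use something
    # like the lines below; the username and password values are just placeholders.
    # protocol: 'https'
    # tls_verify: false
    # username: 'some_user'
    # password: 'some_password'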

    # What charts to pull data for - a regex like 'system\..*' or 'system\..*|apps.cpu|apps.mem' etc.
    charts_regex: 'system\..*'

    # Charts to exclude, useful if you would like to exclude some specific charts.
    # Note: should be a ',' separated string like 'chart.name,chart.name'.
    charts_to_exclude: 'system.uptime,system.entropy'
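
    # For example, to also include the per-application cpu and memory charts you could
    # (illustratively) broaden the regex like so:
    # charts_regex: 'system\..*|apps.cpu|apps.mem'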

    # What model to use - can be one of 'pca', 'hbos', 'iforest', 'cblof', 'loda', 'copod' or 'feature_bagging'.
    # More details here: https://pyod.readthedocs.io/en/latest/pyod.models.html.
    model: 'pca'

    # Max number of observations to train on, to help cap the compute cost of training the model if you set a very large train_n_secs.
    train_max_n: 100000

    # How often to re-train the model (assuming update_every=1, train_every_n=1800 represents (re)training every 30 minutes).
    # Note: if you want to turn off re-training, set train_every_n=0 and after the initial training the models will not be retrained.
    train_every_n: 1800

    # The length of the window of data to train on (14400 = last 4 hours).
    train_n_secs: 14400

    # For how many prediction steps after a training event to just reuse the previous prediction value.
    # This is used to reduce the chance of the training step itself appearing as an anomaly on the charts.
    train_no_prediction_n: 10
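
    # For example, to retrain roughly every hour on the last 8 hours of data instead
    # (again assuming update_every=1), you could set something like:
    # train_every_n: 3600
    # train_n_secs: 28800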

    # If you would like to train the model for the first time on a specific window then you can define it using the two variables below.
    # Start of training data for initial model.
    # initial_train_data_after: 1604578857
    # End of training data for initial model.
    # initial_train_data_before: 1604593257

    # If you would like to ignore recent data in training then you can offset the training window by offset_n_secs.
    offset_n_secs: 0

    # How many lagged values of each dimension to include in the 'feature vector' each model is trained on.
    lags_n: 5

    # How much smoothing to apply to each dimension in the 'feature vector' each model is trained on.
    smooth_n: 3

    # How many differences to take in preprocessing your data.
    # More info on differencing here: https://en.wikipedia.org/wiki/Autoregressive_integrated_moving_average#Differencing
    # diffs_n=0 would mean training models on the raw values of each dimension.
    # diffs_n=1 means everything is done in terms of differences.
    diffs_n: 1
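
    # As a rough sketch of what these three settings imply (assuming a typical
    # difference -> smooth -> lag preprocessing order, which may not match the
    # collector's exact implementation): with diffs_n=1, smooth_n=3 and lags_n=5,
    # each dimension is first differenced, then averaged over a 3 step rolling window,
    # and the feature vector at each time step is that value plus its 5 most recent lags.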

    # What is the typical proportion of anomalies in your data on average?
    # This parameter can control the sensitivity of your models to anomalies.
    # Some discussion here: https://github.com/yzhao062/pyod/issues/144
    contamination: 0.001
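
    # For example, raising this to something like the value below would typically make the
    # models more sensitive, i.e. flag a larger share of observations as anomalous:
    # contamination: 0.01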

    # Set to true to include an "average_prob" dimension on the anomalies probability chart, which is
    # just the average of all anomaly probabilities at each time step.
    include_average_prob: true

    # Define any custom models you would like to create anomaly probabilities for; some examples below show how.
    # The example below creates two custom models: one that runs anomaly detection on user and system cpu for our demo servers,
    # and one on the cpu and mem apps metrics for the python.d.plugin.
    # custom_models:
    #   - name: 'demos_cpu'
    #     dimensions: 'london.my-netdata.io::system.cpu|user,london.my-netdata.io::system.cpu|system,newyork.my-netdata.io::system.cpu|user,newyork.my-netdata.io::system.cpu|system'
    #   - name: 'apps_python_d_plugin'
    #     dimensions: 'apps.cpu|python.d.plugin,apps.mem|python.d.plugin'

    # Set to true to normalize, using min-max standardization, the features used for the custom models.
    # Useful if your custom models contain dimensions on very different scales and the model you use does
    # not internally do its own normalization. Usually best to leave as false.
    # custom_models_normalize: false

# Standalone custom models example as an additional collector job.
# custom:
#     name: 'custom'
#     host: '127.0.0.1:19999'
#     protocol: 'http'
#     charts_regex: 'None'
#     charts_to_exclude: 'None'
#     model: 'pca'
#     train_max_n: 100000
#     train_every_n: 1800
#     train_n_secs: 14400
#     offset_n_secs: 0
#     lags_n: 5
#     smooth_n: 3
#     diffs_n: 1
#     contamination: 0.001
#     custom_models:
#       - name: 'user_netdata'
#         dimensions: 'users.cpu|netdata,users.mem|netdata,users.threads|netdata,users.processes|netdata,users.sockets|netdata'
#       - name: 'apps_python_d_plugin'
#         dimensions: 'apps.cpu|python.d.plugin,apps.mem|python.d.plugin,apps.threads|python.d.plugin,apps.processes|python.d.plugin,apps.sockets|python.d.plugin'

# Pull data from some demo nodes for cross-node custom models.
# demos:
#     name: 'demos'
#     host: '127.0.0.1:19999'
#     protocol: 'http'
#     charts_regex: 'None'
#     charts_to_exclude: 'None'
#     model: 'pca'
#     train_max_n: 100000
#     train_every_n: 1800
#     train_n_secs: 14400
#     offset_n_secs: 0
#     lags_n: 5
#     smooth_n: 3
#     diffs_n: 1
#     contamination: 0.001
#     custom_models:
#       - name: 'system.cpu'
#         dimensions: 'london.my-netdata.io::system.cpu|user,london.my-netdata.io::system.cpu|system,newyork.my-netdata.io::system.cpu|user,newyork.my-netdata.io::system.cpu|system'
#       - name: 'system.ip'
#         dimensions: 'london.my-netdata.io::system.ip|received,london.my-netdata.io::system.ip|sent,newyork.my-netdata.io::system.ip|received,newyork.my-netdata.io::system.ip|sent'
#       - name: 'system.net'
#         dimensions: 'london.my-netdata.io::system.net|received,london.my-netdata.io::system.net|sent,newyork.my-netdata.io::system.net|received,newyork.my-netdata.io::system.net|sent'
#       - name: 'system.io'
#         dimensions: 'london.my-netdata.io::system.io|in,london.my-netdata.io::system.io|out,newyork.my-netdata.io::system.io|in,newyork.my-netdata.io::system.io|out'

# Example additional job if you want to also pull data from a child node streaming to your
# local parent, or even from a remote node, so long as the Netdata REST API is accessible.
# mychildnode1:
#     name: 'mychildnode1'
#     host: '127.0.0.1:19999/host/mychildnode1'
#     protocol: 'http'
#     charts_regex: 'system\..*'
#     charts_to_exclude: 'None'
#     model: 'pca'
#     train_max_n: 100000
#     train_every_n: 1800
#     train_n_secs: 14400
#     offset_n_secs: 0
#     lags_n: 5
#     smooth_n: 3
#     diffs_n: 1
#     contamination: 0.001
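
# Similarly, assuming the remote node's REST API is reachable from this host, a job could
# point directly at it; the hostname below is just a placeholder.
# myremotenode1:
#     name: 'myremotenode1'
#     host: 'my.remote.node:19999'
#     protocol: 'http'
#     charts_regex: 'system\..*'
#     charts_to_exclude: 'None'
#     model: 'pca'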