# netdata python.d.plugin configuration for anomalies
#
# This file is in YAML format. Generally the format is:
#
# name: value
#
# There are 2 sections:
# - global variables
# - one or more JOBS
#
# JOBS allow you to collect values from multiple sources.
# Each source will have its own set of charts.
#
# JOB parameters have to be indented (using spaces only, example below).

# ----------------------------------------------------------------------
# Global Variables
# These variables set the defaults for all JOBs, however each JOB
# may define its own, overriding the defaults.

# update_every sets the default data collection frequency.
# If unset, the python.d.plugin default is used.
# update_every: 2

# priority controls the order of charts at the netdata dashboard.
# Lower numbers move the charts towards the top of the page.
# If unset, the default for python.d.plugin is used.
# priority: 60000

# ----------------------------------------------------------------------
# JOBS (data collection sources)

# Pull data from local Netdata node.
anomalies:
    name: 'Anomalies'

    # Host to pull data from.
    host: '127.0.0.1:19999'

    # Username and Password for Netdata if using basic auth.
    # username: '???'
    # password: '???'

    # Use http or https to pull data.
    protocol: 'http'

    # SSL verify parameter for requests.get() calls.
    tls_verify: true

    # What charts to pull data for - a regex like 'system\..*' or 'system\..*|apps.cpu|apps.mem' etc.
    charts_regex: 'system\..*'
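    # For example, to also watch all apps and users charts you could use an
    # alternative (illustrative, not a default) such as:
    # charts_regex: 'system\..*|apps\..*|users\..*'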

    # Charts to exclude, useful if you would like to exclude some specific charts.
    # Note: should be a ',' separated string like 'chart.name,chart.name'.
    charts_to_exclude: 'system.uptime,system.entropy'

    # What model to use - can be one of 'pca', 'hbos', 'iforest', 'cblof', 'loda', 'copod' or 'feature_bagging'.
    # More details here: https://pyod.readthedocs.io/en/latest/pyod.models.html.
    model: 'pca'

    # Max number of observations to train on, to help cap the compute cost of training if you set a very large train_n_secs.
    train_max_n: 100000

    # How often to re-train the model (assuming update_every=1, train_every_n=1800 represents (re)training every 30 minutes).
    # Note: to turn off re-training, set train_every_n=0; after the initial training the models will not be retrained.
    train_every_n: 1800
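    # For example, with update_every=2 the same train_every_n=1800 would mean
    # retraining roughly every 1800*2=3600 seconds, i.e. every hour.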

    # The length of the window of data to train on (14400 = last 4 hours).
    train_n_secs: 14400
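    # For example, with update_every=1 this window yields 14400 observations,
    # comfortably under the train_max_n=100000 cap above.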

    # How many prediction steps after a train event to just use the previous prediction value for.
    # Used to reduce the possibility of the training step itself appearing as an anomaly on the charts.
    train_no_prediction_n: 10

    # If you would like to train the model for the first time on a specific window, you can define it using the two variables below.
    # Start of training data for initial model.
    # initial_train_data_after: 1604578857
    # End of training data for initial model.
    # initial_train_data_before: 1604593257
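    # Both are unix epoch seconds; for example, the pair above spans
    # 1604593257-1604578857=14400 seconds, i.e. the same 4 hour window as train_n_secs.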

    # If you would like to ignore recent data in training, you can offset it by offset_n_secs.
    offset_n_secs: 0

    # How many lagged values of each dimension to include in the 'feature vector' each model is trained on.
    lags_n: 5

    # How much smoothing to apply to each dimension in the 'feature vector' each model is trained on.
    smooth_n: 3
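    # For example, with lags_n=5 and smooth_n=3 each dimension would contribute roughly
    # its current value plus 5 lagged values, each smoothed over a 3 step rolling window,
    # to the feature vector (the exact preprocessing order is handled by the collector).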

    # How many differences to take in preprocessing your data.
    # More info on differencing here: https://en.wikipedia.org/wiki/Autoregressive_integrated_moving_average#Differencing
    # diffs_n=0 would mean training models on the raw values of each dimension.
    # diffs_n=1 means everything is done in terms of differences.
    diffs_n: 1
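    # For example, with diffs_n=1 a raw series like [1, 3, 2, 5] would be modelled
    # via its first differences [2, -1, 3].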

    # What is the typical proportion of anomalies in your data on average?
    # This parameter can control the sensitivity of your models to anomalies.
    # Some discussion here: https://github.com/yzhao062/pyod/issues/144
    contamination: 0.001
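    # For example, contamination=0.001 tells the model to expect roughly 1 in every
    # 1000 observations to be anomalous.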

    # Set to true to include an "average_prob" dimension on the anomalies probability chart, which is
    # just the average of all anomaly probabilities at each time step.
    include_average_prob: true

    # Define any custom models you would like to create anomaly probabilities for; some examples below show how.
    # For example, the snippet below creates two custom models: one running anomaly detection on user and system cpu
    # for our demo servers, and one on the cpu and mem apps metrics for the python.d.plugin.
    # custom_models:
    #   - name: 'demos_cpu'
    #     dimensions: 'london.my-netdata.io::system.cpu|user,london.my-netdata.io::system.cpu|system,newyork.my-netdata.io::system.cpu|user,newyork.my-netdata.io::system.cpu|system'
    #   - name: 'apps_python_d_plugin'
    #     dimensions: 'apps.cpu|python.d.plugin,apps.mem|python.d.plugin'
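    # Note the dimension string format used above: 'host::chart|dimension' for a
    # specific host, or just 'chart|dimension' for the local node.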

    # Set to true to normalize, using min-max standardization, the features used for the custom models.
    # Useful if your custom models contain dimensions on very different scales and the model you use does
    # not internally do its own normalization. Usually best to leave as false.
    # custom_models_normalize: false

# Standalone custom models example as an additional collector job.
# custom:
#     name: 'custom'
#     host: '127.0.0.1:19999'
#     protocol: 'http'
#     charts_regex: 'None'
#     charts_to_exclude: 'None'
#     model: 'pca'
#     train_max_n: 100000
#     train_every_n: 1800
#     train_n_secs: 14400
#     offset_n_secs: 0
#     lags_n: 5
#     smooth_n: 3
#     diffs_n: 1
#     contamination: 0.001
#     custom_models:
#       - name: 'user_netdata'
#         dimensions: 'users.cpu|netdata,users.mem|netdata,users.threads|netdata,users.processes|netdata,users.sockets|netdata'
#       - name: 'apps_python_d_plugin'
#         dimensions: 'apps.cpu|python.d.plugin,apps.mem|python.d.plugin,apps.threads|python.d.plugin,apps.processes|python.d.plugin,apps.sockets|python.d.plugin'

# Pull data from some demo nodes for cross node custom models.
# demos:
#     name: 'demos'
#     host: '127.0.0.1:19999'
#     protocol: 'http'
#     charts_regex: 'None'
#     charts_to_exclude: 'None'
#     model: 'pca'
#     train_max_n: 100000
#     train_every_n: 1800
#     train_n_secs: 14400
#     offset_n_secs: 0
#     lags_n: 5
#     smooth_n: 3
#     diffs_n: 1
#     contamination: 0.001
#     custom_models:
#       - name: 'system.cpu'
#         dimensions: 'london.my-netdata.io::system.cpu|user,london.my-netdata.io::system.cpu|system,newyork.my-netdata.io::system.cpu|user,newyork.my-netdata.io::system.cpu|system'
#       - name: 'system.ip'
#         dimensions: 'london.my-netdata.io::system.ip|received,london.my-netdata.io::system.ip|sent,newyork.my-netdata.io::system.ip|received,newyork.my-netdata.io::system.ip|sent'
#       - name: 'system.net'
#         dimensions: 'london.my-netdata.io::system.net|received,london.my-netdata.io::system.net|sent,newyork.my-netdata.io::system.net|received,newyork.my-netdata.io::system.net|sent'
#       - name: 'system.io'
#         dimensions: 'london.my-netdata.io::system.io|in,london.my-netdata.io::system.io|out,newyork.my-netdata.io::system.io|in,newyork.my-netdata.io::system.io|out'

# Example additional job if you want to also pull data from a child streaming to your
# local parent, or even a remote node, so long as the Netdata REST API is accessible.
# mychildnode1:
#     name: 'mychildnode1'
#     host: '127.0.0.1:19999/host/mychildnode1'
#     protocol: 'http'
#     charts_regex: 'system\..*'
#     charts_to_exclude: 'None'
#     model: 'pca'
#     train_max_n: 100000
#     train_every_n: 1800
#     train_n_secs: 14400
#     offset_n_secs: 0
#     lags_n: 5
#     smooth_n: 3
#     diffs_n: 1
#     contamination: 0.001