Config.cc 5.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138
  1. // SPDX-License-Identifier: GPL-3.0-or-later
  2. #include "Config.h"
  3. #include "ml-private.h"
  4. using namespace ml;
  5. /*
  6. * Definition of the global ML configuration instance (ml::Cfg), shared between
  7. * the training and prediction threads. Its fields are set by Config::readMLConfig().
  8. */
  9. Config ml::Cfg;
  /*
   * Restrict Value to the closed interval [Min, Max].
   *
   * Returns Max when Max < Value, Min when the capped value is not greater
   * than Min, and Value otherwise. Only operator< is required of T. The
   * caller is expected to pass Min <= Max.
   */
  template <typename T>
  static T clamp(const T& Value, const T& Min, const T& Max) {
      // Upper bound first: pick Max only when it is strictly below Value.
      const T& Capped = (Max < Value) ? Max : Value;

      // Then the lower bound: keep the capped value only when it is strictly above Min.
      return (Min < Capped) ? Capped : Min;
  }
  14. /*
  15. * Initialize global configuration variable.
  16. */
  17. void Config::readMLConfig(void) {
  18. const char *ConfigSectionML = CONFIG_SECTION_ML;
  19. bool EnableAnomalyDetection = config_get_boolean(ConfigSectionML, "enabled", false);
  20. /*
  21. * Read values
  22. */
  23. unsigned MaxTrainSamples = config_get_number(ConfigSectionML, "maximum num samples to train", 4 * 3600);
  24. unsigned MinTrainSamples = config_get_number(ConfigSectionML, "minimum num samples to train", 1 * 3600);
  25. unsigned TrainEvery = config_get_number(ConfigSectionML, "train every", 1 * 3600);
  26. unsigned DBEngineAnomalyRateEvery = config_get_number(ConfigSectionML, "dbengine anomaly rate every", 60);
  27. unsigned DiffN = config_get_number(ConfigSectionML, "num samples to diff", 1);
  28. unsigned SmoothN = config_get_number(ConfigSectionML, "num samples to smooth", 3);
  29. unsigned LagN = config_get_number(ConfigSectionML, "num samples to lag", 5);
  30. double RandomSamplingRatio = config_get_float(ConfigSectionML, "random sampling ratio", 1.0 / LagN);
  31. unsigned MaxKMeansIters = config_get_number(ConfigSectionML, "maximum number of k-means iterations", 1000);
  32. double DimensionAnomalyScoreThreshold = config_get_float(ConfigSectionML, "dimension anomaly score threshold", 0.99);
  33. double HostAnomalyRateThreshold = config_get_float(ConfigSectionML, "host anomaly rate threshold", 0.01);
  34. double ADMinWindowSize = config_get_float(ConfigSectionML, "minimum window size", 30);
  35. double ADMaxWindowSize = config_get_float(ConfigSectionML, "maximum window size", 600);
  36. double ADIdleWindowSize = config_get_float(ConfigSectionML, "idle window size", 30);
  37. double ADWindowRateThreshold = config_get_float(ConfigSectionML, "window minimum anomaly rate", 0.25);
  38. double ADDimensionRateThreshold = config_get_float(ConfigSectionML, "anomaly event min dimension rate threshold", 0.05);
  39. std::stringstream SS;
  40. SS << netdata_configured_cache_dir << "/anomaly-detection.db";
  41. Cfg.AnomalyDBPath = SS.str();
  42. /*
  43. * Clamp
  44. */
  45. MaxTrainSamples = clamp(MaxTrainSamples, 1 * 3600u, 6 * 3600u);
  46. MinTrainSamples = clamp(MinTrainSamples, 1 * 3600u, 6 * 3600u);
  47. TrainEvery = clamp(TrainEvery, 1 * 3600u, 6 * 3600u);
  48. DBEngineAnomalyRateEvery = clamp(DBEngineAnomalyRateEvery, 1 * 30u, 15 * 60u);
  49. DiffN = clamp(DiffN, 0u, 1u);
  50. SmoothN = clamp(SmoothN, 0u, 5u);
  51. LagN = clamp(LagN, 1u, 5u);
  52. RandomSamplingRatio = clamp(RandomSamplingRatio, 0.2, 1.0);
  53. MaxKMeansIters = clamp(MaxKMeansIters, 500u, 1000u);
  54. DimensionAnomalyScoreThreshold = clamp(DimensionAnomalyScoreThreshold, 0.01, 5.00);
  55. HostAnomalyRateThreshold = clamp(HostAnomalyRateThreshold, 0.01, 1.0);
  56. ADMinWindowSize = clamp(ADMinWindowSize, 30.0, 300.0);
  57. ADMaxWindowSize = clamp(ADMaxWindowSize, 60.0, 900.0);
  58. ADIdleWindowSize = clamp(ADIdleWindowSize, 30.0, 900.0);
  59. ADWindowRateThreshold = clamp(ADWindowRateThreshold, 0.01, 0.99);
  60. ADDimensionRateThreshold = clamp(ADDimensionRateThreshold, 0.01, 0.99);
  61. /*
  62. * Validate
  63. */
  64. if (MinTrainSamples >= MaxTrainSamples) {
  65. error("invalid min/max train samples found (%u >= %u)", MinTrainSamples, MaxTrainSamples);
  66. MinTrainSamples = 1 * 3600;
  67. MaxTrainSamples = 4 * 3600;
  68. }
  69. if (ADMinWindowSize >= ADMaxWindowSize) {
  70. error("invalid min/max anomaly window size found (%lf >= %lf)", ADMinWindowSize, ADMaxWindowSize);
  71. ADMinWindowSize = 30.0;
  72. ADMaxWindowSize = 600.0;
  73. }
  74. /*
  75. * Assign to config instance
  76. */
  77. Cfg.EnableAnomalyDetection = EnableAnomalyDetection;
  78. Cfg.MaxTrainSamples = MaxTrainSamples;
  79. Cfg.MinTrainSamples = MinTrainSamples;
  80. Cfg.TrainEvery = TrainEvery;
  81. Cfg.DBEngineAnomalyRateEvery = DBEngineAnomalyRateEvery;
  82. Cfg.DiffN = DiffN;
  83. Cfg.SmoothN = SmoothN;
  84. Cfg.LagN = LagN;
  85. Cfg.RandomSamplingRatio = RandomSamplingRatio;
  86. Cfg.MaxKMeansIters = MaxKMeansIters;
  87. Cfg.DimensionAnomalyScoreThreshold = DimensionAnomalyScoreThreshold;
  88. Cfg.HostAnomalyRateThreshold = HostAnomalyRateThreshold;
  89. Cfg.ADMinWindowSize = ADMinWindowSize;
  90. Cfg.ADMaxWindowSize = ADMaxWindowSize;
  91. Cfg.ADIdleWindowSize = ADIdleWindowSize;
  92. Cfg.ADWindowRateThreshold = ADWindowRateThreshold;
  93. Cfg.ADDimensionRateThreshold = ADDimensionRateThreshold;
  94. Cfg.HostsToSkip = config_get(ConfigSectionML, "hosts to skip from training", "!*");
  95. Cfg.SP_HostsToSkip = simple_pattern_create(Cfg.HostsToSkip.c_str(), NULL, SIMPLE_PATTERN_EXACT);
  96. // Always exclude anomaly_detection charts from training.
  97. Cfg.ChartsToSkip = "anomaly_detection.* ";
  98. Cfg.ChartsToSkip += config_get(ConfigSectionML, "charts to skip from training", "netdata.*");
  99. Cfg.SP_ChartsToSkip = simple_pattern_create(ChartsToSkip.c_str(), NULL, SIMPLE_PATTERN_EXACT);
  100. Cfg.StreamADCharts = config_get_boolean(ConfigSectionML, "stream anomaly detection charts", true);
  101. }