Dimension.cc 3.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135
  1. // SPDX-License-Identifier: GPL-3.0-or-later
  2. #include "Config.h"
  3. #include "Dimension.h"
  4. #include "Query.h"
  5. using namespace ml;
  6. std::pair<CalculatedNumber *, size_t>
  7. TrainableDimension::getCalculatedNumbers() {
  8. size_t MinN = Cfg.MinTrainSamples;
  9. size_t MaxN = Cfg.MaxTrainSamples;
  10. // Figure out what our time window should be.
  11. time_t BeforeT = now_realtime_sec() - 1;
  12. time_t AfterT = BeforeT - (MaxN * updateEvery());
  13. BeforeT -= (BeforeT % updateEvery());
  14. AfterT -= (AfterT % updateEvery());
  15. BeforeT = std::min(BeforeT, latestTime());
  16. AfterT = std::max(AfterT, oldestTime());
  17. if (AfterT >= BeforeT)
  18. return { nullptr, 0 };
  19. CalculatedNumber *CNs = new CalculatedNumber[MaxN * (Cfg.LagN + 1)]();
  20. // Start the query.
  21. unsigned Idx = 0;
  22. unsigned CollectedValues = 0;
  23. unsigned TotalValues = 0;
  24. CalculatedNumber LastValue = std::numeric_limits<CalculatedNumber>::quiet_NaN();
  25. Query Q = Query(getRD());
  26. Q.init(AfterT, BeforeT);
  27. while (!Q.isFinished()) {
  28. if (Idx == MaxN)
  29. break;
  30. auto P = Q.nextMetric();
  31. CalculatedNumber Value = P.second;
  32. if (netdata_double_isnumber(Value)) {
  33. CNs[Idx] = Value;
  34. LastValue = CNs[Idx];
  35. CollectedValues++;
  36. } else
  37. CNs[Idx] = LastValue;
  38. Idx++;
  39. }
  40. TotalValues = Idx;
  41. if (CollectedValues < MinN) {
  42. delete[] CNs;
  43. return { nullptr, 0 };
  44. }
  45. // Find first non-NaN value.
  46. for (Idx = 0; std::isnan(CNs[Idx]); Idx++, TotalValues--) { }
  47. // Overwrite NaN values.
  48. if (Idx != 0)
  49. memmove(CNs, &CNs[Idx], sizeof(CalculatedNumber) * TotalValues);
  50. return { CNs, TotalValues };
  51. }
  52. MLResult TrainableDimension::trainModel() {
  53. auto P = getCalculatedNumbers();
  54. CalculatedNumber *CNs = P.first;
  55. unsigned N = P.second;
  56. if (!CNs)
  57. return MLResult::MissingData;
  58. unsigned TargetNumSamples = Cfg.MaxTrainSamples * Cfg.RandomSamplingRatio;
  59. double SamplingRatio = std::min(static_cast<double>(TargetNumSamples) / N, 1.0);
  60. SamplesBuffer SB = SamplesBuffer(CNs, N, 1, Cfg.DiffN, Cfg.SmoothN, Cfg.LagN,
  61. SamplingRatio, Cfg.RandomNums);
  62. KM.train(SB, Cfg.MaxKMeansIters);
  63. Trained = true;
  64. ConstantModel = true;
  65. delete[] CNs;
  66. return MLResult::Success;
  67. }
  68. void PredictableDimension::addValue(CalculatedNumber Value, bool Exists) {
  69. if (!Exists) {
  70. CNs.clear();
  71. return;
  72. }
  73. unsigned N = Cfg.DiffN + Cfg.SmoothN + Cfg.LagN;
  74. if (CNs.size() < N) {
  75. CNs.push_back(Value);
  76. return;
  77. }
  78. std::rotate(std::begin(CNs), std::begin(CNs) + 1, std::end(CNs));
  79. if (CNs[N - 1] != Value)
  80. ConstantModel = false;
  81. CNs[N - 1] = Value;
  82. }
  83. std::pair<MLResult, bool> PredictableDimension::predict() {
  84. unsigned N = Cfg.DiffN + Cfg.SmoothN + Cfg.LagN;
  85. if (CNs.size() != N) {
  86. AnomalyBit = false;
  87. return { MLResult::MissingData, AnomalyBit };
  88. }
  89. CalculatedNumber *TmpCNs = new CalculatedNumber[N * (Cfg.LagN + 1)]();
  90. std::memcpy(TmpCNs, CNs.data(), N * sizeof(CalculatedNumber));
  91. SamplesBuffer SB = SamplesBuffer(TmpCNs, N, 1, Cfg.DiffN, Cfg.SmoothN, Cfg.LagN,
  92. 1.0, Cfg.RandomNums);
  93. AnomalyScore = computeAnomalyScore(SB);
  94. delete[] TmpCNs;
  95. if (AnomalyScore == std::numeric_limits<CalculatedNumber>::quiet_NaN()) {
  96. AnomalyBit = false;
  97. return { MLResult::NaN, AnomalyBit };
  98. }
  99. AnomalyBit = AnomalyScore >= (100 * Cfg.DimensionAnomalyScoreThreshold);
  100. return { MLResult::Success, AnomalyBit };
  101. }