Dimension.cc 4.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184
  1. // SPDX-License-Identifier: GPL-3.0-or-later
  2. #include "Config.h"
  3. #include "Dimension.h"
  4. #include "Query.h"
  5. using namespace ml;
  6. /*
  7. * Copy of the unpack_storage_number which allows us to convert
  8. * a storage_number to double.
  9. */
  10. static CalculatedNumber unpack_storage_number_dbl(storage_number value) {
  11. if(!value)
  12. return 0;
  13. int sign = 0, exp = 0;
  14. int factor = 10;
  15. // bit 32 = 0:positive, 1:negative
  16. if(unlikely(value & (1 << 31)))
  17. sign = 1;
  18. // bit 31 = 0:divide, 1:multiply
  19. if(unlikely(value & (1 << 30)))
  20. exp = 1;
  21. // bit 27 SN_EXISTS_100
  22. if(unlikely(value & (1 << 26)))
  23. factor = 100;
  24. // bit 26 SN_EXISTS_RESET
  25. // bit 25 SN_ANOMALY_BIT
  26. // bit 30, 29, 28 = (multiplier or divider) 0-7 (8 total)
  27. int mul = (value & ((1<<29)|(1<<28)|(1<<27))) >> 27;
  28. // bit 24 to bit 1 = the value, so remove all other bits
  29. value ^= value & ((1<<31)|(1<<30)|(1<<29)|(1<<28)|(1<<27)|(1<<26)|(1<<25)|(1<<24));
  30. CalculatedNumber CN = value;
  31. if(exp) {
  32. for(; mul; mul--)
  33. CN *= factor;
  34. }
  35. else {
  36. for( ; mul ; mul--)
  37. CN /= 10;
  38. }
  39. if(sign)
  40. CN = -CN;
  41. return CN;
  42. }
  43. std::pair<CalculatedNumber *, size_t>
  44. TrainableDimension::getCalculatedNumbers() {
  45. size_t MinN = Cfg.MinTrainSamples;
  46. size_t MaxN = Cfg.MaxTrainSamples;
  47. // Figure out what our time window should be.
  48. time_t BeforeT = now_realtime_sec() - 1;
  49. time_t AfterT = BeforeT - (MaxN * updateEvery());
  50. BeforeT -= (BeforeT % updateEvery());
  51. AfterT -= (AfterT % updateEvery());
  52. BeforeT = std::min(BeforeT, latestTime());
  53. AfterT = std::max(AfterT, oldestTime());
  54. if (AfterT >= BeforeT)
  55. return { nullptr, 0 };
  56. CalculatedNumber *CNs = new CalculatedNumber[MaxN * (Cfg.LagN + 1)]();
  57. // Start the query.
  58. unsigned Idx = 0;
  59. unsigned CollectedValues = 0;
  60. unsigned TotalValues = 0;
  61. CalculatedNumber LastValue = std::numeric_limits<CalculatedNumber>::quiet_NaN();
  62. Query Q = Query(getRD());
  63. Q.init(AfterT, BeforeT);
  64. while (!Q.isFinished()) {
  65. if (Idx == MaxN)
  66. break;
  67. auto P = Q.nextMetric();
  68. storage_number SN = P.second;
  69. if (does_storage_number_exist(SN)) {
  70. CNs[Idx] = unpack_storage_number_dbl(SN);
  71. LastValue = CNs[Idx];
  72. CollectedValues++;
  73. } else
  74. CNs[Idx] = LastValue;
  75. Idx++;
  76. }
  77. TotalValues = Idx;
  78. if (CollectedValues < MinN) {
  79. delete[] CNs;
  80. return { nullptr, 0 };
  81. }
  82. // Find first non-NaN value.
  83. for (Idx = 0; std::isnan(CNs[Idx]); Idx++, TotalValues--) { }
  84. // Overwrite NaN values.
  85. if (Idx != 0)
  86. memmove(CNs, &CNs[Idx], sizeof(CalculatedNumber) * TotalValues);
  87. return { CNs, TotalValues };
  88. }
  89. MLResult TrainableDimension::trainModel() {
  90. auto P = getCalculatedNumbers();
  91. CalculatedNumber *CNs = P.first;
  92. unsigned N = P.second;
  93. if (!CNs)
  94. return MLResult::MissingData;
  95. unsigned TargetNumSamples = Cfg.MaxTrainSamples * Cfg.RandomSamplingRatio;
  96. double SamplingRatio = std::min(static_cast<double>(TargetNumSamples) / N, 1.0);
  97. SamplesBuffer SB = SamplesBuffer(CNs, N, 1, Cfg.DiffN, Cfg.SmoothN, Cfg.LagN,
  98. SamplingRatio, Cfg.RandomNums);
  99. KM.train(SB, Cfg.MaxKMeansIters);
  100. Trained = true;
  101. ConstantModel = true;
  102. delete[] CNs;
  103. return MLResult::Success;
  104. }
  105. void PredictableDimension::addValue(CalculatedNumber Value, bool Exists) {
  106. if (!Exists) {
  107. CNs.clear();
  108. return;
  109. }
  110. unsigned N = Cfg.DiffN + Cfg.SmoothN + Cfg.LagN;
  111. if (CNs.size() < N) {
  112. CNs.push_back(Value);
  113. return;
  114. }
  115. std::rotate(std::begin(CNs), std::begin(CNs) + 1, std::end(CNs));
  116. if (CNs[N - 1] != Value)
  117. ConstantModel = false;
  118. CNs[N - 1] = Value;
  119. }
  120. std::pair<MLResult, bool> PredictableDimension::predict() {
  121. unsigned N = Cfg.DiffN + Cfg.SmoothN + Cfg.LagN;
  122. if (CNs.size() != N) {
  123. AnomalyBit = false;
  124. return { MLResult::MissingData, AnomalyBit };
  125. }
  126. CalculatedNumber *TmpCNs = new CalculatedNumber[N * (Cfg.LagN + 1)]();
  127. std::memcpy(TmpCNs, CNs.data(), N * sizeof(CalculatedNumber));
  128. SamplesBuffer SB = SamplesBuffer(TmpCNs, N, 1, Cfg.DiffN, Cfg.SmoothN, Cfg.LagN,
  129. 1.0, Cfg.RandomNums);
  130. AnomalyScore = computeAnomalyScore(SB);
  131. delete[] TmpCNs;
  132. if (AnomalyScore == std::numeric_limits<CalculatedNumber>::quiet_NaN()) {
  133. AnomalyBit = false;
  134. return { MLResult::NaN, AnomalyBit };
  135. }
  136. AnomalyBit = AnomalyScore >= (100 * Cfg.DimensionAnomalyScoreThreshold);
  137. return { MLResult::Success, AnomalyBit };
  138. }