Browse Source

Skip training of constant metrics. (#12212)

Detect dimensions whose values do not change, and exclude them from
training. This allows us to reduce the number of training operations
by ~40-50%.

Note that we do not skip the very first training iteration, because a
dimension's value might change at any point in time, and we need to
have a trained model in order to compute its anomaly score.
vkalintiris 3 years ago
parent
commit
207a743c77
2 changed files with 11 additions and 0 deletions
  1. 5 0
      ml/Dimension.cc
  2. 6 0
      ml/Dimension.h

+ 5 - 0
ml/Dimension.cc

@@ -128,6 +128,7 @@ MLResult TrainableDimension::trainModel() {
     SamplesBuffer SB = SamplesBuffer(CNs, N, 1, Cfg.DiffN, Cfg.SmoothN, Cfg.LagN);
     KM.train(SB, Cfg.MaxKMeansIters);
     Trained = true;
+    ConstantModel = true;
 
     delete[] CNs;
     return MLResult::Success;
@@ -146,6 +147,10 @@ void PredictableDimension::addValue(CalculatedNumber Value, bool Exists) {
     }
 
     std::rotate(std::begin(CNs), std::begin(CNs) + 1, std::end(CNs));
+
+    if (CNs[N - 1] != Value)
+        ConstantModel = false;
+
     CNs[N - 1] = Value;
 }
 

+ 6 - 0
ml/Dimension.h

@@ -55,6 +55,9 @@ public:
     }
 
     bool shouldTrain(const TimePoint &TP) const {
+        if (ConstantModel)
+            return false;
+
         return (LastTrainedAt + TrainEvery) < TP;
     }
 
@@ -70,6 +73,9 @@ private:
 public:
     TimePoint LastTrainedAt{Seconds{0}};
 
+protected:
+    std::atomic<bool> ConstantModel{false};
+
 private:
     Seconds TrainEvery;
     KMeans KM;