KMeans.cc 1.6 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455
  1. // SPDX-License-Identifier: GPL-3.0-or-later
  2. #include "KMeans.h"
  3. #include <dlib/clustering.h>
  4. void KMeans::train(SamplesBuffer &SB, size_t MaxIterations) {
  5. std::vector<DSample> Samples = SB.preprocess();
  6. MinDist = std::numeric_limits<CalculatedNumber>::max();
  7. MaxDist = std::numeric_limits<CalculatedNumber>::min();
  8. {
  9. std::lock_guard<std::mutex> Lock(Mutex);
  10. ClusterCenters.clear();
  11. dlib::pick_initial_centers(NumClusters, ClusterCenters, Samples);
  12. dlib::find_clusters_using_kmeans(Samples, ClusterCenters, MaxIterations);
  13. for (const auto &S : Samples) {
  14. CalculatedNumber MeanDist = 0.0;
  15. for (const auto &KMCenter : ClusterCenters)
  16. MeanDist += dlib::length(KMCenter - S);
  17. MeanDist /= NumClusters;
  18. if (MeanDist < MinDist)
  19. MinDist = MeanDist;
  20. if (MeanDist > MaxDist)
  21. MaxDist = MeanDist;
  22. }
  23. }
  24. }
  25. CalculatedNumber KMeans::anomalyScore(SamplesBuffer &SB) {
  26. std::vector<DSample> DSamples = SB.preprocess();
  27. std::unique_lock<std::mutex> Lock(Mutex, std::defer_lock);
  28. if (!Lock.try_lock())
  29. return std::numeric_limits<CalculatedNumber>::quiet_NaN();
  30. CalculatedNumber MeanDist = 0.0;
  31. for (const auto &CC: ClusterCenters)
  32. MeanDist += dlib::length(CC - DSamples.back());
  33. MeanDist /= NumClusters;
  34. if (MaxDist == MinDist)
  35. return 0.0;
  36. CalculatedNumber AnomalyScore = 100.0 * std::abs((MeanDist - MinDist) / (MaxDist - MinDist));
  37. return (AnomalyScore > 100.0) ? 100.0 : AnomalyScore;
  38. }