tdigest.h 1.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566
  1. #pragma once
  2. #include <util/generic/string.h>
  3. #include <util/generic/vector.h>
  4. class TDigest {
  5. struct TCentroid {
  6. double Mean;
  7. double Count;
  8. TCentroid()
  9. : Mean(0)
  10. , Count(0)
  11. {
  12. }
  13. TCentroid(double x, double weight)
  14. : Mean(x)
  15. , Count(weight)
  16. {
  17. }
  18. bool operator<(const TCentroid& centroid) const {
  19. return Mean < centroid.Mean;
  20. }
  21. void Update(double x, double weight) {
  22. Count += weight;
  23. Mean += weight * (x - Mean) / Count;
  24. }
  25. };
  26. TVector<TCentroid> Centroids;
  27. TVector<TCentroid> Unmerged;
  28. TVector<TCentroid> Merged;
  29. typedef TVector<TCentroid>::iterator iter_t;
  30. double N;
  31. double Delta;
  32. double K;
  33. void Add(const TDigest& otherDigest);
  34. void AddCentroid(const TCentroid& centroid);
  35. double GetThreshold(double q);
  36. void MergeCentroid(TVector<TCentroid>& merged, double& sum, const TCentroid& centroid);
  37. protected:
  38. void Update(double x, double w = 1.0);
  39. public:
  40. TDigest(double delta = 0.01, double k = 25);
  41. TDigest(double delta, double k, double firstValue);
  42. TDigest(TStringBuf serializedDigest);
  43. TDigest(const TDigest* digest1, const TDigest* digest2); // merge
  44. TString Serialize();
  45. TDigest operator+(const TDigest& other);
  46. TDigest& operator+=(const TDigest& other);
  47. void AddValue(double value);
  48. void Compress();
  49. void Clear();
  50. double GetPercentile(double percentile);
  51. double GetRank(double value);
  52. i64 GetCount() const;
  53. };