// adaptive_histogram.h
  1. #pragma once
  2. #include "histogram.h"
  3. #include "common.h"
  4. #include <library/cpp/histogram/adaptive/protos/histo.pb.h>
  5. #include <util/generic/ptr.h>
  6. #include <util/generic/set.h>
  7. #include <util/generic/vector.h>
  8. namespace NKiwiAggr {
  9. class TAdaptiveHistogram: private TNonCopyable, public IHistogram {
  10. protected:
  11. static const size_t DEFAULT_INTERVALS = 100;
  12. private:
  13. using TPairSet = TSet<TWeightedValue>;
  14. struct TFastBin {
  15. // these names are for compatibility with TWeightedValue
  16. double first;
  17. double second;
  18. // both sums do not include current bin
  19. double SumBelow;
  20. double SumAbove;
  21. TFastBin(double first_, double second_, double sumBelow = 0, double sumAbove = 0)
  22. : first(first_)
  23. , second(second_)
  24. , SumBelow(sumBelow)
  25. , SumAbove(sumAbove)
  26. {
  27. }
  28. bool operator<(const TFastBin& rhs) const {
  29. return first < rhs.first;
  30. }
  31. };
  32. ui64 Id;
  33. double MinValue;
  34. double MaxValue;
  35. double Sum;
  36. size_t Intervals;
  37. TPairSet Bins;
  38. TPairSet BinsByQuality;
  39. TQualityFunction CalcQuality;
  40. TVector<TFastBin> PrecomputedBins;
  41. public:
  42. TAdaptiveHistogram(size_t intervals, ui64 id = 0, TQualityFunction qualityFunc = CalcWeightQuality);
  43. TAdaptiveHistogram(const THistogram& histo, size_t defaultIntervals = DEFAULT_INTERVALS, ui64 defaultId = 0, TQualityFunction qualityFunc = nullptr);
  44. TAdaptiveHistogram(IHistogram* histo, size_t defaultIntervals = DEFAULT_INTERVALS, ui64 defaultId = 0, TQualityFunction qualityFunc = CalcWeightQuality);
  45. ~TAdaptiveHistogram() override {
  46. }
  47. TQualityFunction GetQualityFunc();
  48. void Clear() override;
  49. void Add(double value, double weight) override;
  50. void Add(const THistoRec& histoRec) override;
  51. void Merge(const THistogram& histo, double multiplier) final;
  52. void Merge(const TVector<THistogram>& histogramsToMerge) final;
  53. void Merge(TVector<IHistogramPtr> histogramsToMerge) final;
  54. void Multiply(double factor) final;
  55. void FromProto(const THistogram& histo) final;
  56. void ToProto(THistogram& histo) final;
  57. void SetId(ui64 id) final;
  58. ui64 GetId() final;
  59. bool Empty() final;
  60. double GetMinValue() final;
  61. double GetMaxValue() final;
  62. double GetSum() final;
  63. double GetSumInRange(double leftBound, double rightBound) final;
  64. double GetSumAboveBound(double bound) final;
  65. double GetSumBelowBound(double bound) final;
  66. double CalcUpperBound(double sum) final;
  67. double CalcLowerBound(double sum) final;
  68. double CalcUpperBoundSafe(double sum) final;
  69. double CalcLowerBoundSafe(double sum) final;
  70. void PrecomputePartialSums() final;
  71. private:
  72. void FromIHistogram(IHistogram* histo);
  73. void Add(const TWeightedValue& weightedValue, bool initial);
  74. void Erase(double value);
  75. void Shrink();
  76. template <typename TBins, typename TGetSumAbove>
  77. double GetSumAboveBoundImpl(double bound, const TBins& bins, typename TBins::const_iterator rightBin, const TGetSumAbove& getSumAbove) const;
  78. template <typename TBins, typename TGetSumBelow>
  79. double GetSumBelowBoundImpl(double bound, const TBins& bins, typename TBins::const_iterator rightBin, const TGetSumBelow& getSumBelow) const;
  80. };
  81. template <TQualityFunction QualityFunction>
  82. class TDefinedAdaptiveHistogram: public TAdaptiveHistogram {
  83. public:
  84. TDefinedAdaptiveHistogram(size_t intervals, ui64 id = 0)
  85. : TAdaptiveHistogram(intervals, id, QualityFunction)
  86. {
  87. }
  88. TDefinedAdaptiveHistogram(const THistogram& histo, size_t defaultIntervals = DEFAULT_INTERVALS, ui64 defaultId = 0)
  89. : TAdaptiveHistogram(histo, defaultIntervals, defaultId, QualityFunction)
  90. {
  91. }
  92. TDefinedAdaptiveHistogram(IHistogram* histo, size_t defaultIntervals = DEFAULT_INTERVALS, ui64 defaultId = 0)
  93. : TAdaptiveHistogram(histo, defaultIntervals, defaultId, QualityFunction)
  94. {
  95. }
  96. ~TDefinedAdaptiveHistogram() override {
  97. }
  98. };
  99. typedef TDefinedAdaptiveHistogram<CalcDistanceQuality> TAdaptiveDistanceHistogram;
  100. typedef TDefinedAdaptiveHistogram<CalcWeightQuality> TAdaptiveWeightHistogram;
  101. typedef TDefinedAdaptiveHistogram<CalcWardQuality> TAdaptiveWardHistogram;
  102. }