log_histogram_collector.h 5.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158
  1. #pragma once
  2. #include "log_histogram_snapshot.h"
  3. #include <util/generic/algorithm.h>
  4. #include <util/generic/utility.h>
  5. #include <util/generic/yexception.h>
  6. #include <mutex>
  7. #include <cmath>
  8. namespace NMonitoring {
  9. class TLogHistogramCollector {
  10. public:
  11. static constexpr int DEFAULT_START_POWER = -1;
  12. explicit TLogHistogramCollector(int startPower = DEFAULT_START_POWER)
  13. : StartPower_(startPower)
  14. , CountZero_(0u)
  15. {}
  16. void Collect(TLogHistogramSnapshot* logHist) {
  17. std::lock_guard guard(Mutex_);
  18. Merge(logHist);
  19. }
  20. bool Collect(double value) {
  21. std::lock_guard guard(Mutex_);
  22. return CollectDouble(value);
  23. }
  24. TLogHistogramSnapshotPtr Snapshot() const {
  25. std::lock_guard guard(Mutex_);
  26. return MakeIntrusive<TLogHistogramSnapshot>(BASE, CountZero_, StartPower_, Buckets_);
  27. }
  28. void AddZeros(ui64 zerosCount) noexcept {
  29. std::lock_guard guard(Mutex_);
  30. CountZero_ += zerosCount;
  31. }
  32. private:
  33. int StartPower_;
  34. ui64 CountZero_;
  35. TVector<double> Buckets_;
  36. mutable std::mutex Mutex_;
  37. static constexpr size_t MAX_BUCKETS = LOG_HIST_MAX_BUCKETS;
  38. static constexpr double BASE = 1.5;
  39. private:
  40. int EstimateBucketIndex(double value) const {
  41. return (int) (std::floor(std::log(value) / std::log(BASE)) - StartPower_);
  42. }
  43. void CollectPositiveDouble(double value) {
  44. ssize_t idx = std::floor(std::log(value) / std::log(BASE)) - StartPower_;
  45. if (idx >= Buckets_.ysize()) {
  46. idx = ExtendUp(idx);
  47. } else if (idx <= 0) {
  48. idx = Max<ssize_t>(0, ExtendDown(idx, 1));
  49. }
  50. ++Buckets_[idx];
  51. }
  52. bool CollectDouble(double value) {
  53. if (Y_UNLIKELY(std::isnan(value) || std::isinf(value))) {
  54. return false;
  55. }
  56. if (value <= 0.0) {
  57. ++CountZero_;
  58. } else {
  59. CollectPositiveDouble(value);
  60. }
  61. return true;
  62. }
  63. void Merge(TLogHistogramSnapshot* logHist) {
  64. CountZero_ += logHist->ZerosCount();
  65. const i32 firstIdxBeforeExtend = logHist->StartPower() - StartPower_;
  66. const i32 lastIdxBeforeExtend = firstIdxBeforeExtend + logHist->Count() - 1;
  67. if (firstIdxBeforeExtend > Max<i16>() || firstIdxBeforeExtend < Min<i16>()) {
  68. ythrow yexception() << "i16 overflow on first index";
  69. }
  70. if (lastIdxBeforeExtend > Max<i16>() || lastIdxBeforeExtend < Min<i16>()) {
  71. ythrow yexception() << "i16 overflow on last index";
  72. }
  73. i64 firstIdx = ExtendBounds(firstIdxBeforeExtend, lastIdxBeforeExtend, 0).first;
  74. size_t toMerge = std::min<ui32>(std::max<i64>(-firstIdx, (i64) 0), logHist->Count());
  75. if (toMerge) {
  76. for (size_t i = 0; i < toMerge; ++i) {
  77. Buckets_[0] += logHist->Bucket(i);
  78. }
  79. firstIdx = 0;
  80. }
  81. for (size_t i = toMerge; i != logHist->Count(); ++i) {
  82. Buckets_[firstIdx] += logHist->Bucket(i);
  83. ++firstIdx;
  84. }
  85. }
  86. int ExtendUp(int expectedIndex) {
  87. Y_DEBUG_ABORT_UNLESS(expectedIndex >= (int) Buckets_.size());
  88. const size_t toAdd = expectedIndex - Buckets_.size() + 1;
  89. const size_t newSize = Buckets_.size() + toAdd;
  90. if (newSize <= MAX_BUCKETS) {
  91. Buckets_.resize(newSize, 0.0);
  92. return expectedIndex;
  93. }
  94. const size_t toRemove = newSize - MAX_BUCKETS;
  95. const size_t actualToRemove = std::min<size_t>(toRemove, Buckets_.size());
  96. if (actualToRemove > 0) {
  97. const double firstWeight = std::accumulate(Buckets_.cbegin(), Buckets_.cbegin() + actualToRemove, 0.0);
  98. Buckets_.erase(Buckets_.cbegin(), Buckets_.cbegin() + actualToRemove);
  99. if (Buckets_.empty()) {
  100. Buckets_.push_back(firstWeight);
  101. } else {
  102. Buckets_[0] = firstWeight;
  103. }
  104. }
  105. Buckets_.resize(MAX_BUCKETS, 0.0);
  106. StartPower_ += toRemove;
  107. return expectedIndex - toRemove;
  108. }
  109. int ExtendDown(int expectedIndex, int margin) {
  110. Y_DEBUG_ABORT_UNLESS(expectedIndex <= 0);
  111. int toAdd = std::min<int>(MAX_BUCKETS - Buckets_.size(), margin - expectedIndex);
  112. if (toAdd > 0) {
  113. Buckets_.insert(Buckets_.begin(), toAdd, 0.0);
  114. StartPower_ -= toAdd;
  115. }
  116. return expectedIndex + toAdd;
  117. }
  118. std::pair<ssize_t, ssize_t> ExtendBounds(ssize_t startIdx, ssize_t endIdx, ui8 margin) {
  119. ssize_t realEndIdx;
  120. ssize_t realStartIdx;
  121. if (endIdx >= Buckets_.ysize()) {
  122. Buckets_.reserve(std::max<size_t>(std::min<ui32>(endIdx - startIdx + 1ul, MAX_BUCKETS), 0ul));
  123. realEndIdx = ExtendUp(endIdx);
  124. startIdx += realEndIdx - endIdx;
  125. } else {
  126. realEndIdx = endIdx;
  127. }
  128. if (startIdx < 1) {
  129. realStartIdx = ExtendDown(startIdx, margin);
  130. realEndIdx += realStartIdx - startIdx;
  131. } else {
  132. realStartIdx = startIdx;
  133. }
  134. return std::make_pair(realStartIdx, realEndIdx);
  135. }
  136. };
  137. } // namespace NMonitoring