adaptive_histogram.cpp 24 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637
  1. #include "adaptive_histogram.h"
  2. #include <util/generic/algorithm.h>
  3. #include <util/generic/yexception.h>
  4. #include <util/generic/ymath.h>
  5. #include <util/string/printf.h>
  6. #include <util/system/backtrace.h>
  7. namespace NKiwiAggr {
  8. TAdaptiveHistogram::TAdaptiveHistogram(size_t intervals, ui64 id, TQualityFunction qualityFunc)
  9. : Id(id)
  10. , Sum(0.0)
  11. , Intervals(intervals)
  12. , CalcQuality(qualityFunc)
  13. {
  14. }
  15. TAdaptiveHistogram::TAdaptiveHistogram(const THistogram& histo, size_t defaultIntervals, ui64 defaultId, TQualityFunction qualityFunc)
  16. : TAdaptiveHistogram(defaultIntervals, defaultId, qualityFunc)
  17. {
  18. FromProto(histo);
  19. }
  20. TAdaptiveHistogram::TAdaptiveHistogram(IHistogram* histo, size_t defaultIntervals, ui64 defaultId, TQualityFunction qualityFunc)
  21. : TAdaptiveHistogram(defaultIntervals, defaultId, qualityFunc)
  22. {
  23. TAdaptiveHistogram* adaptiveHisto = dynamic_cast<TAdaptiveHistogram*>(histo);
  24. if (!adaptiveHisto) {
  25. FromIHistogram(histo);
  26. return;
  27. }
  28. Id = adaptiveHisto->Id;
  29. MinValue = adaptiveHisto->MinValue;
  30. MaxValue = adaptiveHisto->MaxValue;
  31. Sum = adaptiveHisto->Sum;
  32. Intervals = adaptiveHisto->Intervals;
  33. Bins = adaptiveHisto->Bins;
  34. BinsByQuality = adaptiveHisto->BinsByQuality;
  35. if (CalcQuality == nullptr) {
  36. CalcQuality = adaptiveHisto->CalcQuality;
  37. }
  38. }
  39. TQualityFunction TAdaptiveHistogram::GetQualityFunc() {
  40. return CalcQuality;
  41. }
  42. void TAdaptiveHistogram::Clear() {
  43. Sum = 0.0;
  44. Bins.clear();
  45. BinsByQuality.clear();
  46. }
  47. void TAdaptiveHistogram::Add(const THistoRec& histoRec) {
  48. if (!histoRec.HasId() || histoRec.GetId() == Id) {
  49. Add(histoRec.GetValue(), histoRec.GetWeight());
  50. }
  51. }
  52. void TAdaptiveHistogram::Add(double value, double weight) {
  53. if (!IsValidFloat(value) || !IsValidFloat(weight)) {
  54. ythrow yexception() << Sprintf("Histogram id %lu: bad value %f weight %f", Id, value, weight);
  55. }
  56. TWeightedValue weightedValue(value, weight);
  57. Add(weightedValue, true);
  58. PrecomputedBins.clear();
  59. }
  60. void TAdaptiveHistogram::Merge(const THistogram& histo, double multiplier) {
  61. if (!IsValidFloat(histo.GetMinValue()) || !IsValidFloat(histo.GetMaxValue())) {
  62. fprintf(stderr, "Merging in histogram id %lu: skip bad histo with minvalue %f maxvalue %f\n", Id, histo.GetMinValue(), histo.GetMaxValue());
  63. return;
  64. }
  65. if (histo.FreqSize() == 0) {
  66. return; // skip empty histos
  67. }
  68. if (histo.GetType() == HT_ADAPTIVE_DISTANCE_HISTOGRAM ||
  69. histo.GetType() == HT_ADAPTIVE_WEIGHT_HISTOGRAM ||
  70. histo.GetType() == HT_ADAPTIVE_WARD_HISTOGRAM ||
  71. histo.GetType() == HT_ADAPTIVE_HISTOGRAM)
  72. {
  73. Y_VERIFY(histo.FreqSize() == histo.PositionSize(), "Corrupted histo");
  74. for (size_t j = 0; j < histo.FreqSize(); ++j) {
  75. double value = histo.GetPosition(j);
  76. double weight = histo.GetFreq(j);
  77. if (!IsValidFloat(value) || !IsValidFloat(weight)) {
  78. fprintf(stderr, "Merging in histogram id %lu: skip bad value %f weight %f\n", Id, value, weight);
  79. continue;
  80. }
  81. Add(value, weight * multiplier);
  82. }
  83. MinValue = Min(MinValue, histo.GetMinValue());
  84. MaxValue = Max(MaxValue, histo.GetMaxValue());
  85. } else if (histo.GetType() == HT_FIXED_BIN_HISTOGRAM) {
  86. double pos = histo.GetMinValue() + histo.GetBinRange() / 2.0;
  87. for (size_t j = 0; j < histo.FreqSize(); ++j) {
  88. double weight = histo.GetFreq(j);
  89. if (!IsValidFloat(pos) || !IsValidFloat(weight)) {
  90. fprintf(stderr, "Merging in histogram id %lu: skip bad value %f weight %f\n", Id, pos, weight);
  91. pos += histo.GetBinRange();
  92. continue;
  93. }
  94. Add(pos, weight * multiplier);
  95. pos += histo.GetBinRange();
  96. }
  97. MinValue = Min(MinValue, histo.GetMinValue());
  98. MaxValue = Max(MaxValue, histo.GetMaxValue());
  99. } else {
  100. ythrow yexception() << "Unknown THistogram type";
  101. }
  102. }
  103. void TAdaptiveHistogram::Merge(const TVector<THistogram>& histogramsToMerge) {
  104. for (size_t i = 0; i < histogramsToMerge.size(); ++i) {
  105. Merge(histogramsToMerge[i], 1.0);
  106. }
  107. }
  108. void TAdaptiveHistogram::Merge(TVector<IHistogramPtr> histogramsToMerge) {
  109. TVector<IHistogramPtr> histogramsToMergeRepacked(0);
  110. TVector<TAdaptiveHistogram*> histograms(0);
  111. for (size_t i = 0; i < histogramsToMerge.size(); ++i) {
  112. if (!histogramsToMerge[i] || histogramsToMerge[i]->Empty()) {
  113. continue;
  114. }
  115. TAdaptiveHistogram* adaptiveHisto = dynamic_cast<TAdaptiveHistogram*>(histogramsToMerge[i].Get());
  116. if (adaptiveHisto) {
  117. histogramsToMergeRepacked.push_back(histogramsToMerge[i]);
  118. } else {
  119. histogramsToMergeRepacked.push_back(IHistogramPtr(new TAdaptiveHistogram(histogramsToMerge[i].Get(), Intervals, Id, CalcQuality))); // Convert histograms that are not of TFixedBinHistogram type
  120. }
  121. if (histogramsToMergeRepacked.back()->Empty()) {
  122. continue;
  123. }
  124. histograms.push_back(dynamic_cast<TAdaptiveHistogram*>(histogramsToMergeRepacked.back().Get()));
  125. }
  126. if (histograms.size() == 0) {
  127. return;
  128. }
  129. for (size_t histoIndex = 0; histoIndex < histograms.size(); ++histoIndex) {
  130. TAdaptiveHistogram* histo = histograms[histoIndex];
  131. for (TPairSet::const_iterator it = histo->Bins.begin(); it != histo->Bins.end(); ++it) {
  132. Add(*it, true);
  133. }
  134. }
  135. for (size_t i = 0; i < histograms.size(); ++i) {
  136. MinValue = Min(MinValue, histograms[i]->MinValue);
  137. MaxValue = Max(MaxValue, histograms[i]->MaxValue);
  138. }
  139. }
  140. void TAdaptiveHistogram::Multiply(double factor) {
  141. if (!IsValidFloat(factor) || factor <= 0) {
  142. ythrow yexception() << "Not valid factor in IHistogram::Multiply(): " << factor;
  143. }
  144. Sum *= factor;
  145. TPairSet newBins;
  146. for (TPairSet::iterator it = Bins.begin(); it != Bins.end(); ++it) {
  147. newBins.insert(TWeightedValue(it->first, it->second * factor));
  148. }
  149. Bins = newBins;
  150. TPairSet newBinsByQuality;
  151. for (TPairSet::iterator it = Bins.begin(); it != Bins.end(); ++it) {
  152. TPairSet::iterator rightBin = it;
  153. ++rightBin;
  154. if (rightBin == Bins.end()) {
  155. break;
  156. }
  157. newBinsByQuality.insert(CalcQuality(*it, *rightBin));
  158. }
  159. BinsByQuality = newBinsByQuality;
  160. }
  161. void TAdaptiveHistogram::FromProto(const THistogram& histo) {
  162. Y_VERIFY(histo.HasType(), "Attempt to parse TAdaptiveHistogram from THistogram protobuf with no Type field set");
  163. ;
  164. switch (histo.GetType()) { // check that histogram type could be deduced
  165. case HT_ADAPTIVE_DISTANCE_HISTOGRAM:
  166. case HT_ADAPTIVE_WEIGHT_HISTOGRAM:
  167. case HT_ADAPTIVE_WARD_HISTOGRAM:
  168. break; // ok
  169. case HT_ADAPTIVE_HISTOGRAM:
  170. if (CalcQuality != nullptr)
  171. break; // ok
  172. [[fallthrough]];
  173. default: // not ok
  174. ythrow yexception() << "Attempt to parse TAdaptiveHistogram from THistogram protobuf record of type = " << (ui32)histo.GetType();
  175. }
  176. if (histo.FreqSize() != histo.PositionSize()) {
  177. ythrow yexception() << "Attempt to parse TAdaptiveHistogram from THistogram protobuf record where FreqSize != PositionSize. FreqSize == " << (ui32)histo.FreqSize() << ", PositionSize == " << (ui32)histo.PositionSize();
  178. }
  179. if (CalcQuality == nullptr) {
  180. if (histo.GetType() == HT_ADAPTIVE_DISTANCE_HISTOGRAM) {
  181. CalcQuality = CalcDistanceQuality;
  182. } else if (histo.GetType() == HT_ADAPTIVE_WEIGHT_HISTOGRAM) {
  183. CalcQuality = CalcWeightQuality;
  184. } else if (histo.GetType() == HT_ADAPTIVE_WARD_HISTOGRAM) {
  185. CalcQuality = CalcWardQuality;
  186. } else {
  187. ythrow yexception() << "Attempt to parse an HT_ADAPTIVE_HISTOGRAM without default quality function";
  188. }
  189. }
  190. Id = histo.GetId();
  191. Sum = 0.0;
  192. Intervals = Max(Intervals, histo.FreqSize());
  193. for (size_t i = 0; i < histo.FreqSize(); ++i) {
  194. double value = histo.GetPosition(i);
  195. double weight = histo.GetFreq(i);
  196. if (!IsValidFloat(value) || !IsValidFloat(weight)) {
  197. fprintf(stderr, "FromProto in histogram id %lu: skip bad value %f weight %f\n", Id, value, weight);
  198. continue;
  199. }
  200. Add(value, weight);
  201. }
  202. if (!IsValidFloat(histo.GetMinValue()) || !IsValidFloat(histo.GetMaxValue())) {
  203. ythrow yexception() << Sprintf("FromProto in histogram id %lu: skip bad histo with minvalue %f maxvalue %f", Id, histo.GetMinValue(), histo.GetMaxValue());
  204. }
  205. MinValue = histo.GetMinValue();
  206. MaxValue = histo.GetMaxValue();
  207. }
  208. void TAdaptiveHistogram::ToProto(THistogram& histo) {
  209. histo.Clear();
  210. if (CalcQuality == CalcDistanceQuality) {
  211. histo.SetType(HT_ADAPTIVE_DISTANCE_HISTOGRAM);
  212. } else if (CalcQuality == CalcWeightQuality) {
  213. histo.SetType(HT_ADAPTIVE_WEIGHT_HISTOGRAM);
  214. } else if (CalcQuality == CalcWardQuality) {
  215. histo.SetType(HT_ADAPTIVE_WARD_HISTOGRAM);
  216. } else {
  217. histo.SetType(HT_ADAPTIVE_HISTOGRAM);
  218. }
  219. histo.SetId(Id);
  220. if (Empty()) {
  221. return;
  222. }
  223. histo.SetMinValue(MinValue);
  224. histo.SetMaxValue(MaxValue);
  225. for (TPairSet::const_iterator it = Bins.begin(); it != Bins.end(); ++it) {
  226. histo.AddFreq(it->second);
  227. histo.AddPosition(it->first);
  228. }
  229. }
  230. void TAdaptiveHistogram::SetId(ui64 id) {
  231. Id = id;
  232. }
  233. ui64 TAdaptiveHistogram::GetId() {
  234. return Id;
  235. }
  236. bool TAdaptiveHistogram::Empty() {
  237. return Bins.size() == 0;
  238. }
  239. double TAdaptiveHistogram::GetMinValue() {
  240. return MinValue;
  241. }
  242. double TAdaptiveHistogram::GetMaxValue() {
  243. return MaxValue;
  244. }
  245. double TAdaptiveHistogram::GetSum() {
  246. return Sum;
  247. }
  248. double TAdaptiveHistogram::GetSumInRange(double leftBound, double rightBound) {
  249. if (leftBound > rightBound) {
  250. return 0.0;
  251. }
  252. return GetSumAboveBound(leftBound) + GetSumBelowBound(rightBound) - Sum;
  253. }
  254. double TAdaptiveHistogram::GetSumAboveBound(double bound) {
  255. if (Empty()) {
  256. return 0.0;
  257. }
  258. if (bound < MinValue) {
  259. return Sum;
  260. }
  261. if (bound > MaxValue) {
  262. return 0.0;
  263. }
  264. if (!PrecomputedBins.empty()) {
  265. return GetSumAboveBoundImpl(
  266. bound,
  267. PrecomputedBins,
  268. LowerBound(PrecomputedBins.begin(), PrecomputedBins.end(), TFastBin{bound, -1.0, 0, 0}),
  269. [](const auto& it) { return it->SumAbove; });
  270. } else {
  271. return GetSumAboveBoundImpl(
  272. bound,
  273. Bins,
  274. Bins.lower_bound(TWeightedValue(bound, -1.0)),
  275. [this](TPairSet::const_iterator rightBin) {
  276. ++rightBin;
  277. double sum = 0;
  278. for (TPairSet::const_iterator it = rightBin; it != Bins.end(); ++it) {
  279. sum += it->second;
  280. }
  281. return sum;
  282. });
  283. }
  284. }
  285. double TAdaptiveHistogram::GetSumBelowBound(double bound) {
  286. if (Empty()) {
  287. return 0.0;
  288. }
  289. if (bound < MinValue) {
  290. return 0.0;
  291. }
  292. if (bound > MaxValue) {
  293. return Sum;
  294. }
  295. if (!PrecomputedBins.empty()) {
  296. return GetSumBelowBoundImpl(
  297. bound,
  298. PrecomputedBins,
  299. LowerBound(PrecomputedBins.begin(), PrecomputedBins.end(), TFastBin{bound, -1.0, 0, 0}),
  300. [](const auto& it) { return it->SumBelow; });
  301. } else {
  302. return GetSumBelowBoundImpl(
  303. bound,
  304. Bins,
  305. Bins.lower_bound(TWeightedValue(bound, -1.0)),
  306. [this](TPairSet::const_iterator rightBin) {
  307. double sum = 0;
  308. for (TPairSet::iterator it = Bins.begin(); it != rightBin; ++it) {
  309. sum += it->second;
  310. }
  311. return sum;
  312. });
  313. }
  314. }
  315. double TAdaptiveHistogram::CalcUpperBound(double sum) {
  316. Y_VERIFY(sum >= 0, "Sum must be >= 0");
  317. if (sum == 0.0) {
  318. return MinValue;
  319. }
  320. if (Empty()) {
  321. return MaxValue;
  322. }
  323. TPairSet::iterator current = Bins.begin();
  324. double gatheredSum = 0.0;
  325. while (current != Bins.end() && gatheredSum < sum) {
  326. gatheredSum += current->second;
  327. ++current;
  328. }
  329. --current;
  330. if (gatheredSum < sum) {
  331. return MaxValue;
  332. }
  333. TWeightedValue left(MinValue, 0.0);
  334. TWeightedValue right(MaxValue, 0.0);
  335. if (current != Bins.begin()) {
  336. TPairSet::iterator leftBin = current;
  337. --leftBin;
  338. left = *leftBin;
  339. }
  340. {
  341. TPairSet::iterator rightBin = current;
  342. ++rightBin;
  343. if (rightBin != Bins.end()) {
  344. right = *rightBin;
  345. }
  346. }
  347. double sumToAdd = sum - (gatheredSum - current->second - left.second / 2);
  348. if (sumToAdd <= ((current->second + left.second) / 2)) {
  349. return left.first + 2 * sumToAdd * (current->first - left.first) / (current->second + left.second);
  350. } else {
  351. sumToAdd -= (current->second + left.second) / 2;
  352. return current->first + 2 * sumToAdd * (right.first - current->first) / (right.second + current->second);
  353. }
  354. }
  355. double TAdaptiveHistogram::CalcLowerBound(double sum) {
  356. Y_VERIFY(sum >= 0, "Sum must be >= 0");
  357. if (sum == 0.0) {
  358. return MaxValue;
  359. }
  360. if (Empty()) {
  361. return MinValue;
  362. }
  363. TPairSet::iterator current = Bins.end();
  364. double gatheredSum = 0.0;
  365. while (current != Bins.begin() && gatheredSum < sum) {
  366. --current;
  367. gatheredSum += current->second;
  368. }
  369. if (gatheredSum < sum) {
  370. return MinValue;
  371. }
  372. TWeightedValue left(MinValue, 0.0);
  373. TWeightedValue right(MaxValue, 0.0);
  374. if (current != Bins.begin()) {
  375. TPairSet::iterator leftBin = current;
  376. --leftBin;
  377. left = *leftBin;
  378. }
  379. {
  380. TPairSet::iterator rightBin = current;
  381. ++rightBin;
  382. if (rightBin != Bins.end()) {
  383. right = *rightBin;
  384. }
  385. }
  386. double sumToAdd = sum - (gatheredSum - current->second - right.second / 2);
  387. if (sumToAdd <= ((current->second + right.second) / 2)) {
  388. return right.first - 2 * sumToAdd * (right.first - current->first) / (current->second + right.second);
  389. } else {
  390. sumToAdd -= (current->second + right.second) / 2;
  391. return current->first - 2 * sumToAdd * (current->first - left.first) / (left.second + current->second);
  392. }
  393. }
  394. double TAdaptiveHistogram::CalcUpperBoundSafe(double sum) {
  395. if (!Empty()) {
  396. sum = Max(Bins.begin()->second, sum);
  397. }
  398. return CalcUpperBound(sum);
  399. }
  400. double TAdaptiveHistogram::CalcLowerBoundSafe(double sum) {
  401. if (!Empty()) {
  402. sum = Max(Bins.rbegin()->second, sum);
  403. }
  404. return CalcLowerBound(sum);
  405. }
  406. void TAdaptiveHistogram::FromIHistogram(IHistogram* histo) {
  407. if (!histo) {
  408. ythrow yexception() << "Attempt to create TAdaptiveHistogram from a NULL pointer";
  409. }
  410. if (CalcQuality == CalcWardQuality) {
  411. ythrow yexception() << "Not implemented";
  412. } else if (CalcQuality != CalcDistanceQuality && CalcQuality != CalcWeightQuality) {
  413. ythrow yexception() << "Attempt to create TAdaptiveHistogram from a pointer without default CalcQuality";
  414. }
  415. Id = histo->GetId();
  416. if (histo->Empty()) {
  417. return;
  418. }
  419. double sum = histo->GetSum();
  420. double minValue = histo->GetMinValue();
  421. double maxValue = histo->GetMaxValue();
  422. if (minValue == maxValue) {
  423. Add(minValue, sum);
  424. return;
  425. }
  426. if (CalcQuality == CalcDistanceQuality) {
  427. double binRange = (maxValue - minValue) / (Intervals);
  428. for (size_t i = 0; i < Intervals; ++i) {
  429. Add(minValue + binRange * (i + 0.5), histo->GetSumInRange(minValue + binRange * i, minValue + binRange * (i + 1)));
  430. }
  431. } else if (CalcQuality == CalcWeightQuality && sum != 0.0) {
  432. double slab = sum / Intervals;
  433. double prevBound = minValue;
  434. for (size_t i = 0; i < Intervals; ++i) {
  435. double bound = histo->CalcUpperBound(slab * (i + 1));
  436. Add((bound + prevBound) / 2, slab);
  437. prevBound = bound;
  438. }
  439. }
  440. MinValue = minValue;
  441. MaxValue = maxValue;
  442. }
  443. void TAdaptiveHistogram::Add(const TWeightedValue& weightedValue, bool initial) {
  444. const double& value = weightedValue.first;
  445. const double& weight = weightedValue.second;
  446. if (weight <= 0.0) {
  447. return; // all zero-weighted values should be skipped because they don't affect the distribution, negative weights are forbidden
  448. }
  449. if (initial) {
  450. Sum += weight;
  451. }
  452. if (Bins.size() == 0) {
  453. MinValue = value;
  454. MaxValue = value;
  455. Bins.insert(weightedValue);
  456. return;
  457. }
  458. if (value < MinValue) {
  459. MinValue = value;
  460. }
  461. if (value > MaxValue) {
  462. MaxValue = value;
  463. }
  464. TPairSet::iterator rightBin = Bins.lower_bound(TWeightedValue(value, -1.0));
  465. if (rightBin != Bins.end() && rightBin->first == value) {
  466. TPairSet::iterator currentBin = rightBin;
  467. ++rightBin;
  468. TWeightedValue newBin(value, weight + currentBin->second);
  469. if (rightBin != Bins.end()) {
  470. Y_VERIFY(BinsByQuality.erase(CalcQuality(*currentBin, *rightBin)) == 1, "Erase failed");
  471. BinsByQuality.insert(CalcQuality(newBin, *rightBin));
  472. }
  473. if (currentBin != Bins.begin()) {
  474. TPairSet::iterator leftBin = currentBin;
  475. --leftBin;
  476. Y_VERIFY(BinsByQuality.erase(CalcQuality(*leftBin, *currentBin)) == 1, "Erase failed");
  477. BinsByQuality.insert(CalcQuality(*leftBin, newBin));
  478. }
  479. Bins.erase(currentBin);
  480. Bins.insert(newBin);
  481. return;
  482. }
  483. if (rightBin == Bins.begin()) {
  484. BinsByQuality.insert(CalcQuality(weightedValue, *rightBin));
  485. } else {
  486. TPairSet::iterator leftBin = rightBin;
  487. --leftBin;
  488. if (rightBin == Bins.end()) {
  489. BinsByQuality.insert(CalcQuality(*leftBin, weightedValue));
  490. } else {
  491. Y_VERIFY(BinsByQuality.erase(CalcQuality(*leftBin, *rightBin)) == 1, "Erase failed");
  492. BinsByQuality.insert(CalcQuality(*leftBin, weightedValue));
  493. BinsByQuality.insert(CalcQuality(weightedValue, *rightBin));
  494. }
  495. }
  496. Bins.insert(weightedValue);
  497. if (Bins.size() > Intervals) {
  498. Shrink();
  499. }
  500. }
  501. void TAdaptiveHistogram::Erase(double value) {
  502. TPairSet::iterator currentBin = Bins.lower_bound(TWeightedValue(value, -1.0));
  503. Y_VERIFY(currentBin != Bins.end() && currentBin->first == value, "Can't find bin that should be erased");
  504. TPairSet::iterator rightBin = currentBin;
  505. ++rightBin;
  506. if (currentBin == Bins.begin()) {
  507. Y_VERIFY(rightBin != Bins.end(), "No right bin for the first bin");
  508. Y_VERIFY(BinsByQuality.erase(CalcQuality(*currentBin, *rightBin)) != 0, "Erase failed");
  509. } else {
  510. TPairSet::iterator leftBin = currentBin;
  511. --leftBin;
  512. if (rightBin == Bins.end()) {
  513. Y_VERIFY(BinsByQuality.erase(CalcQuality(*leftBin, *currentBin)) != 0, "Erase failed");
  514. } else {
  515. Y_VERIFY(BinsByQuality.erase(CalcQuality(*leftBin, *currentBin)) != 0, "Erase failed");
  516. Y_VERIFY(BinsByQuality.erase(CalcQuality(*currentBin, *rightBin)) != 0, "Erase failed");
  517. BinsByQuality.insert(CalcQuality(*leftBin, *rightBin));
  518. }
  519. }
  520. Bins.erase(currentBin);
  521. }
  522. void TAdaptiveHistogram::Shrink() {
  523. TPairSet::iterator worstBin = BinsByQuality.begin();
  524. Y_VERIFY(worstBin != BinsByQuality.end(), "No right bin for the first bin");
  525. TPairSet::iterator leftBin = Bins.lower_bound(TWeightedValue(worstBin->second, -1.0));
  526. Y_VERIFY(leftBin != Bins.end() && leftBin->first == worstBin->second, "Can't find worst bin");
  527. TPairSet::iterator rightBin = leftBin;
  528. ++rightBin;
  529. Y_VERIFY(rightBin != Bins.end(), "Can't find right bin");
  530. TWeightedValue newBin((leftBin->first * leftBin->second + rightBin->first * rightBin->second) / (leftBin->second + rightBin->second), leftBin->second + rightBin->second);
  531. if (Bins.size() > 2) {
  532. Erase(leftBin->first);
  533. Erase(rightBin->first);
  534. } else {
  535. Bins.clear();
  536. BinsByQuality.clear();
  537. }
  538. Add(newBin, false);
  539. }
  540. void TAdaptiveHistogram::PrecomputePartialSums() {
  541. PrecomputedBins.clear();
  542. PrecomputedBins.reserve(Bins.size());
  543. double currentSum = 0;
  544. for (const auto& bin : Bins) {
  545. PrecomputedBins.emplace_back(bin.first, bin.second, currentSum, Sum - currentSum - bin.second);
  546. currentSum += bin.second;
  547. }
  548. }
  549. template <typename TBins, typename TGetSumAbove>
  550. double TAdaptiveHistogram::GetSumAboveBoundImpl(double bound, const TBins& bins, typename TBins::const_iterator rightBin, const TGetSumAbove& getSumAbove) const {
  551. typename TBins::value_type left(MinValue, 0.0);
  552. typename TBins::value_type right(MaxValue, 0.0);
  553. if (rightBin != bins.end()) {
  554. right = *rightBin;
  555. }
  556. if (rightBin != bins.begin()) {
  557. typename TBins::const_iterator leftBin = rightBin;
  558. --leftBin;
  559. left = *leftBin;
  560. }
  561. double sum = (right.second / 2) + ((right.first == left.first) ? ((left.second + right.second) / 2) : (((left.second + right.second) / 2) * (right.first - bound) / (right.first - left.first)));
  562. if (rightBin == bins.end()) {
  563. return sum;
  564. }
  565. sum += getSumAbove(rightBin);
  566. return sum;
  567. }
  568. template <typename TBins, typename TGetSumBelow>
  569. double TAdaptiveHistogram::GetSumBelowBoundImpl(double bound, const TBins& bins, typename TBins::const_iterator rightBin, const TGetSumBelow& getSumBelow) const {
  570. typename TBins::value_type left(MinValue, 0.0);
  571. typename TBins::value_type right(MaxValue, 0.0);
  572. if (rightBin != bins.end()) {
  573. right = *rightBin;
  574. }
  575. if (rightBin != bins.begin()) {
  576. typename TBins::const_iterator leftBin = rightBin;
  577. --leftBin;
  578. left = *leftBin;
  579. }
  580. double sum = (left.second / 2) + ((right.first == left.first) ? ((left.second + right.second) / 2) : (((left.second + right.second) / 2) * (bound - left.first) / (right.first - left.first)));
  581. if (rightBin == bins.begin()) {
  582. return sum;
  583. }
  584. --rightBin;
  585. sum += getSumBelow(rightBin);
  586. return sum;
  587. }
  588. }