adaptive_histogram.cpp 24 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639
  1. #include "adaptive_histogram.h"
  2. #include <util/generic/algorithm.h>
  3. #include <util/generic/yexception.h>
  4. #include <util/generic/ymath.h>
  5. #include <util/string/printf.h>
  6. #include <util/system/backtrace.h>
  7. namespace NKiwiAggr {
  8. TAdaptiveHistogram::TAdaptiveHistogram(size_t intervals, ui64 id, TQualityFunction qualityFunc)
  9. : Id(id)
  10. , MinValue(0.0)
  11. , MaxValue(0.0)
  12. , Sum(0.0)
  13. , Intervals(intervals)
  14. , CalcQuality(qualityFunc)
  15. {
  16. }
  17. TAdaptiveHistogram::TAdaptiveHistogram(const THistogram& histo, size_t defaultIntervals, ui64 defaultId, TQualityFunction qualityFunc)
  18. : TAdaptiveHistogram(defaultIntervals, defaultId, qualityFunc)
  19. {
  20. FromProto(histo);
  21. }
  22. TAdaptiveHistogram::TAdaptiveHistogram(IHistogram* histo, size_t defaultIntervals, ui64 defaultId, TQualityFunction qualityFunc)
  23. : TAdaptiveHistogram(defaultIntervals, defaultId, qualityFunc)
  24. {
  25. TAdaptiveHistogram* adaptiveHisto = dynamic_cast<TAdaptiveHistogram*>(histo);
  26. if (!adaptiveHisto) {
  27. FromIHistogram(histo);
  28. return;
  29. }
  30. Id = adaptiveHisto->Id;
  31. MinValue = adaptiveHisto->MinValue;
  32. MaxValue = adaptiveHisto->MaxValue;
  33. Sum = adaptiveHisto->Sum;
  34. Intervals = adaptiveHisto->Intervals;
  35. Bins = adaptiveHisto->Bins;
  36. BinsByQuality = adaptiveHisto->BinsByQuality;
  37. if (CalcQuality == nullptr) {
  38. CalcQuality = adaptiveHisto->CalcQuality;
  39. }
  40. }
  41. TQualityFunction TAdaptiveHistogram::GetQualityFunc() {
  42. return CalcQuality;
  43. }
  44. void TAdaptiveHistogram::Clear() {
  45. Sum = 0.0;
  46. Bins.clear();
  47. BinsByQuality.clear();
  48. }
  49. void TAdaptiveHistogram::Add(const THistoRec& histoRec) {
  50. if (!histoRec.HasId() || histoRec.GetId() == Id) {
  51. Add(histoRec.GetValue(), histoRec.GetWeight());
  52. }
  53. }
  54. void TAdaptiveHistogram::Add(double value, double weight) {
  55. if (!IsValidFloat(value) || !IsValidFloat(weight)) {
  56. ythrow yexception() << Sprintf("Histogram id %lu: bad value %f weight %f", Id, value, weight);
  57. }
  58. TWeightedValue weightedValue(value, weight);
  59. Add(weightedValue, true);
  60. PrecomputedBins.clear();
  61. }
  62. void TAdaptiveHistogram::Merge(const THistogram& histo, double multiplier) {
  63. if (!IsValidFloat(histo.GetMinValue()) || !IsValidFloat(histo.GetMaxValue())) {
  64. fprintf(stderr, "Merging in histogram id %lu: skip bad histo with minvalue %f maxvalue %f\n", Id, histo.GetMinValue(), histo.GetMaxValue());
  65. return;
  66. }
  67. if (histo.FreqSize() == 0) {
  68. return; // skip empty histos
  69. }
  70. if (histo.GetType() == HT_ADAPTIVE_DISTANCE_HISTOGRAM ||
  71. histo.GetType() == HT_ADAPTIVE_WEIGHT_HISTOGRAM ||
  72. histo.GetType() == HT_ADAPTIVE_WARD_HISTOGRAM ||
  73. histo.GetType() == HT_ADAPTIVE_HISTOGRAM)
  74. {
  75. Y_ABORT_UNLESS(histo.FreqSize() == histo.PositionSize(), "Corrupted histo");
  76. for (size_t j = 0; j < histo.FreqSize(); ++j) {
  77. double value = histo.GetPosition(j);
  78. double weight = histo.GetFreq(j);
  79. if (!IsValidFloat(value) || !IsValidFloat(weight)) {
  80. fprintf(stderr, "Merging in histogram id %lu: skip bad value %f weight %f\n", Id, value, weight);
  81. continue;
  82. }
  83. Add(value, weight * multiplier);
  84. }
  85. MinValue = Min(MinValue, histo.GetMinValue());
  86. MaxValue = Max(MaxValue, histo.GetMaxValue());
  87. } else if (histo.GetType() == HT_FIXED_BIN_HISTOGRAM) {
  88. double pos = histo.GetMinValue() + histo.GetBinRange() / 2.0;
  89. for (size_t j = 0; j < histo.FreqSize(); ++j) {
  90. double weight = histo.GetFreq(j);
  91. if (!IsValidFloat(pos) || !IsValidFloat(weight)) {
  92. fprintf(stderr, "Merging in histogram id %lu: skip bad value %f weight %f\n", Id, pos, weight);
  93. pos += histo.GetBinRange();
  94. continue;
  95. }
  96. Add(pos, weight * multiplier);
  97. pos += histo.GetBinRange();
  98. }
  99. MinValue = Min(MinValue, histo.GetMinValue());
  100. MaxValue = Max(MaxValue, histo.GetMaxValue());
  101. } else {
  102. ythrow yexception() << "Unknown THistogram type";
  103. }
  104. }
  105. void TAdaptiveHistogram::Merge(const TVector<THistogram>& histogramsToMerge) {
  106. for (size_t i = 0; i < histogramsToMerge.size(); ++i) {
  107. Merge(histogramsToMerge[i], 1.0);
  108. }
  109. }
  110. void TAdaptiveHistogram::Merge(TVector<IHistogramPtr> histogramsToMerge) {
  111. TVector<IHistogramPtr> histogramsToMergeRepacked(0);
  112. TVector<TAdaptiveHistogram*> histograms(0);
  113. for (size_t i = 0; i < histogramsToMerge.size(); ++i) {
  114. if (!histogramsToMerge[i] || histogramsToMerge[i]->Empty()) {
  115. continue;
  116. }
  117. TAdaptiveHistogram* adaptiveHisto = dynamic_cast<TAdaptiveHistogram*>(histogramsToMerge[i].Get());
  118. if (adaptiveHisto) {
  119. histogramsToMergeRepacked.push_back(histogramsToMerge[i]);
  120. } else {
  121. histogramsToMergeRepacked.push_back(IHistogramPtr(new TAdaptiveHistogram(histogramsToMerge[i].Get(), Intervals, Id, CalcQuality))); // Convert histograms that are not of TFixedBinHistogram type
  122. }
  123. if (histogramsToMergeRepacked.back()->Empty()) {
  124. continue;
  125. }
  126. histograms.push_back(dynamic_cast<TAdaptiveHistogram*>(histogramsToMergeRepacked.back().Get()));
  127. }
  128. if (histograms.size() == 0) {
  129. return;
  130. }
  131. for (size_t histoIndex = 0; histoIndex < histograms.size(); ++histoIndex) {
  132. TAdaptiveHistogram* histo = histograms[histoIndex];
  133. for (TPairSet::const_iterator it = histo->Bins.begin(); it != histo->Bins.end(); ++it) {
  134. Add(*it, true);
  135. }
  136. }
  137. for (size_t i = 0; i < histograms.size(); ++i) {
  138. MinValue = Min(MinValue, histograms[i]->MinValue);
  139. MaxValue = Max(MaxValue, histograms[i]->MaxValue);
  140. }
  141. }
  142. void TAdaptiveHistogram::Multiply(double factor) {
  143. if (!IsValidFloat(factor) || factor <= 0) {
  144. ythrow yexception() << "Not valid factor in IHistogram::Multiply(): " << factor;
  145. }
  146. Sum *= factor;
  147. TPairSet newBins;
  148. for (TPairSet::iterator it = Bins.begin(); it != Bins.end(); ++it) {
  149. newBins.insert(TWeightedValue(it->first, it->second * factor));
  150. }
  151. Bins = newBins;
  152. TPairSet newBinsByQuality;
  153. for (TPairSet::iterator it = Bins.begin(); it != Bins.end(); ++it) {
  154. TPairSet::iterator rightBin = it;
  155. ++rightBin;
  156. if (rightBin == Bins.end()) {
  157. break;
  158. }
  159. newBinsByQuality.insert(CalcQuality(*it, *rightBin));
  160. }
  161. BinsByQuality = newBinsByQuality;
  162. }
  163. void TAdaptiveHistogram::FromProto(const THistogram& histo) {
  164. Y_ABORT_UNLESS(histo.HasType(), "Attempt to parse TAdaptiveHistogram from THistogram protobuf with no Type field set");
  165. ;
  166. switch (histo.GetType()) { // check that histogram type could be deduced
  167. case HT_ADAPTIVE_DISTANCE_HISTOGRAM:
  168. case HT_ADAPTIVE_WEIGHT_HISTOGRAM:
  169. case HT_ADAPTIVE_WARD_HISTOGRAM:
  170. break; // ok
  171. case HT_ADAPTIVE_HISTOGRAM:
  172. if (CalcQuality != nullptr)
  173. break; // ok
  174. [[fallthrough]];
  175. default: // not ok
  176. ythrow yexception() << "Attempt to parse TAdaptiveHistogram from THistogram protobuf record of type = " << (ui32)histo.GetType();
  177. }
  178. if (histo.FreqSize() != histo.PositionSize()) {
  179. ythrow yexception() << "Attempt to parse TAdaptiveHistogram from THistogram protobuf record where FreqSize != PositionSize. FreqSize == " << (ui32)histo.FreqSize() << ", PositionSize == " << (ui32)histo.PositionSize();
  180. }
  181. if (CalcQuality == nullptr) {
  182. if (histo.GetType() == HT_ADAPTIVE_DISTANCE_HISTOGRAM) {
  183. CalcQuality = CalcDistanceQuality;
  184. } else if (histo.GetType() == HT_ADAPTIVE_WEIGHT_HISTOGRAM) {
  185. CalcQuality = CalcWeightQuality;
  186. } else if (histo.GetType() == HT_ADAPTIVE_WARD_HISTOGRAM) {
  187. CalcQuality = CalcWardQuality;
  188. } else {
  189. ythrow yexception() << "Attempt to parse an HT_ADAPTIVE_HISTOGRAM without default quality function";
  190. }
  191. }
  192. Id = histo.GetId();
  193. Sum = 0.0;
  194. Intervals = Max(Intervals, histo.FreqSize());
  195. for (size_t i = 0; i < histo.FreqSize(); ++i) {
  196. double value = histo.GetPosition(i);
  197. double weight = histo.GetFreq(i);
  198. if (!IsValidFloat(value) || !IsValidFloat(weight)) {
  199. fprintf(stderr, "FromProto in histogram id %lu: skip bad value %f weight %f\n", Id, value, weight);
  200. continue;
  201. }
  202. Add(value, weight);
  203. }
  204. if (!IsValidFloat(histo.GetMinValue()) || !IsValidFloat(histo.GetMaxValue())) {
  205. ythrow yexception() << Sprintf("FromProto in histogram id %lu: skip bad histo with minvalue %f maxvalue %f", Id, histo.GetMinValue(), histo.GetMaxValue());
  206. }
  207. MinValue = histo.GetMinValue();
  208. MaxValue = histo.GetMaxValue();
  209. }
  210. void TAdaptiveHistogram::ToProto(THistogram& histo) {
  211. histo.Clear();
  212. if (CalcQuality == CalcDistanceQuality) {
  213. histo.SetType(HT_ADAPTIVE_DISTANCE_HISTOGRAM);
  214. } else if (CalcQuality == CalcWeightQuality) {
  215. histo.SetType(HT_ADAPTIVE_WEIGHT_HISTOGRAM);
  216. } else if (CalcQuality == CalcWardQuality) {
  217. histo.SetType(HT_ADAPTIVE_WARD_HISTOGRAM);
  218. } else {
  219. histo.SetType(HT_ADAPTIVE_HISTOGRAM);
  220. }
  221. histo.SetId(Id);
  222. if (Empty()) {
  223. return;
  224. }
  225. histo.SetMinValue(MinValue);
  226. histo.SetMaxValue(MaxValue);
  227. for (TPairSet::const_iterator it = Bins.begin(); it != Bins.end(); ++it) {
  228. histo.AddFreq(it->second);
  229. histo.AddPosition(it->first);
  230. }
  231. }
  232. void TAdaptiveHistogram::SetId(ui64 id) {
  233. Id = id;
  234. }
  235. ui64 TAdaptiveHistogram::GetId() {
  236. return Id;
  237. }
  238. bool TAdaptiveHistogram::Empty() {
  239. return Bins.size() == 0;
  240. }
  241. double TAdaptiveHistogram::GetMinValue() {
  242. return MinValue;
  243. }
  244. double TAdaptiveHistogram::GetMaxValue() {
  245. return MaxValue;
  246. }
  247. double TAdaptiveHistogram::GetSum() {
  248. return Sum;
  249. }
  250. double TAdaptiveHistogram::GetSumInRange(double leftBound, double rightBound) {
  251. if (leftBound > rightBound) {
  252. return 0.0;
  253. }
  254. return GetSumAboveBound(leftBound) + GetSumBelowBound(rightBound) - Sum;
  255. }
  256. double TAdaptiveHistogram::GetSumAboveBound(double bound) {
  257. if (Empty()) {
  258. return 0.0;
  259. }
  260. if (bound < MinValue) {
  261. return Sum;
  262. }
  263. if (bound > MaxValue) {
  264. return 0.0;
  265. }
  266. if (!PrecomputedBins.empty()) {
  267. return GetSumAboveBoundImpl(
  268. bound,
  269. PrecomputedBins,
  270. LowerBound(PrecomputedBins.begin(), PrecomputedBins.end(), TFastBin{bound, -1.0, 0, 0}),
  271. [](const auto& it) { return it->SumAbove; });
  272. } else {
  273. return GetSumAboveBoundImpl(
  274. bound,
  275. Bins,
  276. Bins.lower_bound(TWeightedValue(bound, -1.0)),
  277. [this](TPairSet::const_iterator rightBin) {
  278. ++rightBin;
  279. double sum = 0;
  280. for (TPairSet::const_iterator it = rightBin; it != Bins.end(); ++it) {
  281. sum += it->second;
  282. }
  283. return sum;
  284. });
  285. }
  286. }
  287. double TAdaptiveHistogram::GetSumBelowBound(double bound) {
  288. if (Empty()) {
  289. return 0.0;
  290. }
  291. if (bound < MinValue) {
  292. return 0.0;
  293. }
  294. if (bound > MaxValue) {
  295. return Sum;
  296. }
  297. if (!PrecomputedBins.empty()) {
  298. return GetSumBelowBoundImpl(
  299. bound,
  300. PrecomputedBins,
  301. LowerBound(PrecomputedBins.begin(), PrecomputedBins.end(), TFastBin{bound, -1.0, 0, 0}),
  302. [](const auto& it) { return it->SumBelow; });
  303. } else {
  304. return GetSumBelowBoundImpl(
  305. bound,
  306. Bins,
  307. Bins.lower_bound(TWeightedValue(bound, -1.0)),
  308. [this](TPairSet::const_iterator rightBin) {
  309. double sum = 0;
  310. for (TPairSet::iterator it = Bins.begin(); it != rightBin; ++it) {
  311. sum += it->second;
  312. }
  313. return sum;
  314. });
  315. }
  316. }
  317. double TAdaptiveHistogram::CalcUpperBound(double sum) {
  318. Y_ABORT_UNLESS(sum >= 0, "Sum must be >= 0");
  319. if (sum == 0.0) {
  320. return MinValue;
  321. }
  322. if (Empty()) {
  323. return MaxValue;
  324. }
  325. TPairSet::iterator current = Bins.begin();
  326. double gatheredSum = 0.0;
  327. while (current != Bins.end() && gatheredSum < sum) {
  328. gatheredSum += current->second;
  329. ++current;
  330. }
  331. --current;
  332. if (gatheredSum < sum) {
  333. return MaxValue;
  334. }
  335. TWeightedValue left(MinValue, 0.0);
  336. TWeightedValue right(MaxValue, 0.0);
  337. if (current != Bins.begin()) {
  338. TPairSet::iterator leftBin = current;
  339. --leftBin;
  340. left = *leftBin;
  341. }
  342. {
  343. TPairSet::iterator rightBin = current;
  344. ++rightBin;
  345. if (rightBin != Bins.end()) {
  346. right = *rightBin;
  347. }
  348. }
  349. double sumToAdd = sum - (gatheredSum - current->second - left.second / 2);
  350. if (sumToAdd <= ((current->second + left.second) / 2)) {
  351. return left.first + 2 * sumToAdd * (current->first - left.first) / (current->second + left.second);
  352. } else {
  353. sumToAdd -= (current->second + left.second) / 2;
  354. return current->first + 2 * sumToAdd * (right.first - current->first) / (right.second + current->second);
  355. }
  356. }
  357. double TAdaptiveHistogram::CalcLowerBound(double sum) {
  358. Y_ABORT_UNLESS(sum >= 0, "Sum must be >= 0");
  359. if (sum == 0.0) {
  360. return MaxValue;
  361. }
  362. if (Empty()) {
  363. return MinValue;
  364. }
  365. TPairSet::iterator current = Bins.end();
  366. double gatheredSum = 0.0;
  367. while (current != Bins.begin() && gatheredSum < sum) {
  368. --current;
  369. gatheredSum += current->second;
  370. }
  371. if (gatheredSum < sum) {
  372. return MinValue;
  373. }
  374. TWeightedValue left(MinValue, 0.0);
  375. TWeightedValue right(MaxValue, 0.0);
  376. if (current != Bins.begin()) {
  377. TPairSet::iterator leftBin = current;
  378. --leftBin;
  379. left = *leftBin;
  380. }
  381. {
  382. TPairSet::iterator rightBin = current;
  383. ++rightBin;
  384. if (rightBin != Bins.end()) {
  385. right = *rightBin;
  386. }
  387. }
  388. double sumToAdd = sum - (gatheredSum - current->second - right.second / 2);
  389. if (sumToAdd <= ((current->second + right.second) / 2)) {
  390. return right.first - 2 * sumToAdd * (right.first - current->first) / (current->second + right.second);
  391. } else {
  392. sumToAdd -= (current->second + right.second) / 2;
  393. return current->first - 2 * sumToAdd * (current->first - left.first) / (left.second + current->second);
  394. }
  395. }
  396. double TAdaptiveHistogram::CalcUpperBoundSafe(double sum) {
  397. if (!Empty()) {
  398. sum = Max(Bins.begin()->second, sum);
  399. }
  400. return CalcUpperBound(sum);
  401. }
  402. double TAdaptiveHistogram::CalcLowerBoundSafe(double sum) {
  403. if (!Empty()) {
  404. sum = Max(Bins.rbegin()->second, sum);
  405. }
  406. return CalcLowerBound(sum);
  407. }
  408. void TAdaptiveHistogram::FromIHistogram(IHistogram* histo) {
  409. if (!histo) {
  410. ythrow yexception() << "Attempt to create TAdaptiveHistogram from a NULL pointer";
  411. }
  412. if (CalcQuality == CalcWardQuality) {
  413. ythrow yexception() << "Not implemented";
  414. } else if (CalcQuality != CalcDistanceQuality && CalcQuality != CalcWeightQuality) {
  415. ythrow yexception() << "Attempt to create TAdaptiveHistogram from a pointer without default CalcQuality";
  416. }
  417. Id = histo->GetId();
  418. if (histo->Empty()) {
  419. return;
  420. }
  421. double sum = histo->GetSum();
  422. double minValue = histo->GetMinValue();
  423. double maxValue = histo->GetMaxValue();
  424. if (minValue == maxValue) {
  425. Add(minValue, sum);
  426. return;
  427. }
  428. if (CalcQuality == CalcDistanceQuality) {
  429. double binRange = (maxValue - minValue) / (Intervals);
  430. for (size_t i = 0; i < Intervals; ++i) {
  431. Add(minValue + binRange * (i + 0.5), histo->GetSumInRange(minValue + binRange * i, minValue + binRange * (i + 1)));
  432. }
  433. } else if (CalcQuality == CalcWeightQuality && sum != 0.0) {
  434. double slab = sum / Intervals;
  435. double prevBound = minValue;
  436. for (size_t i = 0; i < Intervals; ++i) {
  437. double bound = histo->CalcUpperBound(slab * (i + 1));
  438. Add((bound + prevBound) / 2, slab);
  439. prevBound = bound;
  440. }
  441. }
  442. MinValue = minValue;
  443. MaxValue = maxValue;
  444. }
  445. void TAdaptiveHistogram::Add(const TWeightedValue& weightedValue, bool initial) {
  446. const double& value = weightedValue.first;
  447. const double& weight = weightedValue.second;
  448. if (weight <= 0.0) {
  449. return; // all zero-weighted values should be skipped because they don't affect the distribution, negative weights are forbidden
  450. }
  451. if (initial) {
  452. Sum += weight;
  453. }
  454. if (Bins.size() == 0) {
  455. MinValue = value;
  456. MaxValue = value;
  457. Bins.insert(weightedValue);
  458. return;
  459. }
  460. if (value < MinValue) {
  461. MinValue = value;
  462. }
  463. if (value > MaxValue) {
  464. MaxValue = value;
  465. }
  466. TPairSet::iterator rightBin = Bins.lower_bound(TWeightedValue(value, -1.0));
  467. if (rightBin != Bins.end() && rightBin->first == value) {
  468. TPairSet::iterator currentBin = rightBin;
  469. ++rightBin;
  470. TWeightedValue newBin(value, weight + currentBin->second);
  471. if (rightBin != Bins.end()) {
  472. Y_ABORT_UNLESS(BinsByQuality.erase(CalcQuality(*currentBin, *rightBin)) == 1, "Erase failed");
  473. BinsByQuality.insert(CalcQuality(newBin, *rightBin));
  474. }
  475. if (currentBin != Bins.begin()) {
  476. TPairSet::iterator leftBin = currentBin;
  477. --leftBin;
  478. Y_ABORT_UNLESS(BinsByQuality.erase(CalcQuality(*leftBin, *currentBin)) == 1, "Erase failed");
  479. BinsByQuality.insert(CalcQuality(*leftBin, newBin));
  480. }
  481. Bins.erase(currentBin);
  482. Bins.insert(newBin);
  483. return;
  484. }
  485. if (rightBin == Bins.begin()) {
  486. BinsByQuality.insert(CalcQuality(weightedValue, *rightBin));
  487. } else {
  488. TPairSet::iterator leftBin = rightBin;
  489. --leftBin;
  490. if (rightBin == Bins.end()) {
  491. BinsByQuality.insert(CalcQuality(*leftBin, weightedValue));
  492. } else {
  493. Y_ABORT_UNLESS(BinsByQuality.erase(CalcQuality(*leftBin, *rightBin)) == 1, "Erase failed");
  494. BinsByQuality.insert(CalcQuality(*leftBin, weightedValue));
  495. BinsByQuality.insert(CalcQuality(weightedValue, *rightBin));
  496. }
  497. }
  498. Bins.insert(weightedValue);
  499. if (Bins.size() > Intervals) {
  500. Shrink();
  501. }
  502. }
  503. void TAdaptiveHistogram::Erase(double value) {
  504. TPairSet::iterator currentBin = Bins.lower_bound(TWeightedValue(value, -1.0));
  505. Y_ABORT_UNLESS(currentBin != Bins.end() && currentBin->first == value, "Can't find bin that should be erased");
  506. TPairSet::iterator rightBin = currentBin;
  507. ++rightBin;
  508. if (currentBin == Bins.begin()) {
  509. Y_ABORT_UNLESS(rightBin != Bins.end(), "No right bin for the first bin");
  510. Y_ABORT_UNLESS(BinsByQuality.erase(CalcQuality(*currentBin, *rightBin)) != 0, "Erase failed");
  511. } else {
  512. TPairSet::iterator leftBin = currentBin;
  513. --leftBin;
  514. if (rightBin == Bins.end()) {
  515. Y_ABORT_UNLESS(BinsByQuality.erase(CalcQuality(*leftBin, *currentBin)) != 0, "Erase failed");
  516. } else {
  517. Y_ABORT_UNLESS(BinsByQuality.erase(CalcQuality(*leftBin, *currentBin)) != 0, "Erase failed");
  518. Y_ABORT_UNLESS(BinsByQuality.erase(CalcQuality(*currentBin, *rightBin)) != 0, "Erase failed");
  519. BinsByQuality.insert(CalcQuality(*leftBin, *rightBin));
  520. }
  521. }
  522. Bins.erase(currentBin);
  523. }
  524. void TAdaptiveHistogram::Shrink() {
  525. TPairSet::iterator worstBin = BinsByQuality.begin();
  526. Y_ABORT_UNLESS(worstBin != BinsByQuality.end(), "No right bin for the first bin");
  527. TPairSet::iterator leftBin = Bins.lower_bound(TWeightedValue(worstBin->second, -1.0));
  528. Y_ABORT_UNLESS(leftBin != Bins.end() && leftBin->first == worstBin->second, "Can't find worst bin");
  529. TPairSet::iterator rightBin = leftBin;
  530. ++rightBin;
  531. Y_ABORT_UNLESS(rightBin != Bins.end(), "Can't find right bin");
  532. TWeightedValue newBin((leftBin->first * leftBin->second + rightBin->first * rightBin->second) / (leftBin->second + rightBin->second), leftBin->second + rightBin->second);
  533. if (Bins.size() > 2) {
  534. Erase(leftBin->first);
  535. Erase(rightBin->first);
  536. } else {
  537. Bins.clear();
  538. BinsByQuality.clear();
  539. }
  540. Add(newBin, false);
  541. }
  542. void TAdaptiveHistogram::PrecomputePartialSums() {
  543. PrecomputedBins.clear();
  544. PrecomputedBins.reserve(Bins.size());
  545. double currentSum = 0;
  546. for (const auto& bin : Bins) {
  547. PrecomputedBins.emplace_back(bin.first, bin.second, currentSum, Sum - currentSum - bin.second);
  548. currentSum += bin.second;
  549. }
  550. }
  551. template <typename TBins, typename TGetSumAbove>
  552. double TAdaptiveHistogram::GetSumAboveBoundImpl(double bound, const TBins& bins, typename TBins::const_iterator rightBin, const TGetSumAbove& getSumAbove) const {
  553. typename TBins::value_type left(MinValue, 0.0);
  554. typename TBins::value_type right(MaxValue, 0.0);
  555. if (rightBin != bins.end()) {
  556. right = *rightBin;
  557. }
  558. if (rightBin != bins.begin()) {
  559. typename TBins::const_iterator leftBin = rightBin;
  560. --leftBin;
  561. left = *leftBin;
  562. }
  563. double sum = (right.second / 2) + ((right.first == left.first) ? ((left.second + right.second) / 2) : (((left.second + right.second) / 2) * (right.first - bound) / (right.first - left.first)));
  564. if (rightBin == bins.end()) {
  565. return sum;
  566. }
  567. sum += getSumAbove(rightBin);
  568. return sum;
  569. }
  570. template <typename TBins, typename TGetSumBelow>
  571. double TAdaptiveHistogram::GetSumBelowBoundImpl(double bound, const TBins& bins, typename TBins::const_iterator rightBin, const TGetSumBelow& getSumBelow) const {
  572. typename TBins::value_type left(MinValue, 0.0);
  573. typename TBins::value_type right(MaxValue, 0.0);
  574. if (rightBin != bins.end()) {
  575. right = *rightBin;
  576. }
  577. if (rightBin != bins.begin()) {
  578. typename TBins::const_iterator leftBin = rightBin;
  579. --leftBin;
  580. left = *leftBin;
  581. }
  582. double sum = (left.second / 2) + ((right.first == left.first) ? ((left.second + right.second) / 2) : (((left.second + right.second) / 2) * (bound - left.first) / (right.first - left.first)));
  583. if (rightBin == bins.begin()) {
  584. return sum;
  585. }
  586. --rightBin;
  587. sum += getSumBelow(rightBin);
  588. return sum;
  589. }
  590. }