// adaptive_histogram.cpp
  1. #include "adaptive_histogram.h"
  2. #include <util/generic/algorithm.h>
  3. #include <util/generic/yexception.h>
  4. #include <util/generic/ymath.h>
  5. #include <util/string/printf.h>
  6. #include <util/system/backtrace.h>
  7. #include <format>
  8. namespace NKiwiAggr {
  9. TAdaptiveHistogram::TAdaptiveHistogram(size_t intervals, ui64 id, TQualityFunction qualityFunc)
  10. : Id(id)
  11. , MinValue(0.0)
  12. , MaxValue(0.0)
  13. , Sum(0.0)
  14. , Intervals(intervals)
  15. , CalcQuality(qualityFunc)
  16. {
  17. }
  18. TAdaptiveHistogram::TAdaptiveHistogram(const THistogram& histo, size_t defaultIntervals, ui64 defaultId, TQualityFunction qualityFunc)
  19. : TAdaptiveHistogram(defaultIntervals, defaultId, qualityFunc)
  20. {
  21. FromProto(histo);
  22. }
  23. TAdaptiveHistogram::TAdaptiveHistogram(IHistogram* histo, size_t defaultIntervals, ui64 defaultId, TQualityFunction qualityFunc)
  24. : TAdaptiveHistogram(defaultIntervals, defaultId, qualityFunc)
  25. {
  26. TAdaptiveHistogram* adaptiveHisto = dynamic_cast<TAdaptiveHistogram*>(histo);
  27. if (!adaptiveHisto) {
  28. FromIHistogram(histo);
  29. return;
  30. }
  31. Id = adaptiveHisto->Id;
  32. MinValue = adaptiveHisto->MinValue;
  33. MaxValue = adaptiveHisto->MaxValue;
  34. Sum = adaptiveHisto->Sum;
  35. Intervals = adaptiveHisto->Intervals;
  36. Bins = adaptiveHisto->Bins;
  37. BinsByQuality = adaptiveHisto->BinsByQuality;
  38. if (CalcQuality == nullptr) {
  39. CalcQuality = adaptiveHisto->CalcQuality;
  40. }
  41. }
  42. TQualityFunction TAdaptiveHistogram::GetQualityFunc() {
  43. return CalcQuality;
  44. }
  45. void TAdaptiveHistogram::Clear() {
  46. Sum = 0.0;
  47. Bins.clear();
  48. BinsByQuality.clear();
  49. }
  50. void TAdaptiveHistogram::Add(const THistoRec& histoRec) {
  51. if (!histoRec.HasId() || histoRec.GetId() == Id) {
  52. Add(histoRec.GetValue(), histoRec.GetWeight());
  53. }
  54. }
  // Adds a single weighted value.
  //   value  - position of the sample.
  //   weight - weight of the sample; weights <= 0 are silently skipped by the
  //            Add(const TWeightedValue&, bool) overload.
  // Throws yexception when value or weight does not pass IsValidFloat().
  // Invalidates the precomputed partial sums.
  void TAdaptiveHistogram::Add(double value, double weight) {
      if (!IsValidFloat(value) || !IsValidFloat(weight)) {
          // std::format is type-safe for the ui64 Id (no "%lu" width assumption) and matches
          // the formatting facility used by the other diagnostics in this file; "{:f}" keeps
          // the fixed six-digit rendering of the previous "%f".
          ythrow yexception() << std::format("Histogram id {}: bad value {:f} weight {:f}", Id, value, weight);
      }
      TWeightedValue weightedValue(value, weight);
      Add(weightedValue, true);
      PrecomputedBins.clear();
  }
  // Merges a protobuf histogram into this one, scaling every weight by multiplier.
  //
  // A histogram whose MinValue/MaxValue are not valid floats is reported to Cerr and skipped;
  // a histogram without frequencies is skipped silently. Adaptive histograms contribute their
  // (position, freq) pairs; fixed-bin histograms contribute one point per bin, placed at the
  // bin centre. Individual bad entries are reported to Cerr and skipped.
  // Throws yexception for any other histogram type.
  void TAdaptiveHistogram::Merge(const THistogram& histo, double multiplier) {
      if (!(IsValidFloat(histo.GetMinValue()) && IsValidFloat(histo.GetMaxValue()))) {
          Cerr << std::format(
              "Merging in histogram id {}: skip bad histo with minvalue {} maxvalue {}\n",
              Id, histo.GetMinValue(), histo.GetMaxValue()
          );
          return;
      }
      if (histo.FreqSize() == 0) {
          return; // nothing to merge
      }

      switch (histo.GetType()) {
          case HT_ADAPTIVE_DISTANCE_HISTOGRAM:
          case HT_ADAPTIVE_WEIGHT_HISTOGRAM:
          case HT_ADAPTIVE_WARD_HISTOGRAM:
          case HT_ADAPTIVE_HISTOGRAM: {
              Y_ABORT_UNLESS(histo.FreqSize() == histo.PositionSize(), "Corrupted histo");
              for (size_t j = 0; j < histo.FreqSize(); ++j) {
                  double value = histo.GetPosition(j);
                  double weight = histo.GetFreq(j);
                  if (IsValidFloat(value) && IsValidFloat(weight)) {
                      Add(value, weight * multiplier);
                  } else {
                      Cerr << std::format(
                          "Merging in histogram id {}: skip bad value {} weight {}\n",
                          Id, value, weight
                      );
                  }
              }
              break;
          }
          case HT_FIXED_BIN_HISTOGRAM: {
              // Start at the centre of the first bin and advance by one bin width per
              // frequency, whether or not the entry was accepted.
              double pos = histo.GetMinValue() + histo.GetBinRange() / 2.0;
              for (size_t j = 0; j < histo.FreqSize(); ++j, pos += histo.GetBinRange()) {
                  double weight = histo.GetFreq(j);
                  if (IsValidFloat(pos) && IsValidFloat(weight)) {
                      Add(pos, weight * multiplier);
                  } else {
                      Cerr << std::format(
                          "Merging in histogram id {}: skip bad value {} weight {}\n",
                          Id, pos, weight
                      );
                  }
              }
              break;
          }
          default:
              ythrow yexception() << "Unknown THistogram type";
      }

      // Both supported kinds widen the value range with the merged histogram's range.
      MinValue = Min(MinValue, histo.GetMinValue());
      MaxValue = Max(MaxValue, histo.GetMaxValue());
  }
  115. void TAdaptiveHistogram::Merge(const TVector<THistogram>& histogramsToMerge) {
  116. for (size_t i = 0; i < histogramsToMerge.size(); ++i) {
  117. Merge(histogramsToMerge[i], 1.0);
  118. }
  119. }
  120. void TAdaptiveHistogram::Merge(TVector<IHistogramPtr> histogramsToMerge) {
  121. TVector<IHistogramPtr> histogramsToMergeRepacked(0);
  122. TVector<TAdaptiveHistogram*> histograms(0);
  123. for (size_t i = 0; i < histogramsToMerge.size(); ++i) {
  124. if (!histogramsToMerge[i] || histogramsToMerge[i]->Empty()) {
  125. continue;
  126. }
  127. TAdaptiveHistogram* adaptiveHisto = dynamic_cast<TAdaptiveHistogram*>(histogramsToMerge[i].Get());
  128. if (adaptiveHisto) {
  129. histogramsToMergeRepacked.push_back(histogramsToMerge[i]);
  130. } else {
  131. histogramsToMergeRepacked.push_back(IHistogramPtr(new TAdaptiveHistogram(histogramsToMerge[i].Get(), Intervals, Id, CalcQuality))); // Convert histograms that are not of TFixedBinHistogram type
  132. }
  133. if (histogramsToMergeRepacked.back()->Empty()) {
  134. continue;
  135. }
  136. histograms.push_back(dynamic_cast<TAdaptiveHistogram*>(histogramsToMergeRepacked.back().Get()));
  137. }
  138. if (histograms.size() == 0) {
  139. return;
  140. }
  141. for (size_t histoIndex = 0; histoIndex < histograms.size(); ++histoIndex) {
  142. TAdaptiveHistogram* histo = histograms[histoIndex];
  143. for (TPairSet::const_iterator it = histo->Bins.begin(); it != histo->Bins.end(); ++it) {
  144. Add(*it, true);
  145. }
  146. }
  147. for (size_t i = 0; i < histograms.size(); ++i) {
  148. MinValue = Min(MinValue, histograms[i]->MinValue);
  149. MaxValue = Max(MaxValue, histograms[i]->MaxValue);
  150. }
  151. }
  152. void TAdaptiveHistogram::Multiply(double factor) {
  153. if (!IsValidFloat(factor) || factor <= 0) {
  154. ythrow yexception() << "Not valid factor in IHistogram::Multiply(): " << factor;
  155. }
  156. Sum *= factor;
  157. TPairSet newBins;
  158. for (TPairSet::iterator it = Bins.begin(); it != Bins.end(); ++it) {
  159. newBins.insert(TWeightedValue(it->first, it->second * factor));
  160. }
  161. Bins = newBins;
  162. TPairSet newBinsByQuality;
  163. for (TPairSet::iterator it = Bins.begin(); it != Bins.end(); ++it) {
  164. TPairSet::iterator rightBin = it;
  165. ++rightBin;
  166. if (rightBin == Bins.end()) {
  167. break;
  168. }
  169. newBinsByQuality.insert(CalcQuality(*it, *rightBin));
  170. }
  171. BinsByQuality = newBinsByQuality;
  172. }
  // Replaces the contents of this histogram with the data of a protobuf record.
  //
  // Aborts if the record has no Type field. Throws yexception (before modifying this object)
  // when the type is not an adaptive one, when it is HT_ADAPTIVE_HISTOGRAM and no quality
  // function is set, when FreqSize != PositionSize, or when MinValue/MaxValue are not valid
  // floats. If no quality function is set it is deduced from the record type.
  // Individual bad (position, freq) entries are reported to Cerr and skipped.
  // Intervals only grows: it becomes max(Intervals, FreqSize).
  void TAdaptiveHistogram::FromProto(const THistogram& histo) {
      Y_ABORT_UNLESS(histo.HasType(), "Attempt to parse TAdaptiveHistogram from THistogram protobuf with no Type field set");
      switch (histo.GetType()) { // check that histogram type could be deduced
          case HT_ADAPTIVE_DISTANCE_HISTOGRAM:
          case HT_ADAPTIVE_WEIGHT_HISTOGRAM:
          case HT_ADAPTIVE_WARD_HISTOGRAM:
              break; // ok
          case HT_ADAPTIVE_HISTOGRAM:
              if (CalcQuality != nullptr)
                  break; // ok
              [[fallthrough]];
          default: // not ok
              ythrow yexception() << "Attempt to parse TAdaptiveHistogram from THistogram protobuf record of type = " << (ui32)histo.GetType();
      }
      if (histo.FreqSize() != histo.PositionSize()) {
          ythrow yexception() << "Attempt to parse TAdaptiveHistogram from THistogram protobuf record where FreqSize != PositionSize. FreqSize == " << (ui32)histo.FreqSize() << ", PositionSize == " << (ui32)histo.PositionSize();
      }
      // Validate the range up front so that a rejected record leaves this object untouched.
      if (!IsValidFloat(histo.GetMinValue()) || !IsValidFloat(histo.GetMaxValue())) {
          ythrow yexception() << std::format(
              "FromProto in histogram id {}: skip bad histo with minvalue {:f} maxvalue {:f}",
              histo.GetId(), histo.GetMinValue(), histo.GetMaxValue());
      }

      if (CalcQuality == nullptr) {
          if (histo.GetType() == HT_ADAPTIVE_DISTANCE_HISTOGRAM) {
              CalcQuality = CalcDistanceQuality;
          } else if (histo.GetType() == HT_ADAPTIVE_WEIGHT_HISTOGRAM) {
              CalcQuality = CalcWeightQuality;
          } else if (histo.GetType() == HT_ADAPTIVE_WARD_HISTOGRAM) {
              CalcQuality = CalcWardQuality;
          } else {
              ythrow yexception() << "Attempt to parse an HT_ADAPTIVE_HISTOGRAM without default quality function";
          }
      }

      // Start from a clean slate: resetting Sum alone would leave previously added bins
      // behind and make Sum disagree with the bin weights.
      Sum = 0.0;
      Bins.clear();
      BinsByQuality.clear();
      PrecomputedBins.clear();

      Id = histo.GetId();
      Intervals = Max(Intervals, histo.FreqSize());
      for (size_t i = 0; i < histo.FreqSize(); ++i) {
          double value = histo.GetPosition(i);
          double weight = histo.GetFreq(i);
          if (!IsValidFloat(value) || !IsValidFloat(weight)) {
              Cerr << std::format(
                  "FromProto in histogram id {}: skip bad value {} weight {}\n",
                  Id, value, weight
              );
              continue;
          }
          Add(value, weight);
      }

      // Add() tracks the range of bin positions; the record's own range overrides it.
      MinValue = histo.GetMinValue();
      MaxValue = histo.GetMaxValue();
  }
  223. void TAdaptiveHistogram::ToProto(THistogram& histo) {
  224. histo.Clear();
  225. if (CalcQuality == CalcDistanceQuality) {
  226. histo.SetType(HT_ADAPTIVE_DISTANCE_HISTOGRAM);
  227. } else if (CalcQuality == CalcWeightQuality) {
  228. histo.SetType(HT_ADAPTIVE_WEIGHT_HISTOGRAM);
  229. } else if (CalcQuality == CalcWardQuality) {
  230. histo.SetType(HT_ADAPTIVE_WARD_HISTOGRAM);
  231. } else {
  232. histo.SetType(HT_ADAPTIVE_HISTOGRAM);
  233. }
  234. histo.SetId(Id);
  235. if (Empty()) {
  236. return;
  237. }
  238. histo.SetMinValue(MinValue);
  239. histo.SetMaxValue(MaxValue);
  240. for (TPairSet::const_iterator it = Bins.begin(); it != Bins.end(); ++it) {
  241. histo.AddFreq(it->second);
  242. histo.AddPosition(it->first);
  243. }
  244. }
  245. void TAdaptiveHistogram::SetId(ui64 id) {
  246. Id = id;
  247. }
  248. ui64 TAdaptiveHistogram::GetId() {
  249. return Id;
  250. }
  251. bool TAdaptiveHistogram::Empty() {
  252. return Bins.size() == 0;
  253. }
  254. double TAdaptiveHistogram::GetMinValue() {
  255. return MinValue;
  256. }
  257. double TAdaptiveHistogram::GetMaxValue() {
  258. return MaxValue;
  259. }
  260. double TAdaptiveHistogram::GetSum() {
  261. return Sum;
  262. }
  263. double TAdaptiveHistogram::GetSumInRange(double leftBound, double rightBound) {
  264. if (leftBound > rightBound) {
  265. return 0.0;
  266. }
  267. return GetSumAboveBound(leftBound) + GetSumBelowBound(rightBound) - Sum;
  268. }
  269. double TAdaptiveHistogram::GetSumAboveBound(double bound) {
  270. if (Empty()) {
  271. return 0.0;
  272. }
  273. if (bound < MinValue) {
  274. return Sum;
  275. }
  276. if (bound > MaxValue) {
  277. return 0.0;
  278. }
  279. if (!PrecomputedBins.empty()) {
  280. return GetSumAboveBoundImpl(
  281. bound,
  282. PrecomputedBins,
  283. LowerBound(PrecomputedBins.begin(), PrecomputedBins.end(), TFastBin{bound, -1.0, 0, 0}),
  284. [](const auto& it) { return it->SumAbove; });
  285. } else {
  286. return GetSumAboveBoundImpl(
  287. bound,
  288. Bins,
  289. Bins.lower_bound(TWeightedValue(bound, -1.0)),
  290. [this](TPairSet::const_iterator rightBin) {
  291. ++rightBin;
  292. double sum = 0;
  293. for (TPairSet::const_iterator it = rightBin; it != Bins.end(); ++it) {
  294. sum += it->second;
  295. }
  296. return sum;
  297. });
  298. }
  299. }
  300. double TAdaptiveHistogram::GetSumBelowBound(double bound) {
  301. if (Empty()) {
  302. return 0.0;
  303. }
  304. if (bound < MinValue) {
  305. return 0.0;
  306. }
  307. if (bound > MaxValue) {
  308. return Sum;
  309. }
  310. if (!PrecomputedBins.empty()) {
  311. return GetSumBelowBoundImpl(
  312. bound,
  313. PrecomputedBins,
  314. LowerBound(PrecomputedBins.begin(), PrecomputedBins.end(), TFastBin{bound, -1.0, 0, 0}),
  315. [](const auto& it) { return it->SumBelow; });
  316. } else {
  317. return GetSumBelowBoundImpl(
  318. bound,
  319. Bins,
  320. Bins.lower_bound(TWeightedValue(bound, -1.0)),
  321. [this](TPairSet::const_iterator rightBin) {
  322. double sum = 0;
  323. for (TPairSet::iterator it = Bins.begin(); it != rightBin; ++it) {
  324. sum += it->second;
  325. }
  326. return sum;
  327. });
  328. }
  329. }
  330. double TAdaptiveHistogram::CalcUpperBound(double sum) {
  331. Y_ABORT_UNLESS(sum >= 0, "Sum must be >= 0");
  332. if (sum == 0.0) {
  333. return MinValue;
  334. }
  335. if (Empty()) {
  336. return MaxValue;
  337. }
  338. TPairSet::iterator current = Bins.begin();
  339. double gatheredSum = 0.0;
  340. while (current != Bins.end() && gatheredSum < sum) {
  341. gatheredSum += current->second;
  342. ++current;
  343. }
  344. --current;
  345. if (gatheredSum < sum) {
  346. return MaxValue;
  347. }
  348. TWeightedValue left(MinValue, 0.0);
  349. TWeightedValue right(MaxValue, 0.0);
  350. if (current != Bins.begin()) {
  351. TPairSet::iterator leftBin = current;
  352. --leftBin;
  353. left = *leftBin;
  354. }
  355. {
  356. TPairSet::iterator rightBin = current;
  357. ++rightBin;
  358. if (rightBin != Bins.end()) {
  359. right = *rightBin;
  360. }
  361. }
  362. double sumToAdd = sum - (gatheredSum - current->second - left.second / 2);
  363. if (sumToAdd <= ((current->second + left.second) / 2)) {
  364. return left.first + 2 * sumToAdd * (current->first - left.first) / (current->second + left.second);
  365. } else {
  366. sumToAdd -= (current->second + left.second) / 2;
  367. return current->first + 2 * sumToAdd * (right.first - current->first) / (right.second + current->second);
  368. }
  369. }
  370. double TAdaptiveHistogram::CalcLowerBound(double sum) {
  371. Y_ABORT_UNLESS(sum >= 0, "Sum must be >= 0");
  372. if (sum == 0.0) {
  373. return MaxValue;
  374. }
  375. if (Empty()) {
  376. return MinValue;
  377. }
  378. TPairSet::iterator current = Bins.end();
  379. double gatheredSum = 0.0;
  380. while (current != Bins.begin() && gatheredSum < sum) {
  381. --current;
  382. gatheredSum += current->second;
  383. }
  384. if (gatheredSum < sum) {
  385. return MinValue;
  386. }
  387. TWeightedValue left(MinValue, 0.0);
  388. TWeightedValue right(MaxValue, 0.0);
  389. if (current != Bins.begin()) {
  390. TPairSet::iterator leftBin = current;
  391. --leftBin;
  392. left = *leftBin;
  393. }
  394. {
  395. TPairSet::iterator rightBin = current;
  396. ++rightBin;
  397. if (rightBin != Bins.end()) {
  398. right = *rightBin;
  399. }
  400. }
  401. double sumToAdd = sum - (gatheredSum - current->second - right.second / 2);
  402. if (sumToAdd <= ((current->second + right.second) / 2)) {
  403. return right.first - 2 * sumToAdd * (right.first - current->first) / (current->second + right.second);
  404. } else {
  405. sumToAdd -= (current->second + right.second) / 2;
  406. return current->first - 2 * sumToAdd * (current->first - left.first) / (left.second + current->second);
  407. }
  408. }
  409. double TAdaptiveHistogram::CalcUpperBoundSafe(double sum) {
  410. if (!Empty()) {
  411. sum = Max(Bins.begin()->second, sum);
  412. }
  413. return CalcUpperBound(sum);
  414. }
  415. double TAdaptiveHistogram::CalcLowerBoundSafe(double sum) {
  416. if (!Empty()) {
  417. sum = Max(Bins.rbegin()->second, sum);
  418. }
  419. return CalcLowerBound(sum);
  420. }
  421. void TAdaptiveHistogram::FromIHistogram(IHistogram* histo) {
  422. if (!histo) {
  423. ythrow yexception() << "Attempt to create TAdaptiveHistogram from a NULL pointer";
  424. }
  425. if (CalcQuality == CalcWardQuality) {
  426. ythrow yexception() << "Not implemented";
  427. } else if (CalcQuality != CalcDistanceQuality && CalcQuality != CalcWeightQuality) {
  428. ythrow yexception() << "Attempt to create TAdaptiveHistogram from a pointer without default CalcQuality";
  429. }
  430. Id = histo->GetId();
  431. if (histo->Empty()) {
  432. return;
  433. }
  434. double sum = histo->GetSum();
  435. double minValue = histo->GetMinValue();
  436. double maxValue = histo->GetMaxValue();
  437. if (minValue == maxValue) {
  438. Add(minValue, sum);
  439. return;
  440. }
  441. if (CalcQuality == CalcDistanceQuality) {
  442. double binRange = (maxValue - minValue) / (Intervals);
  443. for (size_t i = 0; i < Intervals; ++i) {
  444. Add(minValue + binRange * (i + 0.5), histo->GetSumInRange(minValue + binRange * i, minValue + binRange * (i + 1)));
  445. }
  446. } else if (CalcQuality == CalcWeightQuality && sum != 0.0) {
  447. double slab = sum / Intervals;
  448. double prevBound = minValue;
  449. for (size_t i = 0; i < Intervals; ++i) {
  450. double bound = histo->CalcUpperBound(slab * (i + 1));
  451. Add((bound + prevBound) / 2, slab);
  452. prevBound = bound;
  453. }
  454. }
  455. MinValue = minValue;
  456. MaxValue = maxValue;
  457. }
  458. void TAdaptiveHistogram::Add(const TWeightedValue& weightedValue, bool initial) {
  459. const double& value = weightedValue.first;
  460. const double& weight = weightedValue.second;
  461. if (weight <= 0.0) {
  462. return; // all zero-weighted values should be skipped because they don't affect the distribution, negative weights are forbidden
  463. }
  464. if (initial) {
  465. Sum += weight;
  466. }
  467. if (Bins.size() == 0) {
  468. MinValue = value;
  469. MaxValue = value;
  470. Bins.insert(weightedValue);
  471. return;
  472. }
  473. if (value < MinValue) {
  474. MinValue = value;
  475. }
  476. if (value > MaxValue) {
  477. MaxValue = value;
  478. }
  479. TPairSet::iterator rightBin = Bins.lower_bound(TWeightedValue(value, -1.0));
  480. if (rightBin != Bins.end() && rightBin->first == value) {
  481. TPairSet::iterator currentBin = rightBin;
  482. ++rightBin;
  483. TWeightedValue newBin(value, weight + currentBin->second);
  484. if (rightBin != Bins.end()) {
  485. Y_ABORT_UNLESS(BinsByQuality.erase(CalcQuality(*currentBin, *rightBin)) == 1, "Erase failed");
  486. BinsByQuality.insert(CalcQuality(newBin, *rightBin));
  487. }
  488. if (currentBin != Bins.begin()) {
  489. TPairSet::iterator leftBin = currentBin;
  490. --leftBin;
  491. Y_ABORT_UNLESS(BinsByQuality.erase(CalcQuality(*leftBin, *currentBin)) == 1, "Erase failed");
  492. BinsByQuality.insert(CalcQuality(*leftBin, newBin));
  493. }
  494. Bins.erase(currentBin);
  495. Bins.insert(newBin);
  496. return;
  497. }
  498. if (rightBin == Bins.begin()) {
  499. BinsByQuality.insert(CalcQuality(weightedValue, *rightBin));
  500. } else {
  501. TPairSet::iterator leftBin = rightBin;
  502. --leftBin;
  503. if (rightBin == Bins.end()) {
  504. BinsByQuality.insert(CalcQuality(*leftBin, weightedValue));
  505. } else {
  506. Y_ABORT_UNLESS(BinsByQuality.erase(CalcQuality(*leftBin, *rightBin)) == 1, "Erase failed");
  507. BinsByQuality.insert(CalcQuality(*leftBin, weightedValue));
  508. BinsByQuality.insert(CalcQuality(weightedValue, *rightBin));
  509. }
  510. }
  511. Bins.insert(weightedValue);
  512. if (Bins.size() > Intervals) {
  513. Shrink();
  514. }
  515. }
  516. void TAdaptiveHistogram::Erase(double value) {
  517. TPairSet::iterator currentBin = Bins.lower_bound(TWeightedValue(value, -1.0));
  518. Y_ABORT_UNLESS(currentBin != Bins.end() && currentBin->first == value, "Can't find bin that should be erased");
  519. TPairSet::iterator rightBin = currentBin;
  520. ++rightBin;
  521. if (currentBin == Bins.begin()) {
  522. Y_ABORT_UNLESS(rightBin != Bins.end(), "No right bin for the first bin");
  523. Y_ABORT_UNLESS(BinsByQuality.erase(CalcQuality(*currentBin, *rightBin)) != 0, "Erase failed");
  524. } else {
  525. TPairSet::iterator leftBin = currentBin;
  526. --leftBin;
  527. if (rightBin == Bins.end()) {
  528. Y_ABORT_UNLESS(BinsByQuality.erase(CalcQuality(*leftBin, *currentBin)) != 0, "Erase failed");
  529. } else {
  530. Y_ABORT_UNLESS(BinsByQuality.erase(CalcQuality(*leftBin, *currentBin)) != 0, "Erase failed");
  531. Y_ABORT_UNLESS(BinsByQuality.erase(CalcQuality(*currentBin, *rightBin)) != 0, "Erase failed");
  532. BinsByQuality.insert(CalcQuality(*leftBin, *rightBin));
  533. }
  534. }
  535. Bins.erase(currentBin);
  536. }
  537. void TAdaptiveHistogram::Shrink() {
  538. TPairSet::iterator worstBin = BinsByQuality.begin();
  539. Y_ABORT_UNLESS(worstBin != BinsByQuality.end(), "No right bin for the first bin");
  540. TPairSet::iterator leftBin = Bins.lower_bound(TWeightedValue(worstBin->second, -1.0));
  541. Y_ABORT_UNLESS(leftBin != Bins.end() && leftBin->first == worstBin->second, "Can't find worst bin");
  542. TPairSet::iterator rightBin = leftBin;
  543. ++rightBin;
  544. Y_ABORT_UNLESS(rightBin != Bins.end(), "Can't find right bin");
  545. TWeightedValue newBin((leftBin->first * leftBin->second + rightBin->first * rightBin->second) / (leftBin->second + rightBin->second), leftBin->second + rightBin->second);
  546. if (Bins.size() > 2) {
  547. Erase(leftBin->first);
  548. Erase(rightBin->first);
  549. } else {
  550. Bins.clear();
  551. BinsByQuality.clear();
  552. }
  553. Add(newBin, false);
  554. }
  555. void TAdaptiveHistogram::PrecomputePartialSums() {
  556. PrecomputedBins.clear();
  557. PrecomputedBins.reserve(Bins.size());
  558. double currentSum = 0;
  559. for (const auto& bin : Bins) {
  560. PrecomputedBins.emplace_back(bin.first, bin.second, currentSum, Sum - currentSum - bin.second);
  561. currentSum += bin.second;
  562. }
  563. }
  564. template <typename TBins, typename TGetSumAbove>
  565. double TAdaptiveHistogram::GetSumAboveBoundImpl(double bound, const TBins& bins, typename TBins::const_iterator rightBin, const TGetSumAbove& getSumAbove) const {
  566. typename TBins::value_type left(MinValue, 0.0);
  567. typename TBins::value_type right(MaxValue, 0.0);
  568. if (rightBin != bins.end()) {
  569. right = *rightBin;
  570. }
  571. if (rightBin != bins.begin()) {
  572. typename TBins::const_iterator leftBin = rightBin;
  573. --leftBin;
  574. left = *leftBin;
  575. }
  576. double sum = (right.second / 2) + ((right.first == left.first) ? ((left.second + right.second) / 2) : (((left.second + right.second) / 2) * (right.first - bound) / (right.first - left.first)));
  577. if (rightBin == bins.end()) {
  578. return sum;
  579. }
  580. sum += getSumAbove(rightBin);
  581. return sum;
  582. }
  583. template <typename TBins, typename TGetSumBelow>
  584. double TAdaptiveHistogram::GetSumBelowBoundImpl(double bound, const TBins& bins, typename TBins::const_iterator rightBin, const TGetSumBelow& getSumBelow) const {
  585. typename TBins::value_type left(MinValue, 0.0);
  586. typename TBins::value_type right(MaxValue, 0.0);
  587. if (rightBin != bins.end()) {
  588. right = *rightBin;
  589. }
  590. if (rightBin != bins.begin()) {
  591. typename TBins::const_iterator leftBin = rightBin;
  592. --leftBin;
  593. left = *leftBin;
  594. }
  595. double sum = (left.second / 2) + ((right.first == left.first) ? ((left.second + right.second) / 2) : (((left.second + right.second) / 2) * (bound - left.first) / (right.first - left.first)));
  596. if (rightBin == bins.begin()) {
  597. return sum;
  598. }
  599. --rightBin;
  600. sum += getSumBelow(rightBin);
  601. return sum;
  602. }
  603. }