top_keeper.h 6.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256
  1. #pragma once
  2. #include <util/generic/vector.h>
  3. #include <util/generic/algorithm.h>
  4. #include <util/generic/maybe.h>
  5. #include <util/str_stl.h>
  6. template <class T, class TComparator = TGreater<T>, bool sort = true, class Alloc = std::allocator<T>>
  7. class TTopKeeper {
  8. private:
  9. class TVectorWithMin {
  10. private:
  11. TVector<T, Alloc> Internal;
  12. size_t HalfMaxSize;
  13. TComparator Comparer;
  14. size_t MinElementIndex;
  15. private:
  16. void Reserve() {
  17. Internal.reserve(2 * HalfMaxSize);
  18. }
  19. template <class UT>
  20. bool Insert(UT&& value) noexcept {
  21. if (Y_UNLIKELY(0 == HalfMaxSize)) {
  22. return false;
  23. }
  24. if (Internal.size() < HalfMaxSize) {
  25. if (Internal.empty() || Comparer(Internal[MinElementIndex], value)) {
  26. MinElementIndex = Internal.size();
  27. Internal.push_back(std::forward<UT>(value));
  28. return true;
  29. }
  30. } else if (!Comparer(value, Internal[MinElementIndex])) {
  31. return false;
  32. }
  33. Internal.push_back(std::forward<UT>(value));
  34. if (Internal.size() == (HalfMaxSize << 1)) {
  35. Partition();
  36. }
  37. return true;
  38. }
  39. public:
  40. using value_type = T;
  41. TVectorWithMin(const size_t halfMaxSize, const TComparator& comp)
  42. : HalfMaxSize(halfMaxSize)
  43. , Comparer(comp)
  44. {
  45. Reserve();
  46. }
  47. template <class TAllocParam>
  48. TVectorWithMin(const size_t halfMaxSize, const TComparator& comp, TAllocParam&& param)
  49. : Internal(std::forward<TAllocParam>(param))
  50. , HalfMaxSize(halfMaxSize)
  51. , Comparer(comp)
  52. {
  53. Reserve();
  54. }
  55. void SortAccending() {
  56. Sort(Internal.begin(), Internal.end(), Comparer);
  57. }
  58. void Partition() {
  59. if (Y_UNLIKELY(HalfMaxSize == 0)) {
  60. return;
  61. }
  62. if (Y_LIKELY(Internal.size() >= HalfMaxSize)) {
  63. NthElement(Internal.begin(), Internal.begin() + HalfMaxSize - 1, Internal.end(), Comparer);
  64. Internal.erase(Internal.begin() + HalfMaxSize, Internal.end());
  65. //we should update MinElementIndex cause we just altered Internal
  66. MinElementIndex = HalfMaxSize - 1;
  67. }
  68. }
  69. bool Push(const T& value) {
  70. return Insert(value);
  71. }
  72. bool Push(T&& value) {
  73. return Insert(std::move(value));
  74. }
  75. template <class... TArgs>
  76. bool Emplace(TArgs&&... args) {
  77. return Insert(T(std::forward<TArgs>(args)...)); // TODO: make it "real" emplace, not that fake one
  78. }
  79. void SetMaxSize(size_t newHalfMaxSize) {
  80. HalfMaxSize = newHalfMaxSize;
  81. Reserve();
  82. Partition();
  83. }
  84. size_t GetSize() const {
  85. return Internal.size();
  86. }
  87. const auto& GetInternal() const {
  88. return Internal;
  89. }
  90. auto Extract() {
  91. using std::swap;
  92. decltype(Internal) values;
  93. swap(Internal, values);
  94. Reset();
  95. return values;
  96. }
  97. const T& Back() const {
  98. return Internal.back();
  99. }
  100. void Pop() {
  101. Internal.pop_back();
  102. }
  103. void Reset() {
  104. Internal.clear();
  105. //MinElementIndex will reset itself when we start adding new values
  106. }
  107. };
  108. void CheckNotFinalized() {
  109. Y_ENSURE(!Finalized, "Cannot insert after finalizing (Pop() / GetNext() / Finalize())! "
  110. "Use TLimitedHeap for this scenario");
  111. }
  112. size_t MaxSize;
  113. const TComparator Comparer;
  114. TVectorWithMin Internal;
  115. bool Finalized;
  116. public:
  117. TTopKeeper()
  118. : MaxSize(0)
  119. , Comparer()
  120. , Internal(0, Comparer)
  121. , Finalized(false)
  122. {
  123. }
  124. TTopKeeper(size_t maxSize, const TComparator& comp = TComparator())
  125. : MaxSize(maxSize)
  126. , Comparer(comp)
  127. , Internal(maxSize, comp)
  128. , Finalized(false)
  129. {
  130. }
  131. template <class TAllocParam>
  132. TTopKeeper(size_t maxSize, const TComparator& comp, TAllocParam&& param)
  133. : MaxSize(maxSize)
  134. , Comparer(comp)
  135. , Internal(maxSize, comp, std::forward<TAllocParam>(param))
  136. , Finalized(false)
  137. {
  138. }
  139. void Finalize() {
  140. if (Y_LIKELY(Finalized)) {
  141. return;
  142. }
  143. Internal.Partition();
  144. if (sort) {
  145. Internal.SortAccending();
  146. }
  147. Finalized = true;
  148. }
  149. const T& GetNext() {
  150. Y_ENSURE(!IsEmpty(), "Trying GetNext from empty heap!");
  151. Finalize();
  152. return Internal.Back();
  153. }
  154. void Pop() {
  155. Y_ENSURE(!IsEmpty(), "Trying Pop from empty heap!");
  156. Finalize();
  157. Internal.Pop();
  158. if (IsEmpty()) {
  159. Reset();
  160. }
  161. }
  162. T ExtractOne() {
  163. Y_ENSURE(!IsEmpty(), "Trying ExtractOne from empty heap!");
  164. Finalize();
  165. auto value = std::move(Internal.Back());
  166. Internal.Pop();
  167. if (IsEmpty()) {
  168. Reset();
  169. }
  170. return value;
  171. }
  172. auto Extract() {
  173. Finalize();
  174. return Internal.Extract();
  175. }
  176. bool Insert(const T& value) {
  177. CheckNotFinalized();
  178. return Internal.Push(value);
  179. }
  180. bool Insert(T&& value) {
  181. CheckNotFinalized();
  182. return Internal.Push(std::move(value));
  183. }
  184. template <class... TArgs>
  185. bool Emplace(TArgs&&... args) {
  186. CheckNotFinalized();
  187. return Internal.Emplace(std::forward<TArgs>(args)...);
  188. }
  189. const auto& GetInternal() {
  190. Finalize();
  191. return Internal.GetInternal();
  192. }
  193. bool IsEmpty() const {
  194. return Internal.GetSize() == 0;
  195. }
  196. size_t GetSize() const {
  197. return Min(Internal.GetSize(), MaxSize);
  198. }
  199. size_t GetMaxSize() const {
  200. return MaxSize;
  201. }
  202. void SetMaxSize(size_t newMaxSize) {
  203. Y_ENSURE(!Finalized, "Cannot resize after finalizing (Pop() / GetNext() / Finalize())! "
  204. "Use TLimitedHeap for this scenario");
  205. MaxSize = newMaxSize;
  206. Internal.SetMaxSize(newMaxSize);
  207. }
  208. void Reset() {
  209. Internal.Reset();
  210. Finalized = false;
  211. }
  212. };