raw_hash_set.cc

// Copyright 2018 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "y_absl/container/internal/raw_hash_set.h"

#include <atomic>
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <cstring>

#include "y_absl/base/attributes.h"
#include "y_absl/base/config.h"
#include "y_absl/base/dynamic_annotations.h"
#include "y_absl/base/internal/endian.h"
#include "y_absl/base/optimization.h"
#include "y_absl/container/internal/container_memory.h"
#include "y_absl/container/internal/hashtablez_sampler.h"
#include "y_absl/hash/hash.h"

namespace y_absl {
Y_ABSL_NAMESPACE_BEGIN
namespace container_internal {

// Represents a control byte corresponding to a full slot with arbitrary hash.
constexpr ctrl_t ZeroCtrlT() { return static_cast<ctrl_t>(0); }
// We have space for `growth_info` before a single block of control bytes: a
// single block of empty control bytes used for tables without any slots
// allocated. This enables removing a branch in the hot path of find(). In
// order to ensure that the control bytes are aligned to 16, we have 16 bytes
// before the control bytes even though growth_info only needs 8.
alignas(16) Y_ABSL_CONST_INIT Y_ABSL_DLL const ctrl_t kEmptyGroup[32] = {
    ZeroCtrlT(), ZeroCtrlT(), ZeroCtrlT(), ZeroCtrlT(),
    ZeroCtrlT(), ZeroCtrlT(), ZeroCtrlT(), ZeroCtrlT(),
    ZeroCtrlT(), ZeroCtrlT(), ZeroCtrlT(), ZeroCtrlT(),
    ZeroCtrlT(), ZeroCtrlT(), ZeroCtrlT(), ZeroCtrlT(),
    ctrl_t::kSentinel, ctrl_t::kEmpty, ctrl_t::kEmpty, ctrl_t::kEmpty,
    ctrl_t::kEmpty, ctrl_t::kEmpty, ctrl_t::kEmpty, ctrl_t::kEmpty,
    ctrl_t::kEmpty, ctrl_t::kEmpty, ctrl_t::kEmpty, ctrl_t::kEmpty,
    ctrl_t::kEmpty, ctrl_t::kEmpty, ctrl_t::kEmpty, ctrl_t::kEmpty};

// We need one full byte followed by a sentinel byte for iterator::operator++ to
// work. We have a full group after kSentinel to be safe (in case operator++ is
// changed to read a full group).
Y_ABSL_CONST_INIT Y_ABSL_DLL const ctrl_t kSooControl[17] = {
    ZeroCtrlT(), ctrl_t::kSentinel, ZeroCtrlT(), ctrl_t::kEmpty,
    ctrl_t::kEmpty, ctrl_t::kEmpty, ctrl_t::kEmpty, ctrl_t::kEmpty,
    ctrl_t::kEmpty, ctrl_t::kEmpty, ctrl_t::kEmpty, ctrl_t::kEmpty,
    ctrl_t::kEmpty, ctrl_t::kEmpty, ctrl_t::kEmpty, ctrl_t::kEmpty,
    ctrl_t::kEmpty};
static_assert(NumControlBytes(SooCapacity()) <= 17,
              "kSooControl capacity too small");

#ifdef Y_ABSL_INTERNAL_NEED_REDUNDANT_CONSTEXPR_DECL
constexpr size_t Group::kWidth;
#endif

namespace {

// Returns "random" seed.
inline size_t RandomSeed() {
#ifdef Y_ABSL_HAVE_THREAD_LOCAL
  static thread_local size_t counter = 0;
  // On Linux kernels >= 5.4 the MSAN runtime has a false-positive when
  // accessing thread local storage data from loaded libraries
  // (https://github.com/google/sanitizers/issues/1265), for this reason counter
  // needs to be annotated as initialized.
  Y_ABSL_ANNOTATE_MEMORY_IS_INITIALIZED(&counter, sizeof(size_t));
  size_t value = ++counter;
#else   // Y_ABSL_HAVE_THREAD_LOCAL
  static std::atomic<size_t> counter(0);
  size_t value = counter.fetch_add(1, std::memory_order_relaxed);
#endif  // Y_ABSL_HAVE_THREAD_LOCAL
  return value ^ static_cast<size_t>(reinterpret_cast<uintptr_t>(&counter));
}

bool ShouldRehashForBugDetection(const ctrl_t* ctrl, size_t capacity) {
  // Note: we can't use the abseil-random library because abseil-random
  // depends on swisstable. We want to return true with probability
  // `min(1, RehashProbabilityConstant() / capacity())`. In order to do this,
  // we probe based on a random hash and see if the offset is less than
  // RehashProbabilityConstant().
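  // Roughly: probe() starts at an offset derived from the hash that is close
  // to uniform over [0, capacity], so the comparison below holds with
  // probability about RehashProbabilityConstant() / capacity.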
  return probe(ctrl, capacity, y_absl::HashOf(RandomSeed())).offset() <
         RehashProbabilityConstant();
}

}  // namespace

GenerationType* EmptyGeneration() {
  if (SwisstableGenerationsEnabled()) {
    constexpr size_t kNumEmptyGenerations = 1024;
    static constexpr GenerationType kEmptyGenerations[kNumEmptyGenerations]{};
    return const_cast<GenerationType*>(
        &kEmptyGenerations[RandomSeed() % kNumEmptyGenerations]);
  }
  return nullptr;
}

bool CommonFieldsGenerationInfoEnabled::
    should_rehash_for_bug_detection_on_insert(const ctrl_t* ctrl,
                                              size_t capacity) const {
  if (reserved_growth_ == kReservedGrowthJustRanOut) return true;
  if (reserved_growth_ > 0) return false;
  return ShouldRehashForBugDetection(ctrl, capacity);
}

bool CommonFieldsGenerationInfoEnabled::should_rehash_for_bug_detection_on_move(
    const ctrl_t* ctrl, size_t capacity) const {
  return ShouldRehashForBugDetection(ctrl, capacity);
}

bool ShouldInsertBackwardsForDebug(size_t capacity, size_t hash,
                                   const ctrl_t* ctrl) {
  // To avoid problems with weak hashes and single bit tests, we use % 13.
  // TODO(kfm,sbenza): revisit after we do unconditional mixing
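  // Six of the thirteen residues (7..12) satisfy `> 6`, so for non-small
  // tables this returns true roughly 46% of the time.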
  return !is_small(capacity) && (H1(hash, ctrl) ^ RandomSeed()) % 13 > 6;
}

size_t PrepareInsertAfterSoo(size_t hash, size_t slot_size,
                             CommonFields& common) {
  assert(common.capacity() == NextCapacity(SooCapacity()));
  // After resize from capacity 1 to 3, we always have exactly the slot with
  // index 1 occupied, so we need to insert either at index 0 or index 2.
  assert(HashSetResizeHelper::SooSlotIndex() == 1);
  PrepareInsertCommon(common);
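  // `H1 & 2` evaluates to 0 or 2 depending on a single hash bit, which picks
  // between the two free slots (index 0 and index 2) while skipping the
  // occupied SOO slot at index 1.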
  const size_t offset = H1(hash, common.control()) & 2;
  common.growth_info().OverwriteEmptyAsFull();
  SetCtrlInSingleGroupTable(common, offset, H2(hash), slot_size);
  common.infoz().RecordInsert(hash, /*distance_from_desired=*/0);
  return offset;
}

void ConvertDeletedToEmptyAndFullToDeleted(ctrl_t* ctrl, size_t capacity) {
  assert(ctrl[capacity] == ctrl_t::kSentinel);
  assert(IsValidCapacity(capacity));
  for (ctrl_t* pos = ctrl; pos < ctrl + capacity; pos += Group::kWidth) {
    Group{pos}.ConvertSpecialToEmptyAndFullToDeleted(pos);
  }
  // Copy the cloned ctrl bytes.
  std::memcpy(ctrl + capacity + 1, ctrl, NumClonedBytes());
  ctrl[capacity] = ctrl_t::kSentinel;
}

// Extern template instantiation for inline function.
template FindInfo find_first_non_full(const CommonFields&, size_t);

FindInfo find_first_non_full_outofline(const CommonFields& common,
                                       size_t hash) {
  return find_first_non_full(common, hash);
}

namespace {

// Returns the address of the slot just after slot assuming each slot has the
// specified size.
static inline void* NextSlot(void* slot, size_t slot_size) {
  return reinterpret_cast<void*>(reinterpret_cast<uintptr_t>(slot) + slot_size);
}

// Returns the address of the slot just before slot assuming each slot has the
// specified size.
static inline void* PrevSlot(void* slot, size_t slot_size) {
  return reinterpret_cast<void*>(reinterpret_cast<uintptr_t>(slot) - slot_size);
}

// Finds an empty slot, which is guaranteed to exist, starting from the given
// position.
// NOTE: this function is almost never triggered inside of
// DropDeletesWithoutResize, so we keep it simple.
// The table is rather sparse, so an empty slot will be found very quickly.
size_t FindEmptySlot(size_t start, size_t end, const ctrl_t* ctrl) {
  for (size_t i = start; i < end; ++i) {
    if (IsEmpty(ctrl[i])) {
      return i;
    }
  }
  assert(false && "no empty slot");
  return ~size_t{};
}

void DropDeletesWithoutResize(CommonFields& common,
                              const PolicyFunctions& policy) {
  void* set = &common;
  void* slot_array = common.slot_array();
  const size_t capacity = common.capacity();
  assert(IsValidCapacity(capacity));
  assert(!is_small(capacity));
  // Algorithm:
  // - mark all DELETED slots as EMPTY
  // - mark all FULL slots as DELETED
  // - for each slot marked as DELETED
  //     hash = Hash(element)
  //     target = find_first_non_full(hash)
  //     if target is in the same group
  //       mark slot as FULL
  //     else if target is EMPTY
  //       transfer element to target
  //       mark slot as EMPTY
  //       mark target as FULL
  //     else if target is DELETED
  //       swap current element with target element
  //       mark target as FULL
  //       repeat procedure for current slot with moved from element (target)
  ctrl_t* ctrl = common.control();
  ConvertDeletedToEmptyAndFullToDeleted(ctrl, capacity);
  const void* hash_fn = policy.hash_fn(common);
  auto hasher = policy.hash_slot;
  auto transfer = policy.transfer;
  const size_t slot_size = policy.slot_size;

  size_t total_probe_length = 0;
  void* slot_ptr = SlotAddress(slot_array, 0, slot_size);

  // The index of an empty slot that can be used as temporary memory for
  // the swap operation.
  constexpr size_t kUnknownId = ~size_t{};
  size_t tmp_space_id = kUnknownId;

  for (size_t i = 0; i != capacity;
       ++i, slot_ptr = NextSlot(slot_ptr, slot_size)) {
    assert(slot_ptr == SlotAddress(slot_array, i, slot_size));
    if (IsEmpty(ctrl[i])) {
      tmp_space_id = i;
      continue;
    }
    if (!IsDeleted(ctrl[i])) continue;
    const size_t hash = (*hasher)(hash_fn, slot_ptr);
    const FindInfo target = find_first_non_full(common, hash);
    const size_t new_i = target.offset;
    total_probe_length += target.probe_length;

    // Check whether the old and new i fall within the same group with respect
    // to the hash. If they do, we don't need to move the object, as it already
    // falls in the best probe group we can reach.
    const size_t probe_offset = probe(common, hash).offset();
    const auto probe_index = [probe_offset, capacity](size_t pos) {
      return ((pos - probe_offset) & capacity) / Group::kWidth;
    };
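    // For example, with capacity == 31, Group::kWidth == 16 and
    // probe_offset == 20: probe_index(22) == ((22 - 20) & 31) / 16 == 0, while
    // probe_index(5) == ((5 - 20) & 31) / 16 == 1, i.e. position 5 lies one
    // probe group further along this hash's probe sequence than position 22.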

    // Element doesn't move.
    if (Y_ABSL_PREDICT_TRUE(probe_index(new_i) == probe_index(i))) {
      SetCtrl(common, i, H2(hash), slot_size);
      continue;
    }

    void* new_slot_ptr = SlotAddress(slot_array, new_i, slot_size);
    if (IsEmpty(ctrl[new_i])) {
      // Transfer element to the empty spot.
      // SetCtrl poisons/unpoisons the slots so we have to call it at the
      // right time.
      SetCtrl(common, new_i, H2(hash), slot_size);
      (*transfer)(set, new_slot_ptr, slot_ptr);
      SetCtrl(common, i, ctrl_t::kEmpty, slot_size);
      // Initialize or change empty space id.
      tmp_space_id = i;
    } else {
      assert(IsDeleted(ctrl[new_i]));
      SetCtrl(common, new_i, H2(hash), slot_size);
      // Until we are done rehashing, DELETED marks previously FULL slots.

      if (tmp_space_id == kUnknownId) {
        tmp_space_id = FindEmptySlot(i + 1, capacity, ctrl);
      }
      void* tmp_space = SlotAddress(slot_array, tmp_space_id, slot_size);
      SanitizerUnpoisonMemoryRegion(tmp_space, slot_size);

      // Swap i and new_i elements.
      (*transfer)(set, tmp_space, new_slot_ptr);
      (*transfer)(set, new_slot_ptr, slot_ptr);
      (*transfer)(set, slot_ptr, tmp_space);
      SanitizerPoisonMemoryRegion(tmp_space, slot_size);

      // repeat the processing of the ith slot
      --i;
      slot_ptr = PrevSlot(slot_ptr, slot_size);
    }
  }
  ResetGrowthLeft(common);
  common.infoz().RecordRehash(total_probe_length);
}

static bool WasNeverFull(CommonFields& c, size_t index) {
  if (is_single_group(c.capacity())) {
    return true;
  }
  const size_t index_before = (index - Group::kWidth) & c.capacity();
  const auto empty_after = Group(c.control() + index).MaskEmpty();
  const auto empty_before = Group(c.control() + index_before).MaskEmpty();

  // We count how many consecutive non empties we have to the right and to the
  // left of `it`. If the sum is >= kWidth then there is at least one probe
  // window that might have seen a full group.
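  // For example, with Group::kWidth == 8: if the 3 slots just before `index`
  // and the 5 slots starting at `index` are all non-empty, then some 8-wide
  // probe window consists entirely of full/deleted slots, so an element that
  // collides there may have probed past `index` and we must leave a kDeleted
  // marker instead of kEmpty.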
  return empty_before && empty_after &&
         static_cast<size_t>(empty_after.TrailingZeros()) +
                 empty_before.LeadingZeros() <
             Group::kWidth;
}

}  // namespace

void EraseMetaOnly(CommonFields& c, size_t index, size_t slot_size) {
  assert(IsFull(c.control()[index]) && "erasing a dangling iterator");
  c.decrement_size();
  c.infoz().RecordErase();

  if (WasNeverFull(c, index)) {
    SetCtrl(c, index, ctrl_t::kEmpty, slot_size);
    c.growth_info().OverwriteFullAsEmpty();
    return;
  }

  c.growth_info().OverwriteFullAsDeleted();
  SetCtrl(c, index, ctrl_t::kDeleted, slot_size);
}

void ClearBackingArray(CommonFields& c, const PolicyFunctions& policy,
                       bool reuse, bool soo_enabled) {
  c.set_size(0);
  if (reuse) {
    assert(!soo_enabled || c.capacity() > SooCapacity());
    ResetCtrl(c, policy.slot_size);
    ResetGrowthLeft(c);
    c.infoz().RecordStorageChanged(0, c.capacity());
  } else {
    // We need to record infoz before calling dealloc, which will unregister
    // infoz.
    c.infoz().RecordClearedReservation();
    c.infoz().RecordStorageChanged(0, soo_enabled ? SooCapacity() : 0);
    (*policy.dealloc)(c, policy);
    c = soo_enabled ? CommonFields{soo_tag_t{}} : CommonFields{};
  }
}

void HashSetResizeHelper::GrowIntoSingleGroupShuffleControlBytes(
    ctrl_t* __restrict new_ctrl, size_t new_capacity) const {
  assert(is_single_group(new_capacity));
  constexpr size_t kHalfWidth = Group::kWidth / 2;
  constexpr size_t kQuarterWidth = Group::kWidth / 4;
  assert(old_capacity_ < kHalfWidth);
  static_assert(sizeof(uint64_t) >= kHalfWidth,
                "Group size is too large. The ctrl bytes for half a group must "
                "fit into a uint64_t for this implementation.");
  static_assert(sizeof(uint64_t) <= Group::kWidth,
                "Group size is too small. The ctrl bytes for a group must "
                "cover a uint64_t for this implementation.");

  const size_t half_old_capacity = old_capacity_ / 2;

  // NOTE: operations are done with a compile-time-known size = kHalfWidth.
  // The compiler optimizes that into a single ASM operation.

  // Load the bytes from half_old_capacity + 1. This contains the last half of
  // old_ctrl bytes, followed by the sentinel byte, and then the first half of
  // the cloned bytes. This effectively shuffles the control bytes.
  uint64_t copied_bytes = 0;
  copied_bytes =
      y_absl::little_endian::Load64(old_ctrl() + half_old_capacity + 1);

  // We change the sentinel byte to kEmpty before storing to both the start of
  // the new_ctrl, and past the end of the new_ctrl later for the new cloned
  // bytes. Note that this is faster than setting the sentinel byte to kEmpty
  // after the copy directly in new_ctrl because we are limited on store
  // bandwidth.
  constexpr uint64_t kEmptyXorSentinel =
      static_cast<uint8_t>(ctrl_t::kEmpty) ^
      static_cast<uint8_t>(ctrl_t::kSentinel);
  const uint64_t mask_convert_old_sentinel_to_empty =
      kEmptyXorSentinel << (half_old_capacity * 8);
  copied_bytes ^= mask_convert_old_sentinel_to_empty;
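  // Here kEmpty is -128 (0x80) and kSentinel is -1 (0xFF), so kEmptyXorSentinel
  // is 0x7F; XOR-ing it into the byte at position half_old_capacity turns the
  // copied sentinel (0xFF) into kEmpty (0x80) while leaving all other bytes
  // unchanged.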

  // Copy the second half of the bytes to the beginning. This correctly sets
  // the bytes [0, old_capacity]. We potentially copy more bytes in order to
  // have a compile-time-known size. Mirrored bytes from the old_ctrl() will
  // also be copied. In case of old_capacity_ == 3, we will copy the 1st
  // element twice.
  // Examples:
  // (old capacity = 1)
  // old_ctrl = 0S0EEEEEEE...
  // new_ctrl = E0EEEEEE??...
  //
  // (old capacity = 3)
  // old_ctrl = 012S012EEEEE...
  // new_ctrl = 12E012EE????...
  //
  // (old capacity = 7)
  // old_ctrl = 0123456S0123456EE...
  // new_ctrl = 456E0123?????????...
  y_absl::little_endian::Store64(new_ctrl, copied_bytes);

  // Set the space [old_capacity + 1, new_capacity] to empty as these bytes
  // will not be written again. This is safe because
  // NumControlBytes = new_capacity + kWidth and
  // new_capacity >= old_capacity + 1.
  // Examples:
  // (old_capacity = 3, new_capacity = 15)
  // new_ctrl = 12E012EE?????????????...??
  // *new_ctrl = 12E0EEEEEEEEEEEEEEEE?...??
  // position / S
  //
  // (old_capacity = 7, new_capacity = 15)
  // new_ctrl = 456E0123?????????????????...??
  // *new_ctrl = 456E0123EEEEEEEEEEEEEEEE?...??
  // position / S
  std::memset(new_ctrl + old_capacity_ + 1, static_cast<int8_t>(ctrl_t::kEmpty),
              Group::kWidth);

  // Set the last kHalfWidth bytes to empty, to ensure the bytes all the way to
  // the end are initialized.
  // Examples:
  // new_ctrl = 12E0EEEEEEEEEEEEEEEE?...???????
  // *new_ctrl = 12E0EEEEEEEEEEEEEEEE???EEEEEEEE
  // position S /
  //
  // new_ctrl = 456E0123EEEEEEEEEEEEEEEE???????
  // *new_ctrl = 456E0123EEEEEEEEEEEEEEEEEEEEEEE
  // position S /
  std::memset(new_ctrl + NumControlBytes(new_capacity) - kHalfWidth,
              static_cast<int8_t>(ctrl_t::kEmpty), kHalfWidth);

  // Copy the first bytes to the end (starting at new_capacity + 1) to set the
  // cloned bytes. Note that we use the already copied bytes from old_ctrl here
  // rather than copying from new_ctrl to avoid a Read-after-Write hazard, since
  // new_ctrl was just written to. The first old_capacity - 1 bytes are set
  // correctly. Then there may be up to old_capacity bytes that need to be
  // overwritten, and any remaining bytes will be correctly set to empty. This
  // sets [new_capacity + 1, new_capacity + 1 + old_capacity] correctly.
  // Examples:
  // new_ctrl = 12E0EEEEEEEEEEEEEEEE?...???????
  // *new_ctrl = 12E0EEEEEEEEEEEE12E012EEEEEEEEE
  // position S/
  //
  // new_ctrl = 456E0123EEEEEEEE?...???EEEEEEEE
  // *new_ctrl = 456E0123EEEEEEEE456E0123EEEEEEE
  // position S/
  y_absl::little_endian::Store64(new_ctrl + new_capacity + 1, copied_bytes);

  // Set the remaining bytes at the end past the cloned bytes to empty. The
  // incorrectly set bytes are [new_capacity + old_capacity + 2,
  // min(new_capacity + 1 + kHalfWidth, new_capacity + old_capacity + 2 +
  // half_old_capacity)]. Taking the difference, we need to set
  // min(kHalfWidth - (old_capacity + 1), half_old_capacity) bytes. Since
  // old_capacity < kHalfWidth, half_old_capacity < kQuarterWidth, so we set
  // kQuarterWidth bytes beginning at new_capacity + old_capacity + 2 to kEmpty.
  // Examples:
  // new_ctrl = 12E0EEEEEEEEEEEE12E012EEEEEEEEE
  // *new_ctrl = 12E0EEEEEEEEEEEE12E0EEEEEEEEEEE
  // position S /
  //
  // new_ctrl = 456E0123EEEEEEEE456E0123EEEEEEE
  // *new_ctrl = 456E0123EEEEEEEE456E0123EEEEEEE (no change)
  // position S /
  std::memset(new_ctrl + new_capacity + old_capacity_ + 2,
              static_cast<int8_t>(ctrl_t::kEmpty), kQuarterWidth);

  // Finally, we set the new sentinel byte.
  new_ctrl[new_capacity] = ctrl_t::kSentinel;
}

void HashSetResizeHelper::InitControlBytesAfterSoo(ctrl_t* new_ctrl, ctrl_t h2,
                                                   size_t new_capacity) {
  assert(is_single_group(new_capacity));
  std::memset(new_ctrl, static_cast<int8_t>(ctrl_t::kEmpty),
              NumControlBytes(new_capacity));
  assert(HashSetResizeHelper::SooSlotIndex() == 1);
  // This allows us to avoid branching on had_soo_slot_.
  assert(had_soo_slot_ || h2 == ctrl_t::kEmpty);
  new_ctrl[1] = new_ctrl[new_capacity + 2] = h2;
  new_ctrl[new_capacity] = ctrl_t::kSentinel;
}

void HashSetResizeHelper::GrowIntoSingleGroupShuffleTransferableSlots(
    void* new_slots, size_t slot_size) const {
  assert(old_capacity_ > 0);
  const size_t half_old_capacity = old_capacity_ / 2;
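  // The slots are shuffled the same way as the control bytes above. For
  // example, with old_capacity_ == 7 (half_old_capacity == 3): old slots 4..6
  // are copied to new positions 0..2 and old slots 0..3 to new positions 4..7,
  // matching the "0123456S..." -> "456E0123..." control-byte example.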
  SanitizerUnpoisonMemoryRegion(old_slots(), slot_size * old_capacity_);
  std::memcpy(new_slots,
              SlotAddress(old_slots(), half_old_capacity + 1, slot_size),
              slot_size * half_old_capacity);
  std::memcpy(SlotAddress(new_slots, half_old_capacity + 1, slot_size),
              old_slots(), slot_size * (half_old_capacity + 1));
}

void HashSetResizeHelper::GrowSizeIntoSingleGroupTransferable(
    CommonFields& c, size_t slot_size) {
  assert(old_capacity_ < Group::kWidth / 2);
  assert(is_single_group(c.capacity()));
  assert(IsGrowingIntoSingleGroupApplicable(old_capacity_, c.capacity()));

  GrowIntoSingleGroupShuffleControlBytes(c.control(), c.capacity());
  GrowIntoSingleGroupShuffleTransferableSlots(c.slot_array(), slot_size);

  // We poison since GrowIntoSingleGroupShuffleTransferableSlots
  // may leave empty slots unpoisoned.
  PoisonSingleGroupEmptySlots(c, slot_size);
}

void HashSetResizeHelper::TransferSlotAfterSoo(CommonFields& c,
                                               size_t slot_size) {
  assert(was_soo_);
  assert(had_soo_slot_);
  assert(is_single_group(c.capacity()));
  std::memcpy(SlotAddress(c.slot_array(), SooSlotIndex(), slot_size),
              old_soo_data(), slot_size);
  PoisonSingleGroupEmptySlots(c, slot_size);
}

namespace {

// Called whenever the table needs to make empty slots available, either by
// removing tombstones via an in-place rehash or by growing the table.
Y_ABSL_ATTRIBUTE_NOINLINE
FindInfo FindInsertPositionWithGrowthOrRehash(CommonFields& common, size_t hash,
                                              const PolicyFunctions& policy) {
  const size_t cap = common.capacity();
  if (cap > Group::kWidth &&
      // Do these calculations in 64-bit to avoid overflow.
      common.size() * uint64_t{32} <= cap * uint64_t{25}) {
    // Squash DELETED without growing if there is enough capacity.
    //
    // Rehash in place if the current size is <= 25/32 of capacity.
    // Rationale for such a high factor: 1) DropDeletesWithoutResize() is
    // faster than resize, and 2) it takes quite a bit of work to add
    // tombstones. In the worst case, it seems to take approximately 4
    // insert/erase pairs to create a single tombstone, so if we are
    // rehashing because of tombstones, we can afford to rehash-in-place as
    // long as we are reclaiming at least 1/8 the capacity without doing more
    // than 2X the work. (Where "work" is defined to be size() for rehashing
    // or rehashing in place, and 1 for an insert or erase.) But rehashing in
    // place is faster per operation than inserting or even doubling the size
    // of the table, so we actually afford to reclaim even less space from a
    // resize-in-place. The decision is to rehash in place if we can reclaim
    // about 1/8th of the usable capacity (specifically 3/28 of the capacity),
    // which means that the total cost of rehashing will be a small fraction
    // of the total work.
    //
    // Here is the output of an experiment using the BM_CacheInSteadyState
    // benchmark running the old case (where we rehash-in-place only if we can
    // reclaim at least 7/16*capacity) vs. this code (which rehashes in place
    // if we can recover 3/32*capacity).
    //
    // Note that although the worst-case number of rehashes jumped from 15 to
    // 190, the number of operations per second is almost the same.
    //
    // Abridged output of running BM_CacheInSteadyState benchmark from
    // raw_hash_set_benchmark. N is the number of insert/erase operations.
    //
    //      | OLD (recover >= 7/16)       | NEW (recover >= 3/32)
    // size | N/s    LoadFactor NRehashes | N/s    LoadFactor NRehashes
    //  448 | 145284 0.44       18        | 140118 0.44       19
    //  493 | 152546 0.24       11        | 151417 0.48       28
    //  538 | 151439 0.26       11        | 151152 0.53       38
    //  583 | 151765 0.28       11        | 150572 0.57       50
    //  628 | 150241 0.31       11        | 150853 0.61       66
    //  672 | 149602 0.33       12        | 150110 0.66       90
    //  717 | 149998 0.35       12        | 149531 0.70       129
    //  762 | 149836 0.37       13        | 148559 0.74       190
    //  807 | 149736 0.39       14        | 151107 0.39       14
    //  852 | 150204 0.42       15        | 151019 0.42       15
    DropDeletesWithoutResize(common, policy);
  } else {
    // Otherwise grow the container.
    policy.resize(common, NextCapacity(cap), HashtablezInfoHandle{});
  }
  // This function is typically called with tables containing deleted slots.
  // The table will be big and `FindFirstNonFullAfterResize` will always
  // fall back to `find_first_non_full`, so we use `find_first_non_full`
  // directly.
  return find_first_non_full(common, hash);
}

}  // namespace

const void* GetHashRefForEmptyHasher(const CommonFields& common) {
  // The empty base optimization typically makes the address of an empty base
  // class the same as the first address of the derived class object.
  // But we generally assume that for an empty hasher we can return any valid
  // pointer.
  return &common;
}

size_t PrepareInsertNonSoo(CommonFields& common, size_t hash, FindInfo target,
                           const PolicyFunctions& policy) {
  // When there are no deleted slots in the table
  // and growth_left is positive, we can insert at the first
  // empty slot in the probe sequence (target).
  const bool use_target_hint =
      // Optimization is disabled when generations are enabled.
      // We have to rehash even sparse tables randomly in such mode.
      !SwisstableGenerationsEnabled() &&
      common.growth_info().HasNoDeletedAndGrowthLeft();
  if (Y_ABSL_PREDICT_FALSE(!use_target_hint)) {
    // Notes about optimized mode when generations are disabled:
    // We do not enter this branch if the table has no deleted slots
    // and growth_left is positive.
    // We enter this branch in the following cases listed in decreasing
    // frequency:
    // 1. Table without deleted slots (>95% cases) that needs to be resized.
    // 2. Table with deleted slots that has space for the element being
    //    inserted.
    // 3. Table with deleted slots that needs to be rehashed or resized.
    if (Y_ABSL_PREDICT_TRUE(common.growth_info().HasNoGrowthLeftAndNoDeleted())) {
      const size_t old_capacity = common.capacity();
      policy.resize(common, NextCapacity(old_capacity), HashtablezInfoHandle{});
      target = HashSetResizeHelper::FindFirstNonFullAfterResize(
          common, old_capacity, hash);
    } else {
      // Note: the table may have no deleted slots here when generations
      // are enabled.
      const bool rehash_for_bug_detection =
          common.should_rehash_for_bug_detection_on_insert();
      if (rehash_for_bug_detection) {
        // Move to a different heap allocation in order to detect bugs.
        const size_t cap = common.capacity();
        policy.resize(common,
                      common.growth_left() > 0 ? cap : NextCapacity(cap),
                      HashtablezInfoHandle{});
      }
      if (Y_ABSL_PREDICT_TRUE(common.growth_left() > 0)) {
        target = find_first_non_full(common, hash);
      } else {
        target = FindInsertPositionWithGrowthOrRehash(common, hash, policy);
      }
    }
  }
  PrepareInsertCommon(common);
  common.growth_info().OverwriteControlAsFull(common.control()[target.offset]);
  SetCtrl(common, target.offset, H2(hash), policy.slot_size);
  common.infoz().RecordInsert(hash, target.probe_length);
  return target.offset;
}

}  // namespace container_internal
Y_ABSL_NAMESPACE_END
}  // namespace y_absl