123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652 |
- #pragma once
- #include "fwd.h"
- #include <util/generic/utility.h>
- #include <util/generic/vector.h>
- #include <util/generic/mapfindptr.h>
- #include <util/str_stl.h>
- #include <util/ysaveload.h>
- /*
- * There are 2 classes in this file:
- * - TDenseHash - analog of THashMap
- * - TDenseHashSet - analog of THashSet
- */
- /*
- * Implements dense-hash, in some circumstances it is a lot (2x) faster than THashMap.
- * We support only adding new elements.
- * TKey value equal to EmptyMarker (by default, it is TKey())
- * can not be inserted into hash - it is used as marker of empty element.
- * TValue type must be default constructible
- */
- template <class TKey,
- class TValue,
- class TKeyHash,
- size_t MaxLoadFactor,
- size_t LogInitSize>
- class TDenseHash : public TMapOps<TDenseHash<TKey, TValue, TKeyHash, MaxLoadFactor, LogInitSize>> {
- private:
- template <class THash, class TVal>
- class TIteratorBase {
- friend class TDenseHash;
- template <class THash2, class TVal2>
- friend class TIteratorBase;
- THash* Hash;
- size_t Idx;
- // used only to implement end()
- TIteratorBase(THash* hash, size_t initIdx)
- : Hash(hash)
- , Idx(initIdx)
- {
- }
- public:
- TIteratorBase(THash& hash)
- : Hash(&hash)
- , Idx(0)
- {
- if (Hash->EmptyMarker == Hash->Buckets[Idx].first) {
- Next();
- }
- }
- template <class THash2, class TVal2>
- TIteratorBase(const TIteratorBase<THash2, TVal2>& it)
- : Hash(it.Hash)
- , Idx(it.Idx)
- {
- }
- TIteratorBase(const TIteratorBase&) = default;
- static TIteratorBase CreateEmpty() {
- return TIteratorBase(nullptr, 0);
- }
- TIteratorBase& operator=(const TIteratorBase&) = default;
- void Next() {
- ++Idx;
- while (Idx < Hash->Buckets.size() && Hash->EmptyMarker == Hash->Buckets[Idx].first) {
- ++Idx;
- }
- }
- TIteratorBase& operator++() {
- Next();
- return *this;
- }
- TVal& operator*() {
- return Hash->Buckets[Idx];
- }
- TVal* operator->() {
- return &Hash->Buckets[Idx];
- }
- const TVal* operator->() const {
- return &Hash->Buckets[Idx];
- }
- THash* GetHash() {
- return Hash;
- }
- bool operator==(const TIteratorBase& rhs) const {
- Y_ASSERT(Hash == rhs.Hash);
- return Idx == rhs.Idx;
- }
- bool operator!=(const TIteratorBase& rhs) const {
- return !(*this == rhs);
- }
- };
- public:
- using key_type = TKey;
- using mapped_type = TValue;
- using value_type = std::pair<const key_type, mapped_type>;
- using size_type = std::size_t;
- using difference_type = std::ptrdiff_t;
- using hasher = TKeyHash;
- using key_equal = std::equal_to<key_type>; // TODO(tender-bum): template argument
- // using allocator_type = ...
- using reference = value_type&;
- using const_reference = const value_type&;
- using pointer = value_type*; // TODO(tender-bum): std::allocator_traits<Alloc>::pointer;
- using const_pointer = const value_type*; // TODO(tender-bum):
- // std::allocator_traits<Alloc>::const_pointer;
- using iterator = TIteratorBase<TDenseHash, value_type>;
- using const_iterator = TIteratorBase<const TDenseHash, const value_type>;
- public:
- TDenseHash(const key_type& emptyMarker = key_type{}, size_type initSize = 0)
- : EmptyMarker(emptyMarker)
- {
- MakeEmpty(initSize);
- }
- TDenseHash(const TDenseHash&) = default;
- TDenseHash(TDenseHash&&) = default;
- TDenseHash& operator=(const TDenseHash& rhs) {
- TDenseHash tmp{ rhs };
- return *this = std::move(tmp);
- }
- TDenseHash& operator=(TDenseHash&&) = default;
- friend bool operator==(const TDenseHash& lhs, const TDenseHash& rhs) {
- return lhs.Size() == rhs.Size() &&
- AllOf(lhs, [&rhs](const auto& v) {
- auto it = rhs.find(v.first);
- return it != rhs.end() && *it == v;
- });
- }
- void Clear() {
- for (auto& bucket : Buckets) {
- if (bucket.first != EmptyMarker) {
- SetValue(bucket, EmptyMarker, mapped_type{});
- }
- }
- NumFilled = 0;
- }
- void MakeEmpty(size_type initSize = 0) {
- if (!initSize) {
- initSize = 1 << LogInitSize;
- } else {
- initSize = FastClp2(initSize);
- }
- BucketMask = initSize - 1;
- NumFilled = 0;
- TVector<value_type> tmp;
- for (size_type i = 0; i < initSize; ++i) {
- tmp.emplace_back(EmptyMarker, mapped_type{});
- }
- tmp.swap(Buckets);
- GrowThreshold = Max<size_type>(1, initSize * MaxLoadFactor / 100) - 1;
- }
- template <class K>
- bool Has(const K& key) const {
- return ProcessKey<bool>(
- key,
- [](size_type) { return true; },
- [](size_type) { return false; });
- }
- size_type Capacity() const {
- return Buckets.capacity();
- }
- bool Empty() const {
- return Size() == 0;
- }
- size_type Size() const {
- return NumFilled;
- }
- template <size_type maxFillPercents, size_type logInitSize>
- void Swap(TDenseHash<key_type, mapped_type, hasher, maxFillPercents, logInitSize>& other) {
- Buckets.swap(other.Buckets);
- DoSwap(BucketMask, other.BucketMask);
- DoSwap(NumFilled, other.NumFilled);
- DoSwap(GrowThreshold, other.GrowThreshold);
- DoSwap(EmptyMarker, other.EmptyMarker);
- }
- void Save(IOutputStream* s) const {
- // TODO(tender-bum): make SaveLoad great again
- ::SaveMany(s, BucketMask, NumFilled, GrowThreshold);
- // We need to do so because Buckets may be serialized as a pod-array
- // that doesn't correspond to the previous behaviour
- ::SaveSize(s, Buckets.size());
- for (const auto& b : Buckets) {
- ::Save(s, b.first);
- ::Save(s, b.second);
- }
- mapped_type defaultValue{};
- ::SaveMany(s, EmptyMarker, defaultValue);
- }
- void Load(IInputStream* s) {
- // TODO(tender-bum): make SaveLoad great again
- ::LoadMany(s, BucketMask, NumFilled, GrowThreshold);
- // We need to do so because we can't load const fields
- struct TPairMimic {
- key_type First;
- mapped_type Second;
- Y_SAVELOAD_DEFINE(First, Second);
- };
- TVector<TPairMimic> tmp;
- ::Load(s, tmp);
- Buckets.clear();
- for (auto& v : tmp) {
- Buckets.emplace_back(std::move(v.First), std::move(v.Second));
- }
- ::Load(s, EmptyMarker);
- mapped_type defaultValue;
- ::Load(s, defaultValue);
- }
- public:
- iterator begin() {
- return iterator(*this);
- }
- iterator end() {
- return iterator(this, Buckets.size());
- }
- const_iterator begin() const {
- return const_iterator(*this);
- }
- const_iterator end() const {
- return const_iterator(this, Buckets.size());
- }
- template <class K>
- iterator find(const K& key) {
- return ProcessKey<iterator>(
- key,
- [&](size_type idx) { return iterator(this, idx); },
- [&](size_type) { return end(); });
- }
- template <class K>
- const_iterator find(const K& key) const {
- return ProcessKey<const_iterator>(
- key,
- [&](size_type idx) { return const_iterator(this, idx); },
- [&](size_type) { return end(); });
- }
- template <class K>
- const TValue& at(const K& key) const {
- return ProcessKey<const TValue&>(
- key,
- [&](size_type idx) -> const TValue& { return Buckets[idx].second; },
- [&](size_type) -> const TValue& { throw std::out_of_range("TDenseHash: missing key"); });
- }
- template <class K>
- TValue& at(const K& key) {
- return ProcessKey<TValue&>(
- key,
- [&](size_type idx) -> TValue& { return Buckets[idx].second; },
- [&](size_type) -> TValue& { throw std::out_of_range("TDenseHash: missing key"); });
- }
- bool Grow(size_type to = 0, bool force = false) {
- if (!to) {
- to = Buckets.size() * 2;
- } else {
- to = FastClp2(to);
- if (to <= Buckets.size() && !force) {
- return false;
- }
- }
- TVector<value_type> oldBuckets(Reserve(to));
- for (size_type i = 0; i < to; ++i) {
- oldBuckets.emplace_back(EmptyMarker, mapped_type{});
- }
- oldBuckets.swap(Buckets);
- BucketMask = Buckets.size() - 1;
- GrowThreshold = Max<size_type>(1, Buckets.size() * (MaxLoadFactor / 100.f)) - 1;
- for (auto& item : oldBuckets) {
- if (EmptyMarker != item.first) {
- SetValue(FindProperBucket(item.first), std::move(item));
- }
- }
- return true;
- }
- // Grow to size with which GrowThreshold will be higher then passed value
- //
- // (to) = (desired_num_filled + 2) * (100.f / MaxLoadFactor) + 2 after conversion to size_type
- // is not less than x := (desired_num_filled + 2) * (100.f / MaxLoadFactor) + 1 and FastClp2(to) is not less that (to)
- // (to) * (MaxLoadFactor / 100.f) >= x * (MaxLoadFactor / 100.f) = (desired_num_filled + 2) + (MaxLoadFactor / 100.f).
- // This require calculations with two or more significand decimal places
- // to have no less than (desired_num_filled + 2) after second conversion to size_type.
- // In that case after substracting 1 we got GrowThreshold >= desired_num_filled + 1
- //
- bool ReserveSpace(size_type desired_num_filled, bool force = false) {
- size_type to = Max<size_type>(1, (desired_num_filled + 2) * (100.f / MaxLoadFactor) + 2);
- return Grow(to, force);
- }
- // We need this overload because we want to optimize insertion when somebody inserts value_type.
- // So we don't need to extract the key.
- // This overload also allows brace enclosed initializer to be inserted.
- std::pair<iterator, bool> insert(const value_type& t) {
- size_type hs = hasher{}(t.first);
- auto p = GetBucketInfo(hs & BucketMask, t.first);
- if (p.second) {
- ++NumFilled;
- if (NumFilled >= GrowThreshold) {
- Grow();
- p.first = FindProperBucket(hs & BucketMask, t.first);
- }
- SetValue(p.first, t);
- return { iterator{ this, p.first }, true };
- }
- return { iterator{ this, p.first }, false };
- }
- // We need this overload because we want to optimize insertion when somebody inserts value_type.
- // So we don't need to extract the key.
- // This overload also allows brace enclosed initializer to be inserted.
- std::pair<iterator, bool> insert(value_type&& t) {
- size_type hs = hasher{}(t.first);
- auto p = GetBucketInfo(hs & BucketMask, t.first);
- if (p.second) {
- ++NumFilled;
- if (NumFilled >= GrowThreshold) {
- Grow();
- p.first = FindProperBucket(hs & BucketMask, t.first);
- }
- SetValue(p.first, std::move(t));
- return { iterator{ this, p.first }, true };
- }
- return { iterator{ this, p.first }, false };
- }
- // Standart integration. This overload is equivalent to emplace(std::forward<P>(p)).
- template <class P>
- std::enable_if_t<!std::is_same<std::decay_t<P>, value_type>::value,
- std::pair<iterator, bool>> insert(P&& p) {
- return emplace(std::forward<P>(p));
- }
- // Not really emplace because we need to know the key anyway. So we need to construct value_type.
- template <class... Args>
- std::pair<iterator, bool> emplace(Args&&... args) {
- return insert(value_type{ std::forward<Args>(args)... });
- }
- template <class K>
- mapped_type& operator[](K&& key) {
- size_type hs = hasher{}(key);
- auto p = GetBucketInfo(hs & BucketMask, key);
- if (p.second) {
- ++NumFilled;
- if (NumFilled >= GrowThreshold) {
- Grow();
- p.first = FindProperBucket(hs & BucketMask, key);
- }
- SetValue(p.first, std::forward<K>(key), mapped_type{});
- }
- return Buckets[p.first].second;
- }
- private:
- key_type EmptyMarker;
- size_type NumFilled;
- size_type BucketMask;
- size_type GrowThreshold;
- TVector<value_type> Buckets;
- private:
- // Tricky way to set value of type with const fields
- template <class... Args>
- void SetValue(value_type& bucket, Args&&... args) {
- bucket.~value_type();
- new (&bucket) value_type(std::forward<Args>(args)...);
- }
- template <class... Args>
- void SetValue(size_type idx, Args&&... args) {
- SetValue(Buckets[idx], std::forward<Args>(args)...);
- }
- template <class K>
- size_type FindProperBucket(size_type idx, const K& key) const {
- return ProcessIndex<size_type>(
- idx,
- key,
- [](size_type idx) { return idx; },
- [](size_type idx) { return idx; });
- }
- template <class K>
- size_type FindProperBucket(const K& key) const {
- return FindProperBucket(hasher{}(key) & BucketMask, key);
- }
- // { idx, is_empty }
- template <class K>
- std::pair<size_type, bool> GetBucketInfo(size_type idx, const K& key) const {
- return ProcessIndex<std::pair<size_type, bool>>(
- idx,
- key,
- [](size_type idx) { return std::make_pair(idx, false); },
- [](size_type idx) { return std::make_pair(idx, true); });
- }
- template <class R, class K, class OnFound, class OnEmpty>
- R ProcessIndex(size_type idx, const K& key, OnFound f0, OnEmpty f1) const {
- for (size_type numProbes = 1; EmptyMarker != Buckets[idx].first; ++numProbes) {
- if (Buckets[idx].first == key) {
- return f0(idx);
- }
- idx = (idx + numProbes) & BucketMask;
- }
- return f1(idx);
- }
- template <class R, class K, class OnFound, class OnEmpty>
- R ProcessKey(const K& key, OnFound&& f0, OnEmpty&& f1) const {
- return ProcessIndex<R>(
- hasher{}(key) & BucketMask, key, std::forward<OnFound>(f0), std::forward<OnEmpty>(f1));
- }
- };
- template <class TKey,
- class TKeyHash,
- size_t MaxLoadFactor,
- size_t LogInitSize>
- class TDenseHashSet {
- public:
- TDenseHashSet(const TKey& emptyMarker = TKey(), size_t initSize = 0)
- : EmptyMarker(emptyMarker)
- {
- MakeEmpty(initSize);
- }
- void Clear() {
- size_t currentSize = Buckets.size();
- Buckets.clear();
- Buckets.resize(currentSize, EmptyMarker);
- NumFilled = 0;
- }
- void MakeEmpty(size_t initSize = 0) {
- if (!initSize) {
- initSize = 1 << LogInitSize;
- } else {
- initSize = FastClp2(initSize);
- }
- BucketMask = initSize - 1;
- NumFilled = 0;
- TVector<TKey>(initSize, EmptyMarker).swap(Buckets);
- GrowThreshold = Max<size_t>(1, initSize * MaxLoadFactor / 100) - 1;
- }
- template <class K>
- bool Has(const K& key) const {
- return Buckets[FindBucket(key)] != EmptyMarker;
- }
- // gets existing item or inserts new
- template <class K>
- bool Insert(const K& key) {
- bool inserted = InsertNoGrow(key);
- if (inserted) {
- MaybeGrow();
- }
- return inserted;
- }
- size_t Capacity() const {
- return Buckets.capacity();
- }
- bool Empty() const {
- return Size() == 0;
- }
- size_t Size() const {
- return NumFilled;
- }
- template <size_t maxFillPercents, size_t logInitSize>
- void Swap(TDenseHashSet<TKey, TKeyHash, maxFillPercents, logInitSize>& other) {
- Buckets.swap(other.Buckets);
- DoSwap(BucketMask, other.BucketMask);
- DoSwap(NumFilled, other.NumFilled);
- DoSwap(GrowThreshold, other.GrowThreshold);
- DoSwap(EmptyMarker, other.EmptyMarker);
- }
- Y_SAVELOAD_DEFINE(BucketMask, NumFilled, GrowThreshold, Buckets, EmptyMarker);
- private:
- template <class THash>
- class TIteratorBase {
- friend class TDenseHashSet;
- THash* Hash;
- size_t Idx;
- // used only to implement end()
- TIteratorBase(THash* hash, size_t initIdx)
- : Hash(hash)
- , Idx(initIdx)
- {
- }
- public:
- TIteratorBase(THash& hash)
- : Hash(&hash)
- , Idx(0)
- {
- if (Hash->Buckets[Idx] == Hash->EmptyMarker) {
- Next();
- }
- }
- void Next() {
- ++Idx;
- while (Idx < Hash->Buckets.size() && Hash->Buckets[Idx] == Hash->EmptyMarker) {
- ++Idx;
- }
- }
- TIteratorBase& operator++() {
- Next();
- return *this;
- }
- bool Initialized() const {
- return Hash != nullptr;
- }
- bool Ok() const {
- return Idx < Hash->Buckets.size();
- }
- const TKey& operator*() const {
- return Key();
- }
- const TKey& Key() const {
- return Hash->Buckets[Idx];
- }
- bool operator==(const TIteratorBase& rhs) const {
- Y_ASSERT(Hash == rhs.Hash);
- return Idx == rhs.Idx;
- }
- bool operator!=(const TIteratorBase& rhs) const {
- return !(*this == rhs);
- }
- };
- public:
- typedef TIteratorBase<const TDenseHashSet> TConstIterator;
- TConstIterator begin() const {
- return TConstIterator(*this);
- }
- TConstIterator end() const {
- return TConstIterator(this, Buckets.size());
- }
- private:
- size_t BucketMask;
- size_t NumFilled;
- size_t GrowThreshold;
- TVector<TKey> Buckets;
- TKey EmptyMarker;
- template <class K>
- bool InsertNoGrow(const K& key) {
- size_t idx = FindBucket(key);
- if (Buckets[idx] == EmptyMarker) {
- ++NumFilled;
- Buckets[idx] = key;
- return true;
- }
- return false;
- }
- bool MaybeGrow() {
- if (NumFilled < GrowThreshold) {
- return false;
- }
- TVector<TKey> oldBuckets(Buckets.size() * 2, EmptyMarker);
- oldBuckets.swap(Buckets);
- BucketMask = Buckets.size() - 1;
- GrowThreshold = Max<size_t>(1, Buckets.size() * (MaxLoadFactor / 100.f)) - 1;
- NumFilled = 0;
- for (const TKey& key : oldBuckets) {
- if (key != EmptyMarker) {
- InsertNoGrow(key);
- }
- }
- return true;
- }
- template <class K>
- size_t FindBucket(const K& key) const {
- size_t idx = TKeyHash()(key) & BucketMask;
- for (size_t numProbes = 1; Buckets[idx] != EmptyMarker; ++numProbes) {
- if (Buckets[idx] == key) {
- return idx;
- }
- idx = (idx + numProbes) & BucketMask;
- }
- return idx;
- }
- };
|