#pragma once #include "fwd.h" #include #include #include #include #include #include /* * There are 2 classes in this file: * - TDenseHash - analog of THashMap * - TDenseHashSet - analog of THashSet */ /* * Implements dense-hash, in some circumstances it is a lot (2x) faster than THashMap. * We support only adding new elements. * TKey value equal to EmptyMarker (by default, it is TKey()) * can not be inserted into hash - it is used as marker of empty element. * TValue type must be default constructible */ template class TDenseHash : public TMapOps> { private: template class TIteratorBase { friend class TDenseHash; template friend class TIteratorBase; THash* Hash; size_t Idx; // used only to implement end() TIteratorBase(THash* hash, size_t initIdx) : Hash(hash) , Idx(initIdx) { } public: TIteratorBase(THash& hash) : Hash(&hash) , Idx(0) { if (Hash->EmptyMarker == Hash->Buckets[Idx].first) { Next(); } } template TIteratorBase(const TIteratorBase& it) : Hash(it.Hash) , Idx(it.Idx) { } TIteratorBase(const TIteratorBase&) = default; static TIteratorBase CreateEmpty() { return TIteratorBase(nullptr, 0); } TIteratorBase& operator=(const TIteratorBase&) = default; void Next() { ++Idx; while (Idx < Hash->Buckets.size() && Hash->EmptyMarker == Hash->Buckets[Idx].first) { ++Idx; } } TIteratorBase& operator++() { Next(); return *this; } TVal& operator*() { return Hash->Buckets[Idx]; } TVal* operator->() { return &Hash->Buckets[Idx]; } const TVal* operator->() const { return &Hash->Buckets[Idx]; } THash* GetHash() { return Hash; } bool operator==(const TIteratorBase& rhs) const { Y_ASSERT(Hash == rhs.Hash); return Idx == rhs.Idx; } bool operator!=(const TIteratorBase& rhs) const { return !(*this == rhs); } }; public: using key_type = TKey; using mapped_type = TValue; using value_type = std::pair; using size_type = std::size_t; using difference_type = std::ptrdiff_t; using hasher = TKeyHash; using key_equal = std::equal_to; // TODO(tender-bum): template argument // using allocator_type = ... using reference = value_type&; using const_reference = const value_type&; using pointer = value_type*; // TODO(tender-bum): std::allocator_traits::pointer; using const_pointer = const value_type*; // TODO(tender-bum): // std::allocator_traits::const_pointer; using iterator = TIteratorBase; using const_iterator = TIteratorBase; public: TDenseHash(const key_type& emptyMarker = key_type{}, size_type initSize = 0) : EmptyMarker(emptyMarker) { MakeEmpty(initSize); } TDenseHash(const TDenseHash&) = default; TDenseHash(TDenseHash&&) = default; TDenseHash& operator=(const TDenseHash& rhs) { TDenseHash tmp{ rhs }; return *this = std::move(tmp); } TDenseHash& operator=(TDenseHash&&) = default; friend bool operator==(const TDenseHash& lhs, const TDenseHash& rhs) { return lhs.Size() == rhs.Size() && AllOf(lhs, [&rhs](const auto& v) { auto it = rhs.find(v.first); return it != rhs.end() && *it == v; }); } void Clear() { for (auto& bucket : Buckets) { if (bucket.first != EmptyMarker) { SetValue(bucket, EmptyMarker, mapped_type{}); } } NumFilled = 0; } void MakeEmpty(size_type initSize = 0) { if (!initSize) { initSize = 1 << LogInitSize; } else { initSize = FastClp2(initSize); } BucketMask = initSize - 1; NumFilled = 0; TVector tmp; for (size_type i = 0; i < initSize; ++i) { tmp.emplace_back(EmptyMarker, mapped_type{}); } tmp.swap(Buckets); GrowThreshold = Max(1, initSize * MaxLoadFactor / 100) - 1; } template bool Has(const K& key) const { return ProcessKey( key, [](size_type) { return true; }, [](size_type) { return false; }); } size_type Capacity() const { return Buckets.capacity(); } bool Empty() const { return Size() == 0; } size_type Size() const { return NumFilled; } template void Swap(TDenseHash& other) { Buckets.swap(other.Buckets); DoSwap(BucketMask, other.BucketMask); DoSwap(NumFilled, other.NumFilled); DoSwap(GrowThreshold, other.GrowThreshold); DoSwap(EmptyMarker, other.EmptyMarker); } void Save(IOutputStream* s) const { // TODO(tender-bum): make SaveLoad great again ::SaveMany(s, BucketMask, NumFilled, GrowThreshold); // We need to do so because Buckets may be serialized as a pod-array // that doesn't correspond to the previous behaviour ::SaveSize(s, Buckets.size()); for (const auto& b : Buckets) { ::Save(s, b.first); ::Save(s, b.second); } mapped_type defaultValue{}; ::SaveMany(s, EmptyMarker, defaultValue); } void Load(IInputStream* s) { // TODO(tender-bum): make SaveLoad great again ::LoadMany(s, BucketMask, NumFilled, GrowThreshold); // We need to do so because we can't load const fields struct TPairMimic { key_type First; mapped_type Second; Y_SAVELOAD_DEFINE(First, Second); }; TVector tmp; ::Load(s, tmp); Buckets.clear(); for (auto& v : tmp) { Buckets.emplace_back(std::move(v.First), std::move(v.Second)); } ::Load(s, EmptyMarker); mapped_type defaultValue; ::Load(s, defaultValue); } public: iterator begin() { return iterator(*this); } iterator end() { return iterator(this, Buckets.size()); } const_iterator begin() const { return const_iterator(*this); } const_iterator end() const { return const_iterator(this, Buckets.size()); } template iterator find(const K& key) { return ProcessKey( key, [&](size_type idx) { return iterator(this, idx); }, [&](size_type) { return end(); }); } template const_iterator find(const K& key) const { return ProcessKey( key, [&](size_type idx) { return const_iterator(this, idx); }, [&](size_type) { return end(); }); } template const TValue& at(const K& key) const { return ProcessKey( key, [&](size_type idx) -> const TValue& { return Buckets[idx].second; }, [&](size_type) -> const TValue& { throw std::out_of_range("TDenseHash: missing key"); }); } template TValue& at(const K& key) { return ProcessKey( key, [&](size_type idx) -> TValue& { return Buckets[idx].second; }, [&](size_type) -> TValue& { throw std::out_of_range("TDenseHash: missing key"); }); } bool Grow(size_type to = 0, bool force = false) { if (!to) { to = Buckets.size() * 2; } else { to = FastClp2(to); if (to <= Buckets.size() && !force) { return false; } } TVector oldBuckets(Reserve(to)); for (size_type i = 0; i < to; ++i) { oldBuckets.emplace_back(EmptyMarker, mapped_type{}); } oldBuckets.swap(Buckets); BucketMask = Buckets.size() - 1; GrowThreshold = Max(1, Buckets.size() * (MaxLoadFactor / 100.f)) - 1; for (auto& item : oldBuckets) { if (EmptyMarker != item.first) { SetValue(FindProperBucket(item.first), std::move(item)); } } return true; } // Grow to size with which GrowThreshold will be higher then passed value // // (to) = (desired_num_filled + 2) * (100.f / MaxLoadFactor) + 2 after conversion to size_type // is not less than x := (desired_num_filled + 2) * (100.f / MaxLoadFactor) + 1 and FastClp2(to) is not less that (to) // (to) * (MaxLoadFactor / 100.f) >= x * (MaxLoadFactor / 100.f) = (desired_num_filled + 2) + (MaxLoadFactor / 100.f). // This require calculations with two or more significand decimal places // to have no less than (desired_num_filled + 2) after second conversion to size_type. // In that case after substracting 1 we got GrowThreshold >= desired_num_filled + 1 // bool ReserveSpace(size_type desired_num_filled, bool force = false) { size_type to = Max(1, (desired_num_filled + 2) * (100.f / MaxLoadFactor) + 2); return Grow(to, force); } // We need this overload because we want to optimize insertion when somebody inserts value_type. // So we don't need to extract the key. // This overload also allows brace enclosed initializer to be inserted. std::pair insert(const value_type& t) { size_type hs = hasher{}(t.first); auto p = GetBucketInfo(hs & BucketMask, t.first); if (p.second) { ++NumFilled; if (NumFilled >= GrowThreshold) { Grow(); p.first = FindProperBucket(hs & BucketMask, t.first); } SetValue(p.first, t); return { iterator{ this, p.first }, true }; } return { iterator{ this, p.first }, false }; } // We need this overload because we want to optimize insertion when somebody inserts value_type. // So we don't need to extract the key. // This overload also allows brace enclosed initializer to be inserted. std::pair insert(value_type&& t) { size_type hs = hasher{}(t.first); auto p = GetBucketInfo(hs & BucketMask, t.first); if (p.second) { ++NumFilled; if (NumFilled >= GrowThreshold) { Grow(); p.first = FindProperBucket(hs & BucketMask, t.first); } SetValue(p.first, std::move(t)); return { iterator{ this, p.first }, true }; } return { iterator{ this, p.first }, false }; } // Standart integration. This overload is equivalent to emplace(std::forward

(p)). template std::enable_if_t, value_type>::value, std::pair> insert(P&& p) { return emplace(std::forward

(p)); } // Not really emplace because we need to know the key anyway. So we need to construct value_type. template std::pair emplace(Args&&... args) { return insert(value_type{ std::forward(args)... }); } template mapped_type& operator[](K&& key) { size_type hs = hasher{}(key); auto p = GetBucketInfo(hs & BucketMask, key); if (p.second) { ++NumFilled; if (NumFilled >= GrowThreshold) { Grow(); p.first = FindProperBucket(hs & BucketMask, key); } SetValue(p.first, std::forward(key), mapped_type{}); } return Buckets[p.first].second; } private: key_type EmptyMarker; size_type NumFilled; size_type BucketMask; size_type GrowThreshold; TVector Buckets; private: // Tricky way to set value of type with const fields template void SetValue(value_type& bucket, Args&&... args) { bucket.~value_type(); new (&bucket) value_type(std::forward(args)...); } template void SetValue(size_type idx, Args&&... args) { SetValue(Buckets[idx], std::forward(args)...); } template size_type FindProperBucket(size_type idx, const K& key) const { return ProcessIndex( idx, key, [](size_type idx) { return idx; }, [](size_type idx) { return idx; }); } template size_type FindProperBucket(const K& key) const { return FindProperBucket(hasher{}(key) & BucketMask, key); } // { idx, is_empty } template std::pair GetBucketInfo(size_type idx, const K& key) const { return ProcessIndex>( idx, key, [](size_type idx) { return std::make_pair(idx, false); }, [](size_type idx) { return std::make_pair(idx, true); }); } template R ProcessIndex(size_type idx, const K& key, OnFound f0, OnEmpty f1) const { for (size_type numProbes = 1; EmptyMarker != Buckets[idx].first; ++numProbes) { if (Buckets[idx].first == key) { return f0(idx); } idx = (idx + numProbes) & BucketMask; } return f1(idx); } template R ProcessKey(const K& key, OnFound&& f0, OnEmpty&& f1) const { return ProcessIndex( hasher{}(key) & BucketMask, key, std::forward(f0), std::forward(f1)); } }; template class TDenseHashSet { public: TDenseHashSet(const TKey& emptyMarker = TKey(), size_t initSize = 0) : EmptyMarker(emptyMarker) { MakeEmpty(initSize); } void Clear() { size_t currentSize = Buckets.size(); Buckets.clear(); Buckets.resize(currentSize, EmptyMarker); NumFilled = 0; } void MakeEmpty(size_t initSize = 0) { if (!initSize) { initSize = 1 << LogInitSize; } else { initSize = FastClp2(initSize); } BucketMask = initSize - 1; NumFilled = 0; TVector(initSize, EmptyMarker).swap(Buckets); GrowThreshold = Max(1, initSize * MaxLoadFactor / 100) - 1; } template bool Has(const K& key) const { return Buckets[FindBucket(key)] != EmptyMarker; } // gets existing item or inserts new template bool Insert(const K& key) { bool inserted = InsertNoGrow(key); if (inserted) { MaybeGrow(); } return inserted; } size_t Capacity() const { return Buckets.capacity(); } bool Empty() const { return Size() == 0; } size_t Size() const { return NumFilled; } template void Swap(TDenseHashSet& other) { Buckets.swap(other.Buckets); DoSwap(BucketMask, other.BucketMask); DoSwap(NumFilled, other.NumFilled); DoSwap(GrowThreshold, other.GrowThreshold); DoSwap(EmptyMarker, other.EmptyMarker); } Y_SAVELOAD_DEFINE(BucketMask, NumFilled, GrowThreshold, Buckets, EmptyMarker); private: template class TIteratorBase { friend class TDenseHashSet; THash* Hash; size_t Idx; // used only to implement end() TIteratorBase(THash* hash, size_t initIdx) : Hash(hash) , Idx(initIdx) { } public: TIteratorBase(THash& hash) : Hash(&hash) , Idx(0) { if (Hash->Buckets[Idx] == Hash->EmptyMarker) { Next(); } } void Next() { ++Idx; while (Idx < Hash->Buckets.size() && Hash->Buckets[Idx] == Hash->EmptyMarker) { ++Idx; } } TIteratorBase& operator++() { Next(); return *this; } bool Initialized() const { return Hash != nullptr; } bool Ok() const { return Idx < Hash->Buckets.size(); } const TKey& operator*() const { return Key(); } const TKey& Key() const { return Hash->Buckets[Idx]; } bool operator==(const TIteratorBase& rhs) const { Y_ASSERT(Hash == rhs.Hash); return Idx == rhs.Idx; } bool operator!=(const TIteratorBase& rhs) const { return !(*this == rhs); } }; public: typedef TIteratorBase TConstIterator; TConstIterator begin() const { return TConstIterator(*this); } TConstIterator end() const { return TConstIterator(this, Buckets.size()); } private: size_t BucketMask; size_t NumFilled; size_t GrowThreshold; TVector Buckets; TKey EmptyMarker; template bool InsertNoGrow(const K& key) { size_t idx = FindBucket(key); if (Buckets[idx] == EmptyMarker) { ++NumFilled; Buckets[idx] = key; return true; } return false; } bool MaybeGrow() { if (NumFilled < GrowThreshold) { return false; } TVector oldBuckets(Buckets.size() * 2, EmptyMarker); oldBuckets.swap(Buckets); BucketMask = Buckets.size() - 1; GrowThreshold = Max(1, Buckets.size() * (MaxLoadFactor / 100.f)) - 1; NumFilled = 0; for (const TKey& key : oldBuckets) { if (key != EmptyMarker) { InsertNoGrow(key); } } return true; } template size_t FindBucket(const K& key) const { size_t idx = TKeyHash()(key) & BucketMask; for (size_t numProbes = 1; Buckets[idx] != EmptyMarker; ++numProbes) { if (Buckets[idx] == key) { return idx; } idx = (idx + numProbes) & BucketMask; } return idx; } };