123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462 |
- #pragma once
- #ifdef __GNUC__
- #pragma GCC diagnostic push
- #pragma GCC diagnostic ignored "-Wunused-parameter"
- #endif
- //===- llvm/ADT/CoalescingBitVector.h - A coalescing bitvector --*- C++ -*-===//
- //
- // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
- // See https://llvm.org/LICENSE.txt for license information.
- // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
- //
- //===----------------------------------------------------------------------===//
- ///
- /// \file
- /// A bitvector that uses an IntervalMap to coalesce adjacent elements
- /// into intervals.
- ///
- //===----------------------------------------------------------------------===//
- #ifndef LLVM_ADT_COALESCINGBITVECTOR_H
- #define LLVM_ADT_COALESCINGBITVECTOR_H
- #include "llvm/ADT/IntervalMap.h"
- #include "llvm/ADT/STLExtras.h"
- #include "llvm/ADT/SmallVector.h"
- #include "llvm/ADT/iterator_range.h"
- #include "llvm/Support/Debug.h"
- #include "llvm/Support/raw_ostream.h"
- #include <initializer_list>
- namespace llvm {
- /// A bitvector that, under the hood, relies on an IntervalMap to coalesce
- /// elements into intervals. Good for representing sets which predominantly
- /// contain contiguous ranges. Bad for representing sets with lots of gaps
- /// between elements.
- ///
- /// Compared to SparseBitVector, CoalescingBitVector offers more predictable
- /// performance for non-sequential find() operations.
- ///
- /// \tparam IndexT - The type of the index into the bitvector.
- template <typename IndexT> class CoalescingBitVector {
- static_assert(std::is_unsigned<IndexT>::value,
- "Index must be an unsigned integer.");
- using ThisT = CoalescingBitVector<IndexT>;
- /// An interval map for closed integer ranges. The mapped values are unused.
- using MapT = IntervalMap<IndexT, char>;
- using UnderlyingIterator = typename MapT::const_iterator;
- using IntervalT = std::pair<IndexT, IndexT>;
- public:
- using Allocator = typename MapT::Allocator;
- /// Construct by passing in a CoalescingBitVector<IndexT>::Allocator
- /// reference.
- CoalescingBitVector(Allocator &Alloc)
- : Alloc(&Alloc), Intervals(Alloc) {}
- /// \name Copy/move constructors and assignment operators.
- /// @{
- CoalescingBitVector(const ThisT &Other)
- : Alloc(Other.Alloc), Intervals(*Other.Alloc) {
- set(Other);
- }
- ThisT &operator=(const ThisT &Other) {
- clear();
- set(Other);
- return *this;
- }
- CoalescingBitVector(ThisT &&Other) = delete;
- ThisT &operator=(ThisT &&Other) = delete;
- /// @}
- /// Clear all the bits.
- void clear() { Intervals.clear(); }
- /// Check whether no bits are set.
- bool empty() const { return Intervals.empty(); }
- /// Count the number of set bits.
- unsigned count() const {
- unsigned Bits = 0;
- for (auto It = Intervals.begin(), End = Intervals.end(); It != End; ++It)
- Bits += 1 + It.stop() - It.start();
- return Bits;
- }
- /// Set the bit at \p Index.
- ///
- /// This method does /not/ support setting a bit that has already been set,
- /// for efficiency reasons. If possible, restructure your code to not set the
- /// same bit multiple times, or use \ref test_and_set.
- void set(IndexT Index) {
- assert(!test(Index) && "Setting already-set bits not supported/efficient, "
- "IntervalMap will assert");
- insert(Index, Index);
- }
- /// Set the bits set in \p Other.
- ///
- /// This method does /not/ support setting already-set bits, see \ref set
- /// for the rationale. For a safe set union operation, use \ref operator|=.
- void set(const ThisT &Other) {
- for (auto It = Other.Intervals.begin(), End = Other.Intervals.end();
- It != End; ++It)
- insert(It.start(), It.stop());
- }
- /// Set the bits at \p Indices. Used for testing, primarily.
- void set(std::initializer_list<IndexT> Indices) {
- for (IndexT Index : Indices)
- set(Index);
- }
- /// Check whether the bit at \p Index is set.
- bool test(IndexT Index) const {
- const auto It = Intervals.find(Index);
- if (It == Intervals.end())
- return false;
- assert(It.stop() >= Index && "Interval must end after Index");
- return It.start() <= Index;
- }
- /// Set the bit at \p Index. Supports setting an already-set bit.
- void test_and_set(IndexT Index) {
- if (!test(Index))
- set(Index);
- }
- /// Reset the bit at \p Index. Supports resetting an already-unset bit.
- void reset(IndexT Index) {
- auto It = Intervals.find(Index);
- if (It == Intervals.end())
- return;
- // Split the interval containing Index into up to two parts: one from
- // [Start, Index-1] and another from [Index+1, Stop]. If Index is equal to
- // either Start or Stop, we create one new interval. If Index is equal to
- // both Start and Stop, we simply erase the existing interval.
- IndexT Start = It.start();
- if (Index < Start)
- // The index was not set.
- return;
- IndexT Stop = It.stop();
- assert(Index <= Stop && "Wrong interval for index");
- It.erase();
- if (Start < Index)
- insert(Start, Index - 1);
- if (Index < Stop)
- insert(Index + 1, Stop);
- }
- /// Set union. If \p RHS is guaranteed to not overlap with this, \ref set may
- /// be a faster alternative.
- void operator|=(const ThisT &RHS) {
- // Get the overlaps between the two interval maps.
- SmallVector<IntervalT, 8> Overlaps;
- getOverlaps(RHS, Overlaps);
- // Insert the non-overlapping parts of all the intervals from RHS.
- for (auto It = RHS.Intervals.begin(), End = RHS.Intervals.end();
- It != End; ++It) {
- IndexT Start = It.start();
- IndexT Stop = It.stop();
- SmallVector<IntervalT, 8> NonOverlappingParts;
- getNonOverlappingParts(Start, Stop, Overlaps, NonOverlappingParts);
- for (IntervalT AdditivePortion : NonOverlappingParts)
- insert(AdditivePortion.first, AdditivePortion.second);
- }
- }
- /// Set intersection.
- void operator&=(const ThisT &RHS) {
- // Get the overlaps between the two interval maps (i.e. the intersection).
- SmallVector<IntervalT, 8> Overlaps;
- getOverlaps(RHS, Overlaps);
- // Rebuild the interval map, including only the overlaps.
- clear();
- for (IntervalT Overlap : Overlaps)
- insert(Overlap.first, Overlap.second);
- }
- /// Reset all bits present in \p Other.
- void intersectWithComplement(const ThisT &Other) {
- SmallVector<IntervalT, 8> Overlaps;
- if (!getOverlaps(Other, Overlaps)) {
- // If there is no overlap with Other, the intersection is empty.
- return;
- }
- // Delete the overlapping intervals. Split up intervals that only partially
- // intersect an overlap.
- for (IntervalT Overlap : Overlaps) {
- IndexT OlapStart, OlapStop;
- std::tie(OlapStart, OlapStop) = Overlap;
- auto It = Intervals.find(OlapStart);
- IndexT CurrStart = It.start();
- IndexT CurrStop = It.stop();
- assert(CurrStart <= OlapStart && OlapStop <= CurrStop &&
- "Expected some intersection!");
- // Split the overlap interval into up to two parts: one from [CurrStart,
- // OlapStart-1] and another from [OlapStop+1, CurrStop]. If OlapStart is
- // equal to CurrStart, the first split interval is unnecessary. Ditto for
- // when OlapStop is equal to CurrStop, we omit the second split interval.
- It.erase();
- if (CurrStart < OlapStart)
- insert(CurrStart, OlapStart - 1);
- if (OlapStop < CurrStop)
- insert(OlapStop + 1, CurrStop);
- }
- }
- bool operator==(const ThisT &RHS) const {
- // We cannot just use std::equal because it checks the dereferenced values
- // of an iterator pair for equality, not the iterators themselves. In our
- // case that results in comparison of the (unused) IntervalMap values.
- auto ItL = Intervals.begin();
- auto ItR = RHS.Intervals.begin();
- while (ItL != Intervals.end() && ItR != RHS.Intervals.end() &&
- ItL.start() == ItR.start() && ItL.stop() == ItR.stop()) {
- ++ItL;
- ++ItR;
- }
- return ItL == Intervals.end() && ItR == RHS.Intervals.end();
- }
- bool operator!=(const ThisT &RHS) const { return !operator==(RHS); }
- class const_iterator {
- friend class CoalescingBitVector;
- public:
- using iterator_category = std::forward_iterator_tag;
- using value_type = IndexT;
- using difference_type = std::ptrdiff_t;
- using pointer = value_type *;
- using reference = value_type &;
- private:
- // For performance reasons, make the offset at the end different than the
- // one used in \ref begin, to optimize the common `It == end()` pattern.
- static constexpr unsigned kIteratorAtTheEndOffset = ~0u;
- UnderlyingIterator MapIterator;
- unsigned OffsetIntoMapIterator = 0;
- // Querying the start/stop of an IntervalMap iterator can be very expensive.
- // Cache these values for performance reasons.
- IndexT CachedStart = IndexT();
- IndexT CachedStop = IndexT();
- void setToEnd() {
- OffsetIntoMapIterator = kIteratorAtTheEndOffset;
- CachedStart = IndexT();
- CachedStop = IndexT();
- }
- /// MapIterator has just changed, reset the cached state to point to the
- /// start of the new underlying iterator.
- void resetCache() {
- if (MapIterator.valid()) {
- OffsetIntoMapIterator = 0;
- CachedStart = MapIterator.start();
- CachedStop = MapIterator.stop();
- } else {
- setToEnd();
- }
- }
- /// Advance the iterator to \p Index, if it is contained within the current
- /// interval. The public-facing method which supports advancing past the
- /// current interval is \ref advanceToLowerBound.
- void advanceTo(IndexT Index) {
- assert(Index <= CachedStop && "Cannot advance to OOB index");
- if (Index < CachedStart)
- // We're already past this index.
- return;
- OffsetIntoMapIterator = Index - CachedStart;
- }
- const_iterator(UnderlyingIterator MapIt) : MapIterator(MapIt) {
- resetCache();
- }
- public:
- const_iterator() { setToEnd(); }
- bool operator==(const const_iterator &RHS) const {
- // Do /not/ compare MapIterator for equality, as this is very expensive.
- // The cached start/stop values make that check unnecessary.
- return std::tie(OffsetIntoMapIterator, CachedStart, CachedStop) ==
- std::tie(RHS.OffsetIntoMapIterator, RHS.CachedStart,
- RHS.CachedStop);
- }
- bool operator!=(const const_iterator &RHS) const {
- return !operator==(RHS);
- }
- IndexT operator*() const { return CachedStart + OffsetIntoMapIterator; }
- const_iterator &operator++() { // Pre-increment (++It).
- if (CachedStart + OffsetIntoMapIterator < CachedStop) {
- // Keep going within the current interval.
- ++OffsetIntoMapIterator;
- } else {
- // We reached the end of the current interval: advance.
- ++MapIterator;
- resetCache();
- }
- return *this;
- }
- const_iterator operator++(int) { // Post-increment (It++).
- const_iterator tmp = *this;
- operator++();
- return tmp;
- }
- /// Advance the iterator to the first set bit AT, OR AFTER, \p Index. If
- /// no such set bit exists, advance to end(). This is like std::lower_bound.
- /// This is useful if \p Index is close to the current iterator position.
- /// However, unlike \ref find(), this has worst-case O(n) performance.
- void advanceToLowerBound(IndexT Index) {
- if (OffsetIntoMapIterator == kIteratorAtTheEndOffset)
- return;
- // Advance to the first interval containing (or past) Index, or to end().
- while (Index > CachedStop) {
- ++MapIterator;
- resetCache();
- if (OffsetIntoMapIterator == kIteratorAtTheEndOffset)
- return;
- }
- advanceTo(Index);
- }
- };
- const_iterator begin() const { return const_iterator(Intervals.begin()); }
- const_iterator end() const { return const_iterator(); }
- /// Return an iterator pointing to the first set bit AT, OR AFTER, \p Index.
- /// If no such set bit exists, return end(). This is like std::lower_bound.
- /// This has worst-case logarithmic performance (roughly O(log(gaps between
- /// contiguous ranges))).
- const_iterator find(IndexT Index) const {
- auto UnderlyingIt = Intervals.find(Index);
- if (UnderlyingIt == Intervals.end())
- return end();
- auto It = const_iterator(UnderlyingIt);
- It.advanceTo(Index);
- return It;
- }
- /// Return a range iterator which iterates over all of the set bits in the
- /// half-open range [Start, End).
- iterator_range<const_iterator> half_open_range(IndexT Start,
- IndexT End) const {
- assert(Start < End && "Not a valid range");
- auto StartIt = find(Start);
- if (StartIt == end() || *StartIt >= End)
- return {end(), end()};
- auto EndIt = StartIt;
- EndIt.advanceToLowerBound(End);
- return {StartIt, EndIt};
- }
- void print(raw_ostream &OS) const {
- OS << "{";
- for (auto It = Intervals.begin(), End = Intervals.end(); It != End;
- ++It) {
- OS << "[" << It.start();
- if (It.start() != It.stop())
- OS << ", " << It.stop();
- OS << "]";
- }
- OS << "}";
- }
- #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
- LLVM_DUMP_METHOD void dump() const {
- // LLDB swallows the first line of output after callling dump(). Add
- // newlines before/after the braces to work around this.
- dbgs() << "\n";
- print(dbgs());
- dbgs() << "\n";
- }
- #endif
- private:
- void insert(IndexT Start, IndexT End) { Intervals.insert(Start, End, 0); }
- /// Record the overlaps between \p this and \p Other in \p Overlaps. Return
- /// true if there is any overlap.
- bool getOverlaps(const ThisT &Other,
- SmallVectorImpl<IntervalT> &Overlaps) const {
- for (IntervalMapOverlaps<MapT, MapT> I(Intervals, Other.Intervals);
- I.valid(); ++I)
- Overlaps.emplace_back(I.start(), I.stop());
- assert(llvm::is_sorted(Overlaps,
- [](IntervalT LHS, IntervalT RHS) {
- return LHS.second < RHS.first;
- }) &&
- "Overlaps must be sorted");
- return !Overlaps.empty();
- }
- /// Given the set of overlaps between this and some other bitvector, and an
- /// interval [Start, Stop] from that bitvector, determine the portions of the
- /// interval which do not overlap with this.
- void getNonOverlappingParts(IndexT Start, IndexT Stop,
- const SmallVectorImpl<IntervalT> &Overlaps,
- SmallVectorImpl<IntervalT> &NonOverlappingParts) {
- IndexT NextUncoveredBit = Start;
- for (IntervalT Overlap : Overlaps) {
- IndexT OlapStart, OlapStop;
- std::tie(OlapStart, OlapStop) = Overlap;
- // [Start;Stop] and [OlapStart;OlapStop] overlap iff OlapStart <= Stop
- // and Start <= OlapStop.
- bool DoesOverlap = OlapStart <= Stop && Start <= OlapStop;
- if (!DoesOverlap)
- continue;
- // Cover the range [NextUncoveredBit, OlapStart). This puts the start of
- // the next uncovered range at OlapStop+1.
- if (NextUncoveredBit < OlapStart)
- NonOverlappingParts.emplace_back(NextUncoveredBit, OlapStart - 1);
- NextUncoveredBit = OlapStop + 1;
- if (NextUncoveredBit > Stop)
- break;
- }
- if (NextUncoveredBit <= Stop)
- NonOverlappingParts.emplace_back(NextUncoveredBit, Stop);
- }
- Allocator *Alloc;
- MapT Intervals;
- };
- } // namespace llvm
- #endif // LLVM_ADT_COALESCINGBITVECTOR_H
- #ifdef __GNUC__
- #pragma GCC diagnostic pop
- #endif
|