sparse_set.h 7.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264
  1. // Copyright 2006 The RE2 Authors. All Rights Reserved.
  2. // Use of this source code is governed by a BSD-style
  3. // license that can be found in the LICENSE file.
  4. #ifndef RE2_SPARSE_SET_H_
  5. #define RE2_SPARSE_SET_H_
  6. // DESCRIPTION
  7. //
  8. // SparseSet(m) is a set of integers in [0, m).
  9. // It requires sizeof(int)*m memory, but it provides
  10. // fast iteration through the elements in the set and fast clearing
  11. // of the set.
  12. //
  13. // Insertion and deletion are constant time operations.
  14. //
  15. // Allocating the set is a constant time operation
  16. // when memory allocation is a constant time operation.
  17. //
  18. // Clearing the set is a constant time operation (unusual!).
  19. //
  20. // Iterating through the set is an O(n) operation, where n
  21. // is the number of items in the set (not O(m)).
  22. //
  23. // The set iterator visits entries in the order they were first
  24. // inserted into the set. It is safe to add items to the set while
  25. // using an iterator: the iterator will visit indices added to the set
  26. // during the iteration, but will not re-visit indices whose values
  27. // change after visiting. Thus SparseSet can be a convenient
  28. // implementation of a work queue.
  29. //
  30. // The SparseSet implementation is NOT thread-safe. It is up to the
  31. // caller to make sure only one thread is accessing the set. (Typically
  32. // these sets are temporary values and used in situations where speed is
  33. // important.)
  34. //
  35. // The SparseSet interface does not present all the usual STL bells and
  36. // whistles.
  37. //
  38. // Implemented with reference to Briggs & Torczon, An Efficient
  39. // Representation for Sparse Sets, ACM Letters on Programming Languages
  40. // and Systems, Volume 2, Issue 1-4 (March-Dec. 1993), pp. 59-69.
  41. //
  42. // This is a specialization of sparse array; see sparse_array.h.
  43. // IMPLEMENTATION
  44. //
  45. // See sparse_array.h for implementation details.
  46. // Doing this simplifies the logic below.
  47. #ifndef __has_feature
  48. #define __has_feature(x) 0
  49. #endif
  50. #include <assert.h>
  51. #include <stdint.h>
  52. #if __has_feature(memory_sanitizer)
  53. #include <sanitizer/msan_interface.h>
  54. #endif
  55. #include <algorithm>
  56. #include <memory>
  57. #include <utility>
  58. #include "re2/pod_array.h"
  59. namespace re2 {
  60. template<typename Value>
  61. class SparseSetT {
  62. public:
  63. SparseSetT();
  64. explicit SparseSetT(int max_size);
  65. ~SparseSetT();
  66. typedef int* iterator;
  67. typedef const int* const_iterator;
  68. // Return the number of entries in the set.
  69. int size() const {
  70. return size_;
  71. }
  72. // Indicate whether the set is empty.
  73. int empty() const {
  74. return size_ == 0;
  75. }
  76. // Iterate over the set.
  77. iterator begin() {
  78. return dense_.data();
  79. }
  80. iterator end() {
  81. return dense_.data() + size_;
  82. }
  83. const_iterator begin() const {
  84. return dense_.data();
  85. }
  86. const_iterator end() const {
  87. return dense_.data() + size_;
  88. }
  89. // Change the maximum size of the set.
  90. // Invalidates all iterators.
  91. void resize(int new_max_size);
  92. // Return the maximum size of the set.
  93. // Indices can be in the range [0, max_size).
  94. int max_size() const {
  95. if (dense_.data() != NULL)
  96. return dense_.size();
  97. else
  98. return 0;
  99. }
  100. // Clear the set.
  101. void clear() {
  102. size_ = 0;
  103. }
  104. // Check whether index i is in the set.
  105. bool contains(int i) const;
  106. // Comparison function for sorting.
  107. // Can sort the sparse set so that future iterations
  108. // will visit indices in increasing order using
  109. // std::sort(arr.begin(), arr.end(), arr.less);
  110. static bool less(int a, int b);
  111. public:
  112. // Insert index i into the set.
  113. iterator insert(int i) {
  114. return InsertInternal(true, i);
  115. }
  116. // Insert index i into the set.
  117. // Fast but unsafe: only use if contains(i) is false.
  118. iterator insert_new(int i) {
  119. return InsertInternal(false, i);
  120. }
  121. private:
  122. iterator InsertInternal(bool allow_existing, int i) {
  123. DebugCheckInvariants();
  124. if (static_cast<uint32_t>(i) >= static_cast<uint32_t>(max_size())) {
  125. assert(false && "illegal index");
  126. // Semantically, end() would be better here, but we already know
  127. // the user did something stupid, so begin() insulates them from
  128. // dereferencing an invalid pointer.
  129. return begin();
  130. }
  131. if (!allow_existing) {
  132. assert(!contains(i));
  133. create_index(i);
  134. } else {
  135. if (!contains(i))
  136. create_index(i);
  137. }
  138. DebugCheckInvariants();
  139. return dense_.data() + sparse_[i];
  140. }
  141. // Add the index i to the set.
  142. // Only use if contains(i) is known to be false.
  143. // This function is private, only intended as a helper
  144. // for other methods.
  145. void create_index(int i);
  146. // In debug mode, verify that some invariant properties of the class
  147. // are being maintained. This is called at the end of the constructor
  148. // and at the beginning and end of all public non-const member functions.
  149. void DebugCheckInvariants() const;
  150. // Initializes memory for elements [min, max).
  151. void MaybeInitializeMemory(int min, int max) {
  152. #if __has_feature(memory_sanitizer)
  153. __msan_unpoison(sparse_.data() + min, (max - min) * sizeof sparse_[0]);
  154. #elif defined(RE2_ON_VALGRIND)
  155. for (int i = min; i < max; i++) {
  156. sparse_[i] = 0xababababU;
  157. }
  158. #endif
  159. }
  160. int size_ = 0;
  161. PODArray<int> sparse_;
  162. PODArray<int> dense_;
  163. };
  164. template<typename Value>
  165. SparseSetT<Value>::SparseSetT() = default;
  166. // Change the maximum size of the set.
  167. // Invalidates all iterators.
  168. template<typename Value>
  169. void SparseSetT<Value>::resize(int new_max_size) {
  170. DebugCheckInvariants();
  171. if (new_max_size > max_size()) {
  172. const int old_max_size = max_size();
  173. // Construct these first for exception safety.
  174. PODArray<int> a(new_max_size);
  175. PODArray<int> b(new_max_size);
  176. std::copy_n(sparse_.data(), old_max_size, a.data());
  177. std::copy_n(dense_.data(), old_max_size, b.data());
  178. sparse_ = std::move(a);
  179. dense_ = std::move(b);
  180. MaybeInitializeMemory(old_max_size, new_max_size);
  181. }
  182. if (size_ > new_max_size)
  183. size_ = new_max_size;
  184. DebugCheckInvariants();
  185. }
  186. // Check whether index i is in the set.
  187. template<typename Value>
  188. bool SparseSetT<Value>::contains(int i) const {
  189. assert(i >= 0);
  190. assert(i < max_size());
  191. if (static_cast<uint32_t>(i) >= static_cast<uint32_t>(max_size())) {
  192. return false;
  193. }
  194. // Unsigned comparison avoids checking sparse_[i] < 0.
  195. return (uint32_t)sparse_[i] < (uint32_t)size_ &&
  196. dense_[sparse_[i]] == i;
  197. }
  198. template<typename Value>
  199. void SparseSetT<Value>::create_index(int i) {
  200. assert(!contains(i));
  201. assert(size_ < max_size());
  202. sparse_[i] = size_;
  203. dense_[size_] = i;
  204. size_++;
  205. }
  206. template<typename Value> SparseSetT<Value>::SparseSetT(int max_size) :
  207. sparse_(max_size), dense_(max_size) {
  208. MaybeInitializeMemory(size_, max_size);
  209. DebugCheckInvariants();
  210. }
  211. template<typename Value> SparseSetT<Value>::~SparseSetT() {
  212. DebugCheckInvariants();
  213. }
  214. template<typename Value> void SparseSetT<Value>::DebugCheckInvariants() const {
  215. assert(0 <= size_);
  216. assert(size_ <= max_size());
  217. }
  218. // Comparison function for sorting.
  219. template<typename Value> bool SparseSetT<Value>::less(int a, int b) {
  220. return a < b;
  221. }
  222. typedef SparseSetT<void> SparseSet;
  223. } // namespace re2
  224. #endif // RE2_SPARSE_SET_H_