roaring.hh 34 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031
  1. /*
  2. A C++ header for Roaring Bitmaps.
  3. */
  4. #ifndef INCLUDE_ROARING_HH_
  5. #define INCLUDE_ROARING_HH_
  6. #include <algorithm>
  7. #include <cstdarg>
  8. #include <initializer_list>
  9. #include <limits>
  10. #include <new>
  11. #include <stdexcept>
  12. #include <string>
  13. #if !defined(ROARING_EXCEPTIONS)
  14. // __cpp_exceptions is required by C++98 and we require C++11 or better.
  15. #ifndef __cpp_exceptions
  16. #error "__cpp_exceptions should be defined"
  17. #endif
  18. #if __cpp_exceptions
  19. #define ROARING_EXCEPTIONS 1
  20. #else
  21. #define ROARING_EXCEPTIONS 0
  22. #endif
  23. #endif
  24. #ifndef ROARING_TERMINATE
  25. #if ROARING_EXCEPTIONS
  26. #define ROARING_TERMINATE(_s) throw std::runtime_error(_s)
  27. #else
  28. #define ROARING_TERMINATE(_s) std::terminate()
  29. #endif
  30. #endif
  31. #define ROARING_API_NOT_IN_GLOBAL_NAMESPACE // see remarks in roaring.h
  32. #include <roaring/roaring.h>
  33. #undef ROARING_API_NOT_IN_GLOBAL_NAMESPACE
  34. #include <roaring/roaring_array.h> // roaring::internal array functions used
  35. namespace roaring {
  36. class RoaringSetBitBiDirectionalIterator;
  37. /** DEPRECATED, use `RoaringSetBitBiDirectionalIterator`. */
  38. using RoaringSetBitForwardIterator = RoaringSetBitBiDirectionalIterator;
  39. /**
  40. * A bit of context usable with `*Bulk()` functions.
  41. *
  42. * A context may only be used with a single bitmap, and any modification to a
  43. * bitmap (other than modifications performed with `Bulk()` functions with the
  44. * context passed) will invalidate any contexts associated with that bitmap.
  45. */
  46. class BulkContext {
  47. public:
  48. friend class Roaring;
  49. using roaring_bitmap_bulk_context_t = api::roaring_bulk_context_t;
  50. BulkContext() : context_{nullptr, 0, 0, 0} {}
  51. BulkContext(const BulkContext &) = delete;
  52. BulkContext &operator=(const BulkContext &) = delete;
  53. BulkContext(BulkContext &&) noexcept = default;
  54. BulkContext &operator=(BulkContext &&) noexcept = default;
  55. private:
  56. roaring_bitmap_bulk_context_t context_;
  57. };
  58. class Roaring {
  59. typedef api::roaring_bitmap_t roaring_bitmap_t; // class-local name alias
  60. public:
  61. /**
  62. * Create an empty bitmap in the existing memory for the class.
  63. * The bitmap will be in the "clear" state with no auxiliary allocations.
  64. */
  65. Roaring() : roaring{} {
  66. // The empty constructor roaring{} silences warnings from pedantic
  67. // static analyzers.
  68. api::roaring_bitmap_init_cleared(&roaring);
  69. }
  70. /**
  71. * Construct a bitmap from a list of 32-bit integer values.
  72. */
  73. Roaring(size_t n, const uint32_t *data) : Roaring() {
  74. api::roaring_bitmap_add_many(&roaring, n, data);
  75. }
  76. /**
  77. * Construct a bitmap from an initializer list.
  78. */
  79. Roaring(std::initializer_list<uint32_t> l) : Roaring() {
  80. addMany(l.size(), l.begin());
  81. }
  82. /**
  83. * Construct a roaring object by taking control of a malloc()'d C struct.
  84. *
  85. * Passing a NULL pointer is unsafe.
  86. * The pointer to the C struct will be invalid after the call.
  87. */
  88. explicit Roaring(roaring_bitmap_t *s) noexcept : roaring(*s) {
  89. roaring_free(s); // deallocate the passed-in pointer
  90. }
  91. /**
  92. * Copy constructor.
  93. * It may throw std::runtime_error if there is insufficient memory.
  94. */
  95. Roaring(const Roaring &r) : Roaring() {
  96. if (!api::roaring_bitmap_overwrite(&roaring, &r.roaring)) {
  97. ROARING_TERMINATE("failed roaring_bitmap_overwrite in constructor");
  98. }
  99. api::roaring_bitmap_set_copy_on_write(
  100. &roaring, api::roaring_bitmap_get_copy_on_write(&r.roaring));
  101. }
  102. /**
  103. * Move constructor. The moved-from object remains valid but empty, i.e.
  104. * it behaves as though it was just freshly constructed.
  105. */
  106. Roaring(Roaring &&r) noexcept : roaring(r.roaring) {
  107. //
  108. // !!! This clones the bits of the roaring structure to a new location
  109. // and then overwrites the old bits...assuming that this will still
  110. // work. There are scenarios where this could break; e.g. if some of
  111. // those bits were pointers into the structure memory itself. If such
  112. // things were possible, a roaring_bitmap_move() API would be needed.
  113. //
  114. api::roaring_bitmap_init_cleared(&r.roaring);
  115. }
  116. /**
  117. * Construct a bitmap from a list of uint32_t values.
  118. */
  119. static Roaring bitmapOf(size_t n, ...) {
  120. Roaring ans;
  121. va_list vl;
  122. va_start(vl, n);
  123. for (size_t i = 0; i < n; i++) {
  124. ans.add(va_arg(vl, uint32_t));
  125. }
  126. va_end(vl);
  127. return ans;
  128. }
  129. /**
  130. * Copies the content of the provided bitmap, and
  131. * discard the current content.
  132. * It may throw std::runtime_error if there is insufficient memory.
  133. */
  134. Roaring &operator=(const Roaring &r) {
  135. if (!api::roaring_bitmap_overwrite(&roaring, &r.roaring)) {
  136. ROARING_TERMINATE("failed memory alloc in assignment");
  137. }
  138. api::roaring_bitmap_set_copy_on_write(
  139. &roaring, api::roaring_bitmap_get_copy_on_write(&r.roaring));
  140. return *this;
  141. }
  142. /**
  143. * Moves the content of the provided bitmap, and
  144. * discard the current content.
  145. */
  146. Roaring &operator=(Roaring &&r) noexcept {
  147. api::roaring_bitmap_clear(&roaring); // free this class's allocations
  148. // !!! See notes in the Move Constructor regarding roaring_bitmap_move()
  149. //
  150. roaring = r.roaring;
  151. api::roaring_bitmap_init_cleared(&r.roaring);
  152. return *this;
  153. }
  154. /**
  155. * Assignment from an initializer list.
  156. */
  157. Roaring &operator=(std::initializer_list<uint32_t> l) {
  158. // Delegate to move assignment operator
  159. *this = Roaring(l);
  160. return *this;
  161. }
  162. /**
  163. * Construct a bitmap from a list of uint32_t values.
  164. * E.g., bitmapOfList({1,2,3}).
  165. */
  166. static Roaring bitmapOfList(std::initializer_list<uint32_t> l) {
  167. Roaring ans;
  168. ans.addMany(l.size(), l.begin());
  169. return ans;
  170. }
  171. /**
  172. * Add value x
  173. */
  174. void add(uint32_t x) noexcept { api::roaring_bitmap_add(&roaring, x); }
  175. /**
  176. * Add value x
  177. * Returns true if a new value was added, false if the value was already
  178. * existing.
  179. */
  180. bool addChecked(uint32_t x) noexcept {
  181. return api::roaring_bitmap_add_checked(&roaring, x);
  182. }
  183. /**
  184. * Add all values in range [min, max)
  185. */
  186. void addRange(const uint64_t min, const uint64_t max) noexcept {
  187. return api::roaring_bitmap_add_range(&roaring, min, max);
  188. }
  189. /**
  190. * Add all values in range [min, max]
  191. */
  192. void addRangeClosed(const uint32_t min, const uint32_t max) noexcept {
  193. return api::roaring_bitmap_add_range_closed(&roaring, min, max);
  194. }
  195. /**
  196. * Add value n_args from pointer vals
  197. */
  198. void addMany(size_t n_args, const uint32_t *vals) noexcept {
  199. api::roaring_bitmap_add_many(&roaring, n_args, vals);
  200. }
  201. /**
  202. * Add value val, using context from a previous insert for speed
  203. * optimization.
  204. *
  205. * `context` will be used to store information between calls to make bulk
  206. * operations faster. `context` should be default-initialized before the
  207. * first call to this function.
  208. */
  209. void addBulk(BulkContext &context, uint32_t x) noexcept {
  210. api::roaring_bitmap_add_bulk(&roaring, &context.context_, x);
  211. }
  212. /**
  213. * Check if item x is present, using context from a previous insert or
  214. * search for speed optimization.
  215. *
  216. * `context` will be used to store information between calls to make bulk
  217. * operations faster. `context` should be default-initialized before the
  218. * first call to this function.
  219. */
  220. bool containsBulk(BulkContext &context, uint32_t x) const noexcept {
  221. return api::roaring_bitmap_contains_bulk(&roaring, &context.context_,
  222. x);
  223. }
  224. /**
  225. * Remove value x
  226. */
  227. void remove(uint32_t x) noexcept {
  228. api::roaring_bitmap_remove(&roaring, x);
  229. }
  230. /**
  231. * Remove value x
  232. * Returns true if a new value was removed, false if the value was not
  233. * existing.
  234. */
  235. bool removeChecked(uint32_t x) noexcept {
  236. return api::roaring_bitmap_remove_checked(&roaring, x);
  237. }
  238. /**
  239. * Remove all values in range [min, max)
  240. */
  241. void removeRange(uint64_t min, uint64_t max) noexcept {
  242. return api::roaring_bitmap_remove_range(&roaring, min, max);
  243. }
  244. /**
  245. * Remove all values in range [min, max]
  246. */
  247. void removeRangeClosed(uint32_t min, uint32_t max) noexcept {
  248. return api::roaring_bitmap_remove_range_closed(&roaring, min, max);
  249. }
  250. /**
  251. * Clears the bitmap.
  252. */
  253. void clear() { api::roaring_bitmap_clear(&roaring); }
  254. /**
  255. * Return the largest value (if not empty)
  256. */
  257. uint32_t maximum() const noexcept {
  258. return api::roaring_bitmap_maximum(&roaring);
  259. }
  260. /**
  261. * Return the smallest value (if not empty)
  262. */
  263. uint32_t minimum() const noexcept {
  264. return api::roaring_bitmap_minimum(&roaring);
  265. }
  266. /**
  267. * Check if value x is present
  268. */
  269. bool contains(uint32_t x) const noexcept {
  270. return api::roaring_bitmap_contains(&roaring, x);
  271. }
  272. /**
  273. * Check if all values from x (included) to y (excluded) are present
  274. */
  275. bool containsRange(const uint64_t x, const uint64_t y) const noexcept {
  276. return api::roaring_bitmap_contains_range(&roaring, x, y);
  277. }
  278. bool containsRangeClosed(const uint32_t x,
  279. const uint32_t y) const noexcept {
  280. return api::roaring_bitmap_contains_range_closed(&roaring, x, y);
  281. }
  282. /**
  283. * Compute the intersection between the current bitmap and the provided
  284. * bitmap, writing the result in the current bitmap. The provided bitmap
  285. * is not modified.
  286. *
  287. * Performance hint: if you are computing the intersection between several
  288. * bitmaps, two-by-two, it is best to start with the smallest bitmap.
  289. */
  290. Roaring &operator&=(const Roaring &r) noexcept {
  291. api::roaring_bitmap_and_inplace(&roaring, &r.roaring);
  292. return *this;
  293. }
  294. /**
  295. * Compute the difference between the current bitmap and the provided
  296. * bitmap, writing the result in the current bitmap. The provided bitmap
  297. * is not modified.
  298. */
  299. Roaring &operator-=(const Roaring &r) noexcept {
  300. api::roaring_bitmap_andnot_inplace(&roaring, &r.roaring);
  301. return *this;
  302. }
  303. /**
  304. * Compute the union between the current bitmap and the provided bitmap,
  305. * writing the result in the current bitmap. The provided bitmap is not
  306. * modified.
  307. *
  308. * See also the fastunion function to aggregate many bitmaps more quickly.
  309. */
  310. Roaring &operator|=(const Roaring &r) noexcept {
  311. api::roaring_bitmap_or_inplace(&roaring, &r.roaring);
  312. return *this;
  313. }
  314. /**
  315. * Compute the symmetric union between the current bitmap and the provided
  316. * bitmap, writing the result in the current bitmap. The provided bitmap
  317. * is not modified.
  318. */
  319. Roaring &operator^=(const Roaring &r) noexcept {
  320. api::roaring_bitmap_xor_inplace(&roaring, &r.roaring);
  321. return *this;
  322. }
  323. /**
  324. * Exchange the content of this bitmap with another.
  325. */
  326. void swap(Roaring &r) noexcept { std::swap(r.roaring, roaring); }
  327. /**
  328. * Get the cardinality of the bitmap (number of elements).
  329. */
  330. uint64_t cardinality() const noexcept {
  331. return api::roaring_bitmap_get_cardinality(&roaring);
  332. }
  333. /**
  334. * Returns true if the bitmap is empty (cardinality is zero).
  335. */
  336. bool isEmpty() const noexcept {
  337. return api::roaring_bitmap_is_empty(&roaring);
  338. }
  339. /**
  340. * Returns true if the bitmap is full (cardinality is uint32_t max + 1).
  341. * we put std::numeric_limits<>::max/min in parentheses
  342. * to avoid a clash with the Windows.h header under Windows.
  343. */
  344. bool isFull() const noexcept {
  345. return api::roaring_bitmap_get_cardinality(&roaring) ==
  346. ((uint64_t)(std::numeric_limits<uint32_t>::max)()) + 1;
  347. }
  348. /**
  349. * Returns true if the bitmap is subset of the other.
  350. */
  351. bool isSubset(const Roaring &r) const noexcept {
  352. return api::roaring_bitmap_is_subset(&roaring, &r.roaring);
  353. }
  354. /**
  355. * Returns true if the bitmap is strict subset of the other.
  356. */
  357. bool isStrictSubset(const Roaring &r) const noexcept {
  358. return api::roaring_bitmap_is_strict_subset(&roaring, &r.roaring);
  359. }
  360. /**
  361. * Convert the bitmap to an array. Write the output to "ans", caller is
  362. * responsible to ensure that there is enough memory allocated
  363. * (e.g., ans = new uint32[mybitmap.cardinality()];)
  364. */
  365. void toUint32Array(uint32_t *ans) const noexcept {
  366. api::roaring_bitmap_to_uint32_array(&roaring, ans);
  367. }
  368. /**
  369. * To int array with pagination
  370. */
  371. void rangeUint32Array(uint32_t *ans, size_t offset,
  372. size_t limit) const noexcept {
  373. api::roaring_bitmap_range_uint32_array(&roaring, offset, limit, ans);
  374. }
  375. /**
  376. * Return true if the two bitmaps contain the same elements.
  377. */
  378. bool operator==(const Roaring &r) const noexcept {
  379. return api::roaring_bitmap_equals(&roaring, &r.roaring);
  380. }
  381. /**
  382. * Compute the negation of the roaring bitmap within the half-open interval
  383. * [range_start, range_end). Areas outside the interval are unchanged.
  384. */
  385. void flip(uint64_t range_start, uint64_t range_end) noexcept {
  386. api::roaring_bitmap_flip_inplace(&roaring, range_start, range_end);
  387. }
  388. /**
  389. * Compute the negation of the roaring bitmap within the closed interval
  390. * [range_start, range_end]. Areas outside the interval are unchanged.
  391. */
  392. void flipClosed(uint32_t range_start, uint32_t range_end) noexcept {
  393. api::roaring_bitmap_flip_inplace_closed(&roaring, range_start,
  394. range_end);
  395. }
  396. /**
  397. * Remove run-length encoding even when it is more space efficient.
  398. * Return whether a change was applied.
  399. */
  400. bool removeRunCompression() noexcept {
  401. return api::roaring_bitmap_remove_run_compression(&roaring);
  402. }
  403. /**
  404. * Convert array and bitmap containers to run containers when it is more
  405. * efficient; also convert from run containers when more space efficient.
  406. * Returns true if the result has at least one run container. Additional
  407. * savings might be possible by calling shrinkToFit().
  408. */
  409. bool runOptimize() noexcept {
  410. return api::roaring_bitmap_run_optimize(&roaring);
  411. }
  412. /**
  413. * If needed, reallocate memory to shrink the memory usage. Returns
  414. * the number of bytes saved.
  415. */
  416. size_t shrinkToFit() noexcept {
  417. return api::roaring_bitmap_shrink_to_fit(&roaring);
  418. }
  419. /**
  420. * Iterate over the bitmap elements. The function iterator is called once
  421. * for all the values with ptr (can be NULL) as the second parameter of
  422. * each call.
  423. *
  424. * roaring_iterator is simply a pointer to a function that returns bool
  425. * (true means that the iteration should continue while false means that it
  426. * should stop), and takes (uint32_t,void*) as inputs.
  427. */
  428. void iterate(api::roaring_iterator iterator, void *ptr) const {
  429. api::roaring_iterate(&roaring, iterator, ptr);
  430. }
  431. /**
  432. * Selects the value at index rnk in the bitmap, where the smallest value
  433. * is at index 0.
  434. *
  435. * If the size of the roaring bitmap is strictly greater than rank, then
  436. * this function returns true and sets element to the element of given rank.
  437. * Otherwise, it returns false.
  438. */
  439. bool select(uint32_t rnk, uint32_t *element) const noexcept {
  440. return api::roaring_bitmap_select(&roaring, rnk, element);
  441. }
  442. /**
  443. * Computes the size of the intersection between two bitmaps.
  444. */
  445. uint64_t and_cardinality(const Roaring &r) const noexcept {
  446. return api::roaring_bitmap_and_cardinality(&roaring, &r.roaring);
  447. }
  448. /**
  449. * Check whether the two bitmaps intersect.
  450. */
  451. bool intersect(const Roaring &r) const noexcept {
  452. return api::roaring_bitmap_intersect(&roaring, &r.roaring);
  453. }
  454. /**
  455. * Computes the Jaccard index between two bitmaps. (Also known as the
  456. * Tanimoto distance,
  457. * or the Jaccard similarity coefficient)
  458. *
  459. * The Jaccard index is undefined if both bitmaps are empty.
  460. */
  461. double jaccard_index(const Roaring &r) const noexcept {
  462. return api::roaring_bitmap_jaccard_index(&roaring, &r.roaring);
  463. }
  464. /**
  465. * Computes the size of the union between two bitmaps.
  466. */
  467. uint64_t or_cardinality(const Roaring &r) const noexcept {
  468. return api::roaring_bitmap_or_cardinality(&roaring, &r.roaring);
  469. }
  470. /**
  471. * Computes the size of the difference (andnot) between two bitmaps.
  472. */
  473. uint64_t andnot_cardinality(const Roaring &r) const noexcept {
  474. return api::roaring_bitmap_andnot_cardinality(&roaring, &r.roaring);
  475. }
  476. /**
  477. * Computes the size of the symmetric difference (andnot) between two
  478. * bitmaps.
  479. */
  480. uint64_t xor_cardinality(const Roaring &r) const noexcept {
  481. return api::roaring_bitmap_xor_cardinality(&roaring, &r.roaring);
  482. }
  483. /**
  484. * Returns the number of integers that are smaller or equal to x.
  485. * Thus the rank of the smallest element is one. If
  486. * x is smaller than the smallest element, this function will return 0.
  487. * The rank and select functions differ in convention: this function returns
  488. * 1 when ranking the smallest value, but the select function returns the
  489. * smallest value when using index 0.
  490. */
  491. uint64_t rank(uint32_t x) const noexcept {
  492. return api::roaring_bitmap_rank(&roaring, x);
  493. }
  494. /**
  495. * Get `rank()` values in bulk. The values in `[begin .. end)` must be in
  496. * Ascending order. possible implementation: for(auto* iter = begin; iter !=
  497. * end; ++iter) *(ans++) = rank(*iter);
  498. */
  499. void rank_many(const uint32_t *begin, const uint32_t *end,
  500. uint64_t *ans) const noexcept {
  501. return api::roaring_bitmap_rank_many(&roaring, begin, end, ans);
  502. }
  503. /**
  504. * Returns the index of x in the set, index start from 0.
  505. * If the set doesn't contain x , this function will return -1.
  506. * The difference with rank function is that this function will return -1
  507. * when x isn't in the set, but the rank function will return a
  508. * non-negative number.
  509. */
  510. int64_t getIndex(uint32_t x) const noexcept {
  511. return api::roaring_bitmap_get_index(&roaring, x);
  512. }
  513. /**
  514. * Write a bitmap to a char buffer. This is meant to be compatible with
  515. * the Java and Go versions. Returns how many bytes were written which
  516. * should be getSizeInBytes().
  517. *
  518. * Setting the portable flag to false enable a custom format that
  519. * can save space compared to the portable format (e.g., for very
  520. * sparse bitmaps).
  521. *
  522. * Boost users can serialize bitmaps in this manner:
  523. *
  524. * BOOST_SERIALIZATION_SPLIT_FREE(Roaring)
  525. * namespace boost {
  526. * namespace serialization {
  527. *
  528. * template <class Archive>
  529. * void save(Archive& ar, const Roaring& bitmask,
  530. * const unsigned int version) {
  531. * std::size_t expected_size_in_bytes = bitmask.getSizeInBytes();
  532. * std::vector<char> buffer(expected_size_in_bytes);
  533. * std::size_t size_in_bytes = bitmask.write(buffer.data());
  534. *
  535. * ar& size_in_bytes;
  536. * ar& boost::serialization::make_binary_object(buffer.data(),
  537. * size_in_bytes);
  538. * }
  539. * template <class Archive>
  540. * void load(Archive& ar, Roaring& bitmask,
  541. * const unsigned int version) {
  542. * std::size_t size_in_bytes = 0;
  543. * ar& size_in_bytes;
  544. * std::vector<char> buffer(size_in_bytes);
  545. * ar& boost::serialization::make_binary_object(buffer.data(),
  546. * size_in_bytes);
  547. * bitmask = Roaring::readSafe(buffer.data(), size_in_bytes);
  548. * }
  549. * } // namespace serialization
  550. * } // namespace boost
  551. */
  552. size_t write(char *buf, bool portable = true) const noexcept {
  553. if (portable) {
  554. return api::roaring_bitmap_portable_serialize(&roaring, buf);
  555. } else {
  556. return api::roaring_bitmap_serialize(&roaring, buf);
  557. }
  558. }
  559. /**
  560. * Read a bitmap from a serialized version. This is meant to be compatible
  561. * with the Java and Go versions.
  562. *
  563. * Setting the portable flag to false enable a custom format that
  564. * can save space compared to the portable format (e.g., for very
  565. * sparse bitmaps).
  566. *
  567. * This function is unsafe in the sense that if you provide bad data,
  568. * many, many bytes could be read. See also readSafe.
  569. *
  570. * The function may throw std::runtime_error if a bitmap could not be read.
  571. * Not that even if it does not throw, the bitmap could still be unusable if
  572. * the loaded data does not match the portable Roaring specification: you
  573. * should ensure that the data you load come from a serialized bitmap.
  574. */
  575. static Roaring read(const char *buf, bool portable = true) {
  576. roaring_bitmap_t *r =
  577. portable ? api::roaring_bitmap_portable_deserialize(buf)
  578. : api::roaring_bitmap_deserialize(buf);
  579. if (r == NULL) {
  580. ROARING_TERMINATE("failed alloc while reading");
  581. }
  582. return Roaring(r);
  583. }
  584. /**
  585. * Read a bitmap from a serialized version, reading no more than maxbytes
  586. * bytes. This is meant to be compatible with the Java and Go versions.
  587. * The function itself is safe in the sense that it will not cause buffer
  588. * overflows. However, for correct operations, it is assumed that the bitmap
  589. * read was once serialized from a valid bitmap. If you provided an
  590. * incorrect input (garbage), then the bitmap read may not be in a valid
  591. * state and following operations may not lead to sensible results. It is
  592. * your responsability to ensure that the input bytes follow the format
  593. * specification if you want a usable bitmap:
  594. * https://github.com/RoaringBitmap/RoaringFormatSpec
  595. * In particular, the serialized array containers need to be in sorted
  596. * order, and the run containers should be in sorted non-overlapping order.
  597. * This is is guaranteed to happen when serializing an existing bitmap, but
  598. * not for random inputs. Note that this function assumes that your bitmap
  599. * was serialized in *portable* mode (which is the default with the 'write'
  600. * method).
  601. *
  602. * The function may throw std::runtime_error if a bitmap could not be read.
  603. * Not that even if it does not throw, the bitmap could still be unusable if
  604. * the loaded data does not match the portable Roaring specification: you
  605. * should ensure that the data you load come from a serialized bitmap.
  606. */
  607. static Roaring readSafe(const char *buf, size_t maxbytes) {
  608. roaring_bitmap_t *r =
  609. api::roaring_bitmap_portable_deserialize_safe(buf, maxbytes);
  610. if (r == NULL) {
  611. ROARING_TERMINATE("failed alloc while reading");
  612. }
  613. return Roaring(r);
  614. }
  615. /**
  616. * How many bytes are required to serialize this bitmap (meant to be
  617. * compatible with Java and Go versions)
  618. *
  619. * Setting the portable flag to false enable a custom format that
  620. * can save space compared to the portable format (e.g., for very
  621. * sparse bitmaps).
  622. */
  623. size_t getSizeInBytes(bool portable = true) const noexcept {
  624. if (portable) {
  625. return api::roaring_bitmap_portable_size_in_bytes(&roaring);
  626. } else {
  627. return api::roaring_bitmap_size_in_bytes(&roaring);
  628. }
  629. }
  630. /**
  631. * For advanced users.
  632. * This function may throw std::runtime_error.
  633. */
  634. static const Roaring frozenView(const char *buf, size_t length) {
  635. const roaring_bitmap_t *s =
  636. api::roaring_bitmap_frozen_view(buf, length);
  637. if (s == NULL) {
  638. ROARING_TERMINATE("failed to read frozen bitmap");
  639. }
  640. Roaring r;
  641. r.roaring = *s;
  642. return r;
  643. }
  644. /**
  645. * For advanced users; see roaring_bitmap_portable_deserialize_frozen.
  646. * This function may throw std::runtime_error.
  647. */
  648. static const Roaring portableDeserializeFrozen(const char *buf) {
  649. const roaring_bitmap_t *s =
  650. api::roaring_bitmap_portable_deserialize_frozen(buf);
  651. if (s == NULL) {
  652. ROARING_TERMINATE("failed to read portable frozen bitmap");
  653. }
  654. Roaring r;
  655. r.roaring = *s;
  656. return r;
  657. }
  658. /**
  659. * For advanced users.
  660. */
  661. void writeFrozen(char *buf) const noexcept {
  662. roaring_bitmap_frozen_serialize(&roaring, buf);
  663. }
  664. /**
  665. * For advanced users.
  666. */
  667. size_t getFrozenSizeInBytes() const noexcept {
  668. return roaring_bitmap_frozen_size_in_bytes(&roaring);
  669. }
  670. /**
  671. * Computes the intersection between two bitmaps and returns new bitmap.
  672. * The current bitmap and the provided bitmap are unchanged.
  673. *
  674. * Performance hint: if you are computing the intersection between several
  675. * bitmaps, two-by-two, it is best to start with the smallest bitmap.
  676. * Consider also using the operator &= to avoid needlessly creating
  677. * many temporary bitmaps.
  678. * This function may throw std::runtime_error.
  679. */
  680. Roaring operator&(const Roaring &o) const {
  681. roaring_bitmap_t *r = api::roaring_bitmap_and(&roaring, &o.roaring);
  682. if (r == NULL) {
  683. ROARING_TERMINATE("failed materalization in and");
  684. }
  685. return Roaring(r);
  686. }
  687. /**
  688. * Computes the difference between two bitmaps and returns new bitmap.
  689. * The current bitmap and the provided bitmap are unchanged.
  690. * This function may throw std::runtime_error.
  691. */
  692. Roaring operator-(const Roaring &o) const {
  693. roaring_bitmap_t *r = api::roaring_bitmap_andnot(&roaring, &o.roaring);
  694. if (r == NULL) {
  695. ROARING_TERMINATE("failed materalization in andnot");
  696. }
  697. return Roaring(r);
  698. }
  699. /**
  700. * Computes the union between two bitmaps and returns new bitmap.
  701. * The current bitmap and the provided bitmap are unchanged.
  702. * This function may throw std::runtime_error.
  703. */
  704. Roaring operator|(const Roaring &o) const {
  705. roaring_bitmap_t *r = api::roaring_bitmap_or(&roaring, &o.roaring);
  706. if (r == NULL) {
  707. ROARING_TERMINATE("failed materalization in or");
  708. }
  709. return Roaring(r);
  710. }
  711. /**
  712. * Computes the symmetric union between two bitmaps and returns new bitmap.
  713. * The current bitmap and the provided bitmap are unchanged.
  714. * This function may throw std::runtime_error.
  715. */
  716. Roaring operator^(const Roaring &o) const {
  717. roaring_bitmap_t *r = api::roaring_bitmap_xor(&roaring, &o.roaring);
  718. if (r == NULL) {
  719. ROARING_TERMINATE("failed materalization in xor");
  720. }
  721. return Roaring(r);
  722. }
  723. /**
  724. * Whether or not we apply copy and write.
  725. */
  726. void setCopyOnWrite(bool val) noexcept {
  727. api::roaring_bitmap_set_copy_on_write(&roaring, val);
  728. }
  729. /**
  730. * Print the content of the bitmap
  731. */
  732. void printf() const noexcept { api::roaring_bitmap_printf(&roaring); }
  733. /**
  734. * Print the content of the bitmap into a string
  735. */
  736. std::string toString() const noexcept {
  737. struct iter_data {
  738. std::string str{}; // The empty constructor silences warnings from
  739. // pedantic static analyzers.
  740. char first_char = '{';
  741. } outer_iter_data;
  742. if (!isEmpty()) {
  743. iterate(
  744. [](uint32_t value, void *inner_iter_data) -> bool {
  745. ((iter_data *)inner_iter_data)->str +=
  746. ((iter_data *)inner_iter_data)->first_char;
  747. ((iter_data *)inner_iter_data)->str +=
  748. std::to_string(value);
  749. ((iter_data *)inner_iter_data)->first_char = ',';
  750. return true;
  751. },
  752. (void *)&outer_iter_data);
  753. } else
  754. outer_iter_data.str = '{';
  755. outer_iter_data.str += '}';
  756. return outer_iter_data.str;
  757. }
  758. /**
  759. * Whether or not copy and write is active.
  760. */
  761. bool getCopyOnWrite() const noexcept {
  762. return api::roaring_bitmap_get_copy_on_write(&roaring);
  763. }
  764. /**
  765. * Computes the logical or (union) between "n" bitmaps (referenced by a
  766. * pointer).
  767. * This function may throw std::runtime_error.
  768. */
  769. static Roaring fastunion(size_t n, const Roaring **inputs) {
  770. const roaring_bitmap_t **x = (const roaring_bitmap_t **)roaring_malloc(
  771. n * sizeof(roaring_bitmap_t *));
  772. if (x == NULL) {
  773. ROARING_TERMINATE("failed memory alloc in fastunion");
  774. }
  775. for (size_t k = 0; k < n; ++k) x[k] = &inputs[k]->roaring;
  776. roaring_bitmap_t *c_ans = api::roaring_bitmap_or_many(n, x);
  777. if (c_ans == NULL) {
  778. roaring_free(x);
  779. ROARING_TERMINATE("failed memory alloc in fastunion");
  780. }
  781. Roaring ans(c_ans);
  782. roaring_free(x);
  783. return ans;
  784. }
  785. /**
  786. * Destructor. By contract, calling roaring_bitmap_clear() is enough to
  787. * release all auxiliary memory used by the structure.
  788. */
  789. ~Roaring() {
  790. if (!(roaring.high_low_container.flags & ROARING_FLAG_FROZEN)) {
  791. api::roaring_bitmap_clear(&roaring);
  792. } else {
  793. // The roaring member variable copies the `roaring_bitmap_t` and
  794. // nested `roaring_array_t` structures by value and is freed in the
  795. // constructor, however the underlying memory arena used for the
  796. // container data is not freed with it. Here we derive the arena
  797. // pointer from the second arena allocation in
  798. // `roaring_bitmap_frozen_view` and free it as well.
  799. roaring_bitmap_free(
  800. (roaring_bitmap_t *)((char *)
  801. roaring.high_low_container.containers -
  802. sizeof(roaring_bitmap_t)));
  803. }
  804. }
  805. friend class RoaringSetBitBiDirectionalIterator;
  806. typedef RoaringSetBitBiDirectionalIterator const_iterator;
  807. typedef RoaringSetBitBiDirectionalIterator const_bidirectional_iterator;
  808. /**
  809. * Returns an iterator that can be used to access the position of the set
  810. * bits. The running time complexity of a full scan is proportional to the
  811. * number of set bits: be aware that if you have long strings of 1s, this
  812. * can be very inefficient.
  813. *
  814. * It can be much faster to use the toArray method if you want to retrieve
  815. * the set bits.
  816. */
  817. const_iterator begin() const;
  818. /**
  819. * A bogus iterator that can be used together with begin()
  820. * for constructions such as for (auto i = b.begin(); * i!=b.end(); ++i) {}
  821. */
  822. const_iterator &end() const;
  823. roaring_bitmap_t roaring;
  824. };
  825. /**
  826. * Used to go through the set bits. Not optimally fast, but convenient.
  827. */
  828. class RoaringSetBitBiDirectionalIterator final {
  829. public:
  830. typedef std::bidirectional_iterator_tag iterator_category;
  831. typedef uint32_t *pointer;
  832. typedef uint32_t &reference_type;
  833. typedef uint32_t value_type;
  834. typedef int32_t difference_type;
  835. typedef RoaringSetBitBiDirectionalIterator type_of_iterator;
  836. explicit RoaringSetBitBiDirectionalIterator(const Roaring &parent,
  837. bool exhausted = false) {
  838. if (exhausted) {
  839. i.parent = &parent.roaring;
  840. i.container_index = INT32_MAX;
  841. i.has_value = false;
  842. i.current_value = UINT32_MAX;
  843. } else {
  844. api::roaring_iterator_init(&parent.roaring, &i);
  845. }
  846. }
  847. /**
  848. * Provides the location of the set bit.
  849. */
  850. value_type operator*() const { return i.current_value; }
  851. bool operator<(const type_of_iterator &o) const {
  852. if (!i.has_value) return false;
  853. if (!o.i.has_value) return true;
  854. return i.current_value < *o;
  855. }
  856. bool operator<=(const type_of_iterator &o) const {
  857. if (!o.i.has_value) return true;
  858. if (!i.has_value) return false;
  859. return i.current_value <= *o;
  860. }
  861. bool operator>(const type_of_iterator &o) const {
  862. if (!o.i.has_value) return false;
  863. if (!i.has_value) return true;
  864. return i.current_value > *o;
  865. }
  866. bool operator>=(const type_of_iterator &o) const {
  867. if (!i.has_value) return true;
  868. if (!o.i.has_value) return false;
  869. return i.current_value >= *o;
  870. }
  871. type_of_iterator &operator++() { // ++i, must returned inc. value
  872. api::roaring_uint32_iterator_advance(&i);
  873. return *this;
  874. }
  875. type_of_iterator operator++(int) { // i++, must return orig. value
  876. RoaringSetBitBiDirectionalIterator orig(*this);
  877. api::roaring_uint32_iterator_advance(&i);
  878. return orig;
  879. }
  880. /**
  881. * Move the iterator to the first value >= val.
  882. * Return true if there is such a value.
  883. */
  884. bool move_equalorlarger(value_type val) {
  885. return api::roaring_uint32_iterator_move_equalorlarger(&i, val);
  886. }
  887. /** DEPRECATED, use `move_equalorlarger`.*/
  888. CROARING_DEPRECATED void equalorlarger(uint32_t val) {
  889. api::roaring_uint32_iterator_move_equalorlarger(&i, val);
  890. }
  891. type_of_iterator &operator--() { // prefix --
  892. api::roaring_uint32_iterator_previous(&i);
  893. return *this;
  894. }
  895. type_of_iterator operator--(int) { // postfix --
  896. RoaringSetBitBiDirectionalIterator orig(*this);
  897. api::roaring_uint32_iterator_previous(&i);
  898. return orig;
  899. }
  900. bool operator==(const RoaringSetBitBiDirectionalIterator &o) const {
  901. return i.current_value == *o && i.has_value == o.i.has_value;
  902. }
  903. bool operator!=(const RoaringSetBitBiDirectionalIterator &o) const {
  904. return i.current_value != *o || i.has_value != o.i.has_value;
  905. }
  906. api::roaring_uint32_iterator_t
  907. i{}; // The empty constructor silences warnings from pedantic static
  908. // analyzers.
  909. };
  910. inline RoaringSetBitBiDirectionalIterator Roaring::begin() const {
  911. return RoaringSetBitBiDirectionalIterator(*this);
  912. }
  913. inline RoaringSetBitBiDirectionalIterator &Roaring::end() const {
  914. static RoaringSetBitBiDirectionalIterator e(*this, true);
  915. return e;
  916. }
  917. } // namespace roaring
  918. #endif /* INCLUDE_ROARING_HH_ */