bitset.h 20 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514
  1. /*
  2. * bitset.h
  3. *
  4. */
  5. #ifndef INCLUDE_CONTAINERS_BITSET_H_
  6. #define INCLUDE_CONTAINERS_BITSET_H_
  7. #include <stdbool.h>
  8. #include <stdint.h>
  9. #include <roaring/roaring_types.h> // roaring_iterator
  10. // Include other headers after roaring_types.h
  11. #include <roaring/containers/container_defs.h> // container_t, perfparameters
  12. #include <roaring/portability.h>
  13. #include <roaring/roaring_types.h> // roaring_iterator
  14. #include <roaring/utilasm.h> // ASM_XXX macros
  15. #ifdef __cplusplus
  16. extern "C" {
  17. namespace roaring {
  18. // Note: in pure C++ code, you should avoid putting `using` in header files
  19. using api::roaring_iterator;
  20. using api::roaring_iterator64;
  21. namespace internal {
  22. #endif
  23. enum {
  24. BITSET_CONTAINER_SIZE_IN_WORDS = (1 << 16) / 64,
  25. BITSET_UNKNOWN_CARDINALITY = -1
  26. };
  27. STRUCT_CONTAINER(bitset_container_s) {
  28. int32_t cardinality;
  29. uint64_t *words;
  30. };
  31. typedef struct bitset_container_s bitset_container_t;
  32. #define CAST_bitset(c) CAST(bitset_container_t *, c) // safer downcast
  33. #define const_CAST_bitset(c) CAST(const bitset_container_t *, c)
  34. #define movable_CAST_bitset(c) movable_CAST(bitset_container_t **, c)
  35. /* Create a new bitset. Return NULL in case of failure. */
  36. bitset_container_t *bitset_container_create(void);
  37. /* Free memory. */
  38. void bitset_container_free(bitset_container_t *bitset);
  39. /* Clear bitset (sets bits to 0). */
  40. void bitset_container_clear(bitset_container_t *bitset);
  41. /* Set all bits to 1. */
  42. void bitset_container_set_all(bitset_container_t *bitset);
  43. /* Duplicate bitset */
  44. bitset_container_t *bitset_container_clone(const bitset_container_t *src);
  45. /* Set the bit in [begin,end). WARNING: as of April 2016, this method is slow
  46. * and
  47. * should not be used in performance-sensitive code. Ever. */
  48. void bitset_container_set_range(bitset_container_t *bitset, uint32_t begin,
  49. uint32_t end);
  50. #if defined(CROARING_ASMBITMANIPOPTIMIZATION) && defined(__AVX2__)
  51. /* Set the ith bit. */
  52. static inline void bitset_container_set(bitset_container_t *bitset,
  53. uint16_t pos) {
  54. uint64_t shift = 6;
  55. uint64_t offset;
  56. uint64_t p = pos;
  57. ASM_SHIFT_RIGHT(p, shift, offset);
  58. uint64_t load = bitset->words[offset];
  59. ASM_SET_BIT_INC_WAS_CLEAR(load, p, bitset->cardinality);
  60. bitset->words[offset] = load;
  61. }
  62. /* Unset the ith bit. Currently unused. Could be used for optimization. */
  63. /*static inline void bitset_container_unset(bitset_container_t *bitset,
  64. uint16_t pos) {
  65. uint64_t shift = 6;
  66. uint64_t offset;
  67. uint64_t p = pos;
  68. ASM_SHIFT_RIGHT(p, shift, offset);
  69. uint64_t load = bitset->words[offset];
  70. ASM_CLEAR_BIT_DEC_WAS_SET(load, p, bitset->cardinality);
  71. bitset->words[offset] = load;
  72. }*/
  73. /* Add `pos' to `bitset'. Returns true if `pos' was not present. Might be slower
  74. * than bitset_container_set. */
  75. static inline bool bitset_container_add(bitset_container_t *bitset,
  76. uint16_t pos) {
  77. uint64_t shift = 6;
  78. uint64_t offset;
  79. uint64_t p = pos;
  80. ASM_SHIFT_RIGHT(p, shift, offset);
  81. uint64_t load = bitset->words[offset];
  82. // could be possibly slightly further optimized
  83. const int32_t oldcard = bitset->cardinality;
  84. ASM_SET_BIT_INC_WAS_CLEAR(load, p, bitset->cardinality);
  85. bitset->words[offset] = load;
  86. return bitset->cardinality - oldcard;
  87. }
  88. /* Remove `pos' from `bitset'. Returns true if `pos' was present. Might be
  89. * slower than bitset_container_unset. */
  90. static inline bool bitset_container_remove(bitset_container_t *bitset,
  91. uint16_t pos) {
  92. uint64_t shift = 6;
  93. uint64_t offset;
  94. uint64_t p = pos;
  95. ASM_SHIFT_RIGHT(p, shift, offset);
  96. uint64_t load = bitset->words[offset];
  97. // could be possibly slightly further optimized
  98. const int32_t oldcard = bitset->cardinality;
  99. ASM_CLEAR_BIT_DEC_WAS_SET(load, p, bitset->cardinality);
  100. bitset->words[offset] = load;
  101. return oldcard - bitset->cardinality;
  102. }
  103. /* Get the value of the ith bit. */
  104. inline bool bitset_container_get(const bitset_container_t *bitset,
  105. uint16_t pos) {
  106. uint64_t word = bitset->words[pos >> 6];
  107. const uint64_t p = pos;
  108. ASM_INPLACESHIFT_RIGHT(word, p);
  109. return word & 1;
  110. }
  111. #else
  112. /* Set the ith bit. */
  113. static inline void bitset_container_set(bitset_container_t *bitset,
  114. uint16_t pos) {
  115. const uint64_t old_word = bitset->words[pos >> 6];
  116. const int index = pos & 63;
  117. const uint64_t new_word = old_word | (UINT64_C(1) << index);
  118. bitset->cardinality += (uint32_t)((old_word ^ new_word) >> index);
  119. bitset->words[pos >> 6] = new_word;
  120. }
  121. /* Unset the ith bit. Currently unused. */
  122. /*static inline void bitset_container_unset(bitset_container_t *bitset,
  123. uint16_t pos) {
  124. const uint64_t old_word = bitset->words[pos >> 6];
  125. const int index = pos & 63;
  126. const uint64_t new_word = old_word & (~(UINT64_C(1) << index));
  127. bitset->cardinality -= (uint32_t)((old_word ^ new_word) >> index);
  128. bitset->words[pos >> 6] = new_word;
  129. }*/
  130. /* Add `pos' to `bitset'. Returns true if `pos' was not present. Might be slower
  131. * than bitset_container_set. */
  132. static inline bool bitset_container_add(bitset_container_t *bitset,
  133. uint16_t pos) {
  134. const uint64_t old_word = bitset->words[pos >> 6];
  135. const int index = pos & 63;
  136. const uint64_t new_word = old_word | (UINT64_C(1) << index);
  137. const uint64_t increment = (old_word ^ new_word) >> index;
  138. bitset->cardinality += (uint32_t)increment;
  139. bitset->words[pos >> 6] = new_word;
  140. return increment > 0;
  141. }
  142. /* Remove `pos' from `bitset'. Returns true if `pos' was present. Might be
  143. * slower than bitset_container_unset. */
  144. static inline bool bitset_container_remove(bitset_container_t *bitset,
  145. uint16_t pos) {
  146. const uint64_t old_word = bitset->words[pos >> 6];
  147. const int index = pos & 63;
  148. const uint64_t new_word = old_word & (~(UINT64_C(1) << index));
  149. const uint64_t increment = (old_word ^ new_word) >> index;
  150. bitset->cardinality -= (uint32_t)increment;
  151. bitset->words[pos >> 6] = new_word;
  152. return increment > 0;
  153. }
  154. /* Get the value of the ith bit. */
  155. inline bool bitset_container_get(const bitset_container_t *bitset,
  156. uint16_t pos) {
  157. const uint64_t word = bitset->words[pos >> 6];
  158. return (word >> (pos & 63)) & 1;
  159. }
  160. #endif
  161. /*
  162. * Check if all bits are set in a range of positions from pos_start (included)
  163. * to pos_end (excluded).
  164. */
  165. static inline bool bitset_container_get_range(const bitset_container_t *bitset,
  166. uint32_t pos_start,
  167. uint32_t pos_end) {
  168. const uint32_t start = pos_start >> 6;
  169. const uint32_t end = pos_end >> 6;
  170. const uint64_t first = ~((1ULL << (pos_start & 0x3F)) - 1);
  171. const uint64_t last = (1ULL << (pos_end & 0x3F)) - 1;
  172. if (start == end)
  173. return ((bitset->words[end] & first & last) == (first & last));
  174. if ((bitset->words[start] & first) != first) return false;
  175. if ((end < BITSET_CONTAINER_SIZE_IN_WORDS) &&
  176. ((bitset->words[end] & last) != last)) {
  177. return false;
  178. }
  179. for (uint32_t i = start + 1;
  180. (i < BITSET_CONTAINER_SIZE_IN_WORDS) && (i < end); ++i) {
  181. if (bitset->words[i] != UINT64_C(0xFFFFFFFFFFFFFFFF)) return false;
  182. }
  183. return true;
  184. }
  185. /* Check whether `bitset' is present in `array'. Calls bitset_container_get. */
  186. inline bool bitset_container_contains(const bitset_container_t *bitset,
  187. uint16_t pos) {
  188. return bitset_container_get(bitset, pos);
  189. }
  190. /*
  191. * Check whether a range of bits from position `pos_start' (included) to
  192. * `pos_end' (excluded) is present in `bitset'. Calls bitset_container_get_all.
  193. */
  194. static inline bool bitset_container_contains_range(
  195. const bitset_container_t *bitset, uint32_t pos_start, uint32_t pos_end) {
  196. return bitset_container_get_range(bitset, pos_start, pos_end);
  197. }
  198. /* Get the number of bits set */
  199. ALLOW_UNALIGNED
  200. static inline int bitset_container_cardinality(
  201. const bitset_container_t *bitset) {
  202. return bitset->cardinality;
  203. }
  204. /* Copy one container into another. We assume that they are distinct. */
  205. void bitset_container_copy(const bitset_container_t *source,
  206. bitset_container_t *dest);
  207. /* Add all the values [min,max) at a distance k*step from min: min,
  208. * min+step,.... */
  209. void bitset_container_add_from_range(bitset_container_t *bitset, uint32_t min,
  210. uint32_t max, uint16_t step);
  211. /* Get the number of bits set (force computation). This does not modify bitset.
  212. * To update the cardinality, you should do
  213. * bitset->cardinality = bitset_container_compute_cardinality(bitset).*/
  214. int bitset_container_compute_cardinality(const bitset_container_t *bitset);
  215. /* Check whether this bitset is empty,
  216. * it never modifies the bitset struct. */
  217. static inline bool bitset_container_empty(const bitset_container_t *bitset) {
  218. if (bitset->cardinality == BITSET_UNKNOWN_CARDINALITY) {
  219. for (int i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS; i++) {
  220. if ((bitset->words[i]) != 0) return false;
  221. }
  222. return true;
  223. }
  224. return bitset->cardinality == 0;
  225. }
  226. /* Get whether there is at least one bit set (see bitset_container_empty for
  227. the reverse), the bitset is never modified */
  228. static inline bool bitset_container_const_nonzero_cardinality(
  229. const bitset_container_t *bitset) {
  230. return !bitset_container_empty(bitset);
  231. }
  232. /*
  233. * Check whether the two bitsets intersect
  234. */
  235. bool bitset_container_intersect(const bitset_container_t *src_1,
  236. const bitset_container_t *src_2);
  237. /* Computes the union of bitsets `src_1' and `src_2' into `dst' and return the
  238. * cardinality. */
  239. int bitset_container_or(const bitset_container_t *src_1,
  240. const bitset_container_t *src_2,
  241. bitset_container_t *dst);
  242. /* Computes the union of bitsets `src_1' and `src_2' and return the cardinality.
  243. */
  244. int bitset_container_or_justcard(const bitset_container_t *src_1,
  245. const bitset_container_t *src_2);
  246. /* Computes the union of bitsets `src_1' and `src_2' into `dst' and return the
  247. * cardinality. Same as bitset_container_or. */
  248. int bitset_container_union(const bitset_container_t *src_1,
  249. const bitset_container_t *src_2,
  250. bitset_container_t *dst);
  251. /* Computes the union of bitsets `src_1' and `src_2' and return the
  252. * cardinality. Same as bitset_container_or_justcard. */
  253. int bitset_container_union_justcard(const bitset_container_t *src_1,
  254. const bitset_container_t *src_2);
  255. /* Computes the union of bitsets `src_1' and `src_2' into `dst', but does
  256. * not update the cardinality. Provided to optimize chained operations. */
  257. int bitset_container_union_nocard(const bitset_container_t *src_1,
  258. const bitset_container_t *src_2,
  259. bitset_container_t *dst);
  260. /* Computes the union of bitsets `src_1' and `src_2' into `dst', but does not
  261. * update the cardinality. Provided to optimize chained operations. */
  262. int bitset_container_or_nocard(const bitset_container_t *src_1,
  263. const bitset_container_t *src_2,
  264. bitset_container_t *dst);
  265. /* Computes the intersection of bitsets `src_1' and `src_2' into `dst' and
  266. * return the cardinality. */
  267. int bitset_container_and(const bitset_container_t *src_1,
  268. const bitset_container_t *src_2,
  269. bitset_container_t *dst);
  270. /* Computes the intersection of bitsets `src_1' and `src_2' and return the
  271. * cardinality. */
  272. int bitset_container_and_justcard(const bitset_container_t *src_1,
  273. const bitset_container_t *src_2);
  274. /* Computes the intersection of bitsets `src_1' and `src_2' into `dst' and
  275. * return the cardinality. Same as bitset_container_and. */
  276. int bitset_container_intersection(const bitset_container_t *src_1,
  277. const bitset_container_t *src_2,
  278. bitset_container_t *dst);
  279. /* Computes the intersection of bitsets `src_1' and `src_2' and return the
  280. * cardinality. Same as bitset_container_and_justcard. */
  281. int bitset_container_intersection_justcard(const bitset_container_t *src_1,
  282. const bitset_container_t *src_2);
  283. /* Computes the intersection of bitsets `src_1' and `src_2' into `dst', but does
  284. * not update the cardinality. Provided to optimize chained operations. */
  285. int bitset_container_intersection_nocard(const bitset_container_t *src_1,
  286. const bitset_container_t *src_2,
  287. bitset_container_t *dst);
  288. /* Computes the intersection of bitsets `src_1' and `src_2' into `dst', but does
  289. * not update the cardinality. Provided to optimize chained operations. */
  290. int bitset_container_and_nocard(const bitset_container_t *src_1,
  291. const bitset_container_t *src_2,
  292. bitset_container_t *dst);
  293. /* Computes the exclusive or of bitsets `src_1' and `src_2' into `dst' and
  294. * return the cardinality. */
  295. int bitset_container_xor(const bitset_container_t *src_1,
  296. const bitset_container_t *src_2,
  297. bitset_container_t *dst);
  298. /* Computes the exclusive or of bitsets `src_1' and `src_2' and return the
  299. * cardinality. */
  300. int bitset_container_xor_justcard(const bitset_container_t *src_1,
  301. const bitset_container_t *src_2);
  302. /* Computes the exclusive or of bitsets `src_1' and `src_2' into `dst', but does
  303. * not update the cardinality. Provided to optimize chained operations. */
  304. int bitset_container_xor_nocard(const bitset_container_t *src_1,
  305. const bitset_container_t *src_2,
  306. bitset_container_t *dst);
  307. /* Computes the and not of bitsets `src_1' and `src_2' into `dst' and return the
  308. * cardinality. */
  309. int bitset_container_andnot(const bitset_container_t *src_1,
  310. const bitset_container_t *src_2,
  311. bitset_container_t *dst);
  312. /* Computes the and not of bitsets `src_1' and `src_2' and return the
  313. * cardinality. */
  314. int bitset_container_andnot_justcard(const bitset_container_t *src_1,
  315. const bitset_container_t *src_2);
  316. /* Computes the and not or of bitsets `src_1' and `src_2' into `dst', but does
  317. * not update the cardinality. Provided to optimize chained operations. */
  318. int bitset_container_andnot_nocard(const bitset_container_t *src_1,
  319. const bitset_container_t *src_2,
  320. bitset_container_t *dst);
  321. void bitset_container_offset(const bitset_container_t *c, container_t **loc,
  322. container_t **hic, uint16_t offset);
  323. /*
  324. * Write out the 16-bit integers contained in this container as a list of 32-bit
  325. * integers using base
  326. * as the starting value (it might be expected that base has zeros in its 16
  327. * least significant bits).
  328. * The function returns the number of values written.
  329. * The caller is responsible for allocating enough memory in out.
  330. * The out pointer should point to enough memory (the cardinality times 32
  331. * bits).
  332. */
  333. int bitset_container_to_uint32_array(uint32_t *out,
  334. const bitset_container_t *bc,
  335. uint32_t base);
  336. /*
  337. * Print this container using printf (useful for debugging).
  338. */
  339. void bitset_container_printf(const bitset_container_t *v);
  340. /*
  341. * Print this container using printf as a comma-separated list of 32-bit
  342. * integers starting at base.
  343. */
  344. void bitset_container_printf_as_uint32_array(const bitset_container_t *v,
  345. uint32_t base);
  346. bool bitset_container_validate(const bitset_container_t *v,
  347. const char **reason);
  348. /**
  349. * Return the serialized size in bytes of a container.
  350. */
  351. static inline int32_t bitset_container_serialized_size_in_bytes(void) {
  352. return BITSET_CONTAINER_SIZE_IN_WORDS * 8;
  353. }
  354. /**
  355. * Return the the number of runs.
  356. */
  357. int bitset_container_number_of_runs(bitset_container_t *bc);
  358. bool bitset_container_iterate(const bitset_container_t *cont, uint32_t base,
  359. roaring_iterator iterator, void *ptr);
  360. bool bitset_container_iterate64(const bitset_container_t *cont, uint32_t base,
  361. roaring_iterator64 iterator, uint64_t high_bits,
  362. void *ptr);
  363. /**
  364. * Writes the underlying array to buf, outputs how many bytes were written.
  365. * This is meant to be byte-by-byte compatible with the Java and Go versions of
  366. * Roaring.
  367. * The number of bytes written should be
  368. * bitset_container_size_in_bytes(container).
  369. */
  370. int32_t bitset_container_write(const bitset_container_t *container, char *buf);
  371. /**
  372. * Reads the instance from buf, outputs how many bytes were read.
  373. * This is meant to be byte-by-byte compatible with the Java and Go versions of
  374. * Roaring.
  375. * The number of bytes read should be bitset_container_size_in_bytes(container).
  376. * You need to provide the (known) cardinality.
  377. */
  378. int32_t bitset_container_read(int32_t cardinality,
  379. bitset_container_t *container, const char *buf);
  380. /**
  381. * Return the serialized size in bytes of a container (see
  382. * bitset_container_write).
  383. * This is meant to be compatible with the Java and Go versions of Roaring and
  384. * assumes
  385. * that the cardinality of the container is already known or can be computed.
  386. */
  387. static inline int32_t bitset_container_size_in_bytes(
  388. const bitset_container_t *container) {
  389. (void)container;
  390. return BITSET_CONTAINER_SIZE_IN_WORDS * sizeof(uint64_t);
  391. }
  392. /**
  393. * Return true if the two containers have the same content.
  394. */
  395. bool bitset_container_equals(const bitset_container_t *container1,
  396. const bitset_container_t *container2);
  397. /**
  398. * Return true if container1 is a subset of container2.
  399. */
  400. bool bitset_container_is_subset(const bitset_container_t *container1,
  401. const bitset_container_t *container2);
  402. /**
  403. * If the element of given rank is in this container, supposing that the first
  404. * element has rank start_rank, then the function returns true and sets element
  405. * accordingly.
  406. * Otherwise, it returns false and update start_rank.
  407. */
  408. bool bitset_container_select(const bitset_container_t *container,
  409. uint32_t *start_rank, uint32_t rank,
  410. uint32_t *element);
  411. /* Returns the smallest value (assumes not empty) */
  412. uint16_t bitset_container_minimum(const bitset_container_t *container);
  413. /* Returns the largest value (assumes not empty) */
  414. uint16_t bitset_container_maximum(const bitset_container_t *container);
  415. /* Returns the number of values equal or smaller than x */
  416. int bitset_container_rank(const bitset_container_t *container, uint16_t x);
  417. /* bulk version of bitset_container_rank(); return number of consumed elements
  418. */
  419. uint32_t bitset_container_rank_many(const bitset_container_t *container,
  420. uint64_t start_rank, const uint32_t *begin,
  421. const uint32_t *end, uint64_t *ans);
  422. /* Returns the index of x , if not exsist return -1 */
  423. int bitset_container_get_index(const bitset_container_t *container, uint16_t x);
  424. /* Returns the index of the first value equal or larger than x, or -1 */
  425. int bitset_container_index_equalorlarger(const bitset_container_t *container,
  426. uint16_t x);
  427. #ifdef __cplusplus
  428. }
  429. }
  430. } // extern "C" { namespace roaring { namespace internal {
  431. #endif
  432. #endif /* INCLUDE_CONTAINERS_BITSET_H_ */