roaring.h 41 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798991001011021031041051061071081091101111121131141151161171181191201211221231241251261271281291301311321331341351361371381391401411421431441451461471481491501511521531541551561571581591601611621631641651661671681691701711721731741751761771781791801811821831841851861871881891901911921931941951961971981992002012022032042052062072082092102112122132142152162172182192202212222232242252262272282292302312322332342352362372382392402412422432442452462472482492502512522532542552562572582592602612622632642652662672682692702712722732742752762772782792802812822832842852862872882892902912922932942952962972982993003013023033043053063073083093103113123133143153163173183193203213223233243253263273283293303313323333343353363373383393403413423433443453463473483493503513523533543553563573583593603613623633643653663673683693703713723733743753763773783793803813823833843853863873883893903913923933943953963973983994004014024034044054064074084094104114124134144154164174184194204214224234244254264274284294304314324334344354364374384394404414424434444454464474484494504514524534544554564574584594604614624634644654664674684694704714724734744754764774784794804814824834844854864874884894904914924934944954964974984995005015025035045055065075085095105115125135145155165175185195205215225235245255265275285295305315325335345355365375385395405415425435445455465475485495505515525535545555565575585595605615625635645655665675685695705715725735745755765775785795805815825835845855865875885895905915925935945955965975985996006016026036046056066076086096106116126136146156166176186196206216226236246256266276286296306316326336346356366376386396406416426436446456466476486496506516526536546556566576586596606616626636646656666676686696706716726736746756766776786796806816826836846856866876886896906916926936946956966976986997007017027037047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132
  1. /*
  2. * An implementation of Roaring Bitmaps in C.
  3. */
  4. #ifndef ROARING_H
  5. #define ROARING_H
  6. #include <stdbool.h>
  7. #include <stddef.h> // for `size_t`
  8. #include <stdint.h>
  9. #include <roaring/roaring_types.h>
  10. // Include other headers after roaring_types.h
  11. #include <roaring/bitset/bitset.h>
  12. #include <roaring/memory.h>
  13. #include <roaring/portability.h>
  14. #include <roaring/roaring_version.h>
  15. #ifdef __cplusplus
  16. extern "C" {
  17. namespace roaring {
  18. namespace api {
  19. #endif
  20. typedef struct roaring_bitmap_s {
  21. roaring_array_t high_low_container;
  22. } roaring_bitmap_t;
  23. /**
  24. * Dynamically allocates a new bitmap (initially empty).
  25. * Returns NULL if the allocation fails.
  26. * Capacity is a performance hint for how many "containers" the data will need.
  27. * Client is responsible for calling `roaring_bitmap_free()`.
  28. */
  29. roaring_bitmap_t *roaring_bitmap_create_with_capacity(uint32_t cap);
  30. /**
  31. * Dynamically allocates a new bitmap (initially empty).
  32. * Returns NULL if the allocation fails.
  33. * Client is responsible for calling `roaring_bitmap_free()`.
  34. */
  35. inline roaring_bitmap_t *roaring_bitmap_create(void) {
  36. return roaring_bitmap_create_with_capacity(0);
  37. }
  38. /**
  39. * Initialize a roaring bitmap structure in memory controlled by client.
  40. * Capacity is a performance hint for how many "containers" the data will need.
  41. * Can return false if auxiliary allocations fail when capacity greater than 0.
  42. */
  43. bool roaring_bitmap_init_with_capacity(roaring_bitmap_t *r, uint32_t cap);
  44. /**
  45. * Initialize a roaring bitmap structure in memory controlled by client.
  46. * The bitmap will be in a "clear" state, with no auxiliary allocations.
  47. * Since this performs no allocations, the function will not fail.
  48. */
  49. inline void roaring_bitmap_init_cleared(roaring_bitmap_t *r) {
  50. roaring_bitmap_init_with_capacity(r, 0);
  51. }
  52. /**
  53. * Add all the values between min (included) and max (excluded) that are at a
  54. * distance k*step from min.
  55. */
  56. roaring_bitmap_t *roaring_bitmap_from_range(uint64_t min, uint64_t max,
  57. uint32_t step);
  58. /**
  59. * Creates a new bitmap from a pointer of uint32_t integers
  60. */
  61. roaring_bitmap_t *roaring_bitmap_of_ptr(size_t n_args, const uint32_t *vals);
  62. /*
  63. * Whether you want to use copy-on-write.
  64. * Saves memory and avoids copies, but needs more care in a threaded context.
  65. * Most users should ignore this flag.
  66. *
  67. * Note: If you do turn this flag to 'true', enabling COW, then ensure that you
  68. * do so for all of your bitmaps, since interactions between bitmaps with and
  69. * without COW is unsafe.
  70. */
  71. inline bool roaring_bitmap_get_copy_on_write(const roaring_bitmap_t *r) {
  72. return r->high_low_container.flags & ROARING_FLAG_COW;
  73. }
  74. inline void roaring_bitmap_set_copy_on_write(roaring_bitmap_t *r, bool cow) {
  75. if (cow) {
  76. r->high_low_container.flags |= ROARING_FLAG_COW;
  77. } else {
  78. r->high_low_container.flags &= ~ROARING_FLAG_COW;
  79. }
  80. }
  81. roaring_bitmap_t *roaring_bitmap_add_offset(const roaring_bitmap_t *bm,
  82. int64_t offset);
  83. /**
  84. * Describe the inner structure of the bitmap.
  85. */
  86. void roaring_bitmap_printf_describe(const roaring_bitmap_t *r);
  87. /**
  88. * Creates a new bitmap from a list of uint32_t integers
  89. *
  90. * This function is deprecated, use `roaring_bitmap_from` instead, which
  91. * doesn't require the number of elements to be passed in.
  92. *
  93. * @see roaring_bitmap_from
  94. */
  95. CROARING_DEPRECATED roaring_bitmap_t *roaring_bitmap_of(size_t n, ...);
  96. #ifdef __cplusplus
  97. /**
  98. * Creates a new bitmap which contains all values passed in as arguments.
  99. *
  100. * To create a bitmap from a variable number of arguments, use the
  101. * `roaring_bitmap_of_ptr` function instead.
  102. */
  103. // Use an immediately invoked closure, capturing by reference
  104. // (in case __VA_ARGS__ refers to context outside the closure)
  105. // Include a 0 at the beginning of the array to make the array length > 0
  106. // (zero sized arrays are not valid in standard c/c++)
  107. #define roaring_bitmap_from(...) \
  108. [&]() { \
  109. const uint32_t roaring_bitmap_from_array[] = {0, __VA_ARGS__}; \
  110. return roaring_bitmap_of_ptr((sizeof(roaring_bitmap_from_array) / \
  111. sizeof(roaring_bitmap_from_array[0])) - \
  112. 1, \
  113. &roaring_bitmap_from_array[1]); \
  114. }()
  115. #else
  116. /**
  117. * Creates a new bitmap which contains all values passed in as arguments.
  118. *
  119. * To create a bitmap from a variable number of arguments, use the
  120. * `roaring_bitmap_of_ptr` function instead.
  121. */
  122. // While __VA_ARGS__ occurs twice in expansion, one of the times is in a sizeof
  123. // expression, which is an unevaluated context, so it's even safe in the case
  124. // where expressions passed have side effects (roaring64_bitmap_from(my_func(),
  125. // ++i))
  126. // Include a 0 at the beginning of the array to make the array length > 0
  127. // (zero sized arrays are not valid in standard c/c++)
  128. #define roaring_bitmap_from(...) \
  129. roaring_bitmap_of_ptr( \
  130. (sizeof((const uint32_t[]){0, __VA_ARGS__}) / sizeof(uint32_t)) - 1, \
  131. &((const uint32_t[]){0, __VA_ARGS__})[1])
  132. #endif
  133. /**
  134. * Copies a bitmap (this does memory allocation).
  135. * The caller is responsible for memory management.
  136. */
  137. roaring_bitmap_t *roaring_bitmap_copy(const roaring_bitmap_t *r);
  138. /**
  139. * Copies a bitmap from src to dest. It is assumed that the pointer dest
  140. * is to an already allocated bitmap. The content of the dest bitmap is
  141. * freed/deleted.
  142. *
  143. * It might be preferable and simpler to call roaring_bitmap_copy except
  144. * that roaring_bitmap_overwrite can save on memory allocations.
  145. *
  146. * Returns true if successful, or false if there was an error. On failure,
  147. * the dest bitmap is left in a valid, empty state (even if it was not empty
  148. * before).
  149. */
  150. bool roaring_bitmap_overwrite(roaring_bitmap_t *dest,
  151. const roaring_bitmap_t *src);
  152. /**
  153. * Print the content of the bitmap.
  154. */
  155. void roaring_bitmap_printf(const roaring_bitmap_t *r);
  156. /**
  157. * Computes the intersection between two bitmaps and returns new bitmap. The
  158. * caller is responsible for memory management.
  159. *
  160. * Performance hint: if you are computing the intersection between several
  161. * bitmaps, two-by-two, it is best to start with the smallest bitmap.
  162. * You may also rely on roaring_bitmap_and_inplace to avoid creating
  163. * many temporary bitmaps.
  164. */
  165. roaring_bitmap_t *roaring_bitmap_and(const roaring_bitmap_t *r1,
  166. const roaring_bitmap_t *r2);
  167. /**
  168. * Computes the size of the intersection between two bitmaps.
  169. */
  170. uint64_t roaring_bitmap_and_cardinality(const roaring_bitmap_t *r1,
  171. const roaring_bitmap_t *r2);
  172. /**
  173. * Check whether two bitmaps intersect.
  174. */
  175. bool roaring_bitmap_intersect(const roaring_bitmap_t *r1,
  176. const roaring_bitmap_t *r2);
  177. /**
  178. * Check whether a bitmap and an open range intersect.
  179. */
  180. bool roaring_bitmap_intersect_with_range(const roaring_bitmap_t *bm, uint64_t x,
  181. uint64_t y);
  182. /**
  183. * Computes the Jaccard index between two bitmaps. (Also known as the Tanimoto
  184. * distance, or the Jaccard similarity coefficient)
  185. *
  186. * The Jaccard index is undefined if both bitmaps are empty.
  187. */
  188. double roaring_bitmap_jaccard_index(const roaring_bitmap_t *r1,
  189. const roaring_bitmap_t *r2);
  190. /**
  191. * Computes the size of the union between two bitmaps.
  192. */
  193. uint64_t roaring_bitmap_or_cardinality(const roaring_bitmap_t *r1,
  194. const roaring_bitmap_t *r2);
  195. /**
  196. * Computes the size of the difference (andnot) between two bitmaps.
  197. */
  198. uint64_t roaring_bitmap_andnot_cardinality(const roaring_bitmap_t *r1,
  199. const roaring_bitmap_t *r2);
  200. /**
  201. * Computes the size of the symmetric difference (xor) between two bitmaps.
  202. */
  203. uint64_t roaring_bitmap_xor_cardinality(const roaring_bitmap_t *r1,
  204. const roaring_bitmap_t *r2);
  205. /**
  206. * Inplace version of `roaring_bitmap_and()`, modifies r1
  207. * r1 == r2 is allowed.
  208. *
  209. * Performance hint: if you are computing the intersection between several
  210. * bitmaps, two-by-two, it is best to start with the smallest bitmap.
  211. */
  212. void roaring_bitmap_and_inplace(roaring_bitmap_t *r1,
  213. const roaring_bitmap_t *r2);
  214. /**
  215. * Computes the union between two bitmaps and returns new bitmap. The caller is
  216. * responsible for memory management.
  217. */
  218. roaring_bitmap_t *roaring_bitmap_or(const roaring_bitmap_t *r1,
  219. const roaring_bitmap_t *r2);
  220. /**
  221. * Inplace version of `roaring_bitmap_or(), modifies r1.
  222. * TODO: decide whether r1 == r2 ok
  223. */
  224. void roaring_bitmap_or_inplace(roaring_bitmap_t *r1,
  225. const roaring_bitmap_t *r2);
  226. /**
  227. * Compute the union of 'number' bitmaps.
  228. * Caller is responsible for freeing the result.
  229. * See also `roaring_bitmap_or_many_heap()`
  230. */
  231. roaring_bitmap_t *roaring_bitmap_or_many(size_t number,
  232. const roaring_bitmap_t **rs);
  233. /**
  234. * Compute the union of 'number' bitmaps using a heap. This can sometimes be
  235. * faster than `roaring_bitmap_or_many() which uses a naive algorithm.
  236. * Caller is responsible for freeing the result.
  237. */
  238. roaring_bitmap_t *roaring_bitmap_or_many_heap(uint32_t number,
  239. const roaring_bitmap_t **rs);
  240. /**
  241. * Computes the symmetric difference (xor) between two bitmaps
  242. * and returns new bitmap. The caller is responsible for memory management.
  243. */
  244. roaring_bitmap_t *roaring_bitmap_xor(const roaring_bitmap_t *r1,
  245. const roaring_bitmap_t *r2);
  246. /**
  247. * Inplace version of roaring_bitmap_xor, modifies r1, r1 != r2.
  248. */
  249. void roaring_bitmap_xor_inplace(roaring_bitmap_t *r1,
  250. const roaring_bitmap_t *r2);
  251. /**
  252. * Compute the xor of 'number' bitmaps.
  253. * Caller is responsible for freeing the result.
  254. */
  255. roaring_bitmap_t *roaring_bitmap_xor_many(size_t number,
  256. const roaring_bitmap_t **rs);
  257. /**
  258. * Computes the difference (andnot) between two bitmaps and returns new bitmap.
  259. * Caller is responsible for freeing the result.
  260. */
  261. roaring_bitmap_t *roaring_bitmap_andnot(const roaring_bitmap_t *r1,
  262. const roaring_bitmap_t *r2);
  263. /**
  264. * Inplace version of roaring_bitmap_andnot, modifies r1, r1 != r2.
  265. */
  266. void roaring_bitmap_andnot_inplace(roaring_bitmap_t *r1,
  267. const roaring_bitmap_t *r2);
  268. /**
  269. * TODO: consider implementing:
  270. *
  271. * "Compute the xor of 'number' bitmaps using a heap. This can sometimes be
  272. * faster than roaring_bitmap_xor_many which uses a naive algorithm. Caller is
  273. * responsible for freeing the result.""
  274. *
  275. * roaring_bitmap_t *roaring_bitmap_xor_many_heap(uint32_t number,
  276. * const roaring_bitmap_t **rs);
  277. */
  278. /**
  279. * Frees the memory.
  280. */
  281. void roaring_bitmap_free(const roaring_bitmap_t *r);
  282. /**
  283. * A bit of context usable with `roaring_bitmap_*_bulk()` functions
  284. *
  285. * Should be initialized with `{0}` (or `memset()` to all zeros).
  286. * Callers should treat it as an opaque type.
  287. *
  288. * A context may only be used with a single bitmap
  289. * (unless re-initialized to zero), and any modification to a bitmap
  290. * (other than modifications performed with `_bulk()` functions with the context
  291. * passed) will invalidate any contexts associated with that bitmap.
  292. */
  293. typedef struct roaring_bulk_context_s {
  294. ROARING_CONTAINER_T *container;
  295. int idx;
  296. uint16_t key;
  297. uint8_t typecode;
  298. } roaring_bulk_context_t;
  299. /**
  300. * Add an item, using context from a previous insert for speed optimization.
  301. *
  302. * `context` will be used to store information between calls to make bulk
  303. * operations faster. `*context` should be zero-initialized before the first
  304. * call to this function.
  305. *
  306. * Modifying the bitmap in any way (other than `-bulk` suffixed functions)
  307. * will invalidate the stored context, calling this function with a non-zero
  308. * context after doing any modification invokes undefined behavior.
  309. *
  310. * In order to exploit this optimization, the caller should call this function
  311. * with values with the same "key" (high 16 bits of the value) consecutively.
  312. */
  313. void roaring_bitmap_add_bulk(roaring_bitmap_t *r,
  314. roaring_bulk_context_t *context, uint32_t val);
  315. /**
  316. * Add value n_args from pointer vals, faster than repeatedly calling
  317. * `roaring_bitmap_add()`
  318. *
  319. * In order to exploit this optimization, the caller should attempt to keep
  320. * values with the same "key" (high 16 bits of the value) as consecutive
  321. * elements in `vals`
  322. */
  323. void roaring_bitmap_add_many(roaring_bitmap_t *r, size_t n_args,
  324. const uint32_t *vals);
  325. /**
  326. * Add value x
  327. */
  328. void roaring_bitmap_add(roaring_bitmap_t *r, uint32_t x);
  329. /**
  330. * Add value x
  331. * Returns true if a new value was added, false if the value already existed.
  332. */
  333. bool roaring_bitmap_add_checked(roaring_bitmap_t *r, uint32_t x);
  334. /**
  335. * Add all values in range [min, max]
  336. */
  337. void roaring_bitmap_add_range_closed(roaring_bitmap_t *r, uint32_t min,
  338. uint32_t max);
  339. /**
  340. * Add all values in range [min, max)
  341. */
  342. inline void roaring_bitmap_add_range(roaring_bitmap_t *r, uint64_t min,
  343. uint64_t max) {
  344. if (max <= min) return;
  345. roaring_bitmap_add_range_closed(r, (uint32_t)min, (uint32_t)(max - 1));
  346. }
  347. /**
  348. * Remove value x
  349. */
  350. void roaring_bitmap_remove(roaring_bitmap_t *r, uint32_t x);
  351. /**
  352. * Remove all values in range [min, max]
  353. */
  354. void roaring_bitmap_remove_range_closed(roaring_bitmap_t *r, uint32_t min,
  355. uint32_t max);
  356. /**
  357. * Remove all values in range [min, max)
  358. */
  359. inline void roaring_bitmap_remove_range(roaring_bitmap_t *r, uint64_t min,
  360. uint64_t max) {
  361. if (max <= min) return;
  362. roaring_bitmap_remove_range_closed(r, (uint32_t)min, (uint32_t)(max - 1));
  363. }
  364. /**
  365. * Remove multiple values
  366. */
  367. void roaring_bitmap_remove_many(roaring_bitmap_t *r, size_t n_args,
  368. const uint32_t *vals);
  369. /**
  370. * Remove value x
  371. * Returns true if a new value was removed, false if the value was not existing.
  372. */
  373. bool roaring_bitmap_remove_checked(roaring_bitmap_t *r, uint32_t x);
  374. /**
  375. * Check if value is present
  376. */
  377. bool roaring_bitmap_contains(const roaring_bitmap_t *r, uint32_t val);
  378. /**
  379. * Check whether a range of values from range_start (included)
  380. * to range_end (excluded) is present
  381. */
  382. bool roaring_bitmap_contains_range(const roaring_bitmap_t *r,
  383. uint64_t range_start, uint64_t range_end);
  384. /**
  385. * Check if an items is present, using context from a previous insert or search
  386. * for speed optimization.
  387. *
  388. * `context` will be used to store information between calls to make bulk
  389. * operations faster. `*context` should be zero-initialized before the first
  390. * call to this function.
  391. *
  392. * Modifying the bitmap in any way (other than `-bulk` suffixed functions)
  393. * will invalidate the stored context, calling this function with a non-zero
  394. * context after doing any modification invokes undefined behavior.
  395. *
  396. * In order to exploit this optimization, the caller should call this function
  397. * with values with the same "key" (high 16 bits of the value) consecutively.
  398. */
  399. bool roaring_bitmap_contains_bulk(const roaring_bitmap_t *r,
  400. roaring_bulk_context_t *context,
  401. uint32_t val);
  402. /**
  403. * Get the cardinality of the bitmap (number of elements).
  404. */
  405. uint64_t roaring_bitmap_get_cardinality(const roaring_bitmap_t *r);
  406. /**
  407. * Returns the number of elements in the range [range_start, range_end).
  408. */
  409. uint64_t roaring_bitmap_range_cardinality(const roaring_bitmap_t *r,
  410. uint64_t range_start,
  411. uint64_t range_end);
  412. /**
  413. * Returns true if the bitmap is empty (cardinality is zero).
  414. */
  415. bool roaring_bitmap_is_empty(const roaring_bitmap_t *r);
  416. /**
  417. * Empties the bitmap. It will have no auxiliary allocations (so if the bitmap
  418. * was initialized in client memory via roaring_bitmap_init(), then a call to
  419. * roaring_bitmap_clear() would be enough to "free" it)
  420. */
  421. void roaring_bitmap_clear(roaring_bitmap_t *r);
  422. /**
  423. * Convert the bitmap to a sorted array, output in `ans`.
  424. *
  425. * Caller is responsible to ensure that there is enough memory allocated, e.g.
  426. *
  427. * ans = malloc(roaring_bitmap_get_cardinality(bitmap) * sizeof(uint32_t));
  428. */
  429. void roaring_bitmap_to_uint32_array(const roaring_bitmap_t *r, uint32_t *ans);
  430. /**
  431. * Store the bitmap to a bitset. This can be useful for people
  432. * who need the performance and simplicity of a standard bitset.
  433. * We assume that the input bitset is originally empty (does not
  434. * have any set bit).
  435. *
  436. * bitset_t * out = bitset_create();
  437. * // if the bitset has content in it, call "bitset_clear(out)"
  438. * bool success = roaring_bitmap_to_bitset(mybitmap, out);
  439. * // on failure, success will be false.
  440. * // You can then query the bitset:
  441. * bool is_present = bitset_get(out, 10011 );
  442. * // you must free the memory:
  443. * bitset_free(out);
  444. *
  445. */
  446. bool roaring_bitmap_to_bitset(const roaring_bitmap_t *r, bitset_t *bitset);
  447. /**
  448. * Convert the bitmap to a sorted array from `offset` by `limit`, output in
  449. * `ans`.
  450. *
  451. * Caller is responsible to ensure that there is enough memory allocated, e.g.
  452. *
  453. * ans = malloc(roaring_bitmap_get_cardinality(limit) * sizeof(uint32_t));
  454. *
  455. * Return false in case of failure (e.g., insufficient memory)
  456. */
  457. bool roaring_bitmap_range_uint32_array(const roaring_bitmap_t *r, size_t offset,
  458. size_t limit, uint32_t *ans);
  459. /**
  460. * Remove run-length encoding even when it is more space efficient.
  461. * Return whether a change was applied.
  462. */
  463. bool roaring_bitmap_remove_run_compression(roaring_bitmap_t *r);
  464. /**
  465. * Convert array and bitmap containers to run containers when it is more
  466. * efficient; also convert from run containers when more space efficient.
  467. *
  468. * Returns true if the result has at least one run container.
  469. * Additional savings might be possible by calling `shrinkToFit()`.
  470. */
  471. bool roaring_bitmap_run_optimize(roaring_bitmap_t *r);
  472. /**
  473. * If needed, reallocate memory to shrink the memory usage.
  474. * Returns the number of bytes saved.
  475. */
  476. size_t roaring_bitmap_shrink_to_fit(roaring_bitmap_t *r);
  477. /**
  478. * Write the bitmap to an output pointer, this output buffer should refer to
  479. * at least `roaring_bitmap_size_in_bytes(r)` allocated bytes.
  480. *
  481. * See `roaring_bitmap_portable_serialize()` if you want a format that's
  482. * compatible with Java and Go implementations. This format can sometimes be
  483. * more space efficient than the portable form, e.g. when the data is sparse.
  484. *
  485. * Returns how many bytes written, should be `roaring_bitmap_size_in_bytes(r)`.
  486. *
  487. * This function is endian-sensitive. If you have a big-endian system (e.g., a
  488. * mainframe IBM s390x), the data format is going to be big-endian and not
  489. * compatible with little-endian systems.
  490. */
  491. size_t roaring_bitmap_serialize(const roaring_bitmap_t *r, char *buf);
  492. /**
  493. * Use with `roaring_bitmap_serialize()`.
  494. *
  495. * (See `roaring_bitmap_portable_deserialize()` if you want a format that's
  496. * compatible with Java and Go implementations).
  497. *
  498. * This function is endian-sensitive. If you have a big-endian system (e.g., a
  499. * mainframe IBM s390x), the data format is going to be big-endian and not
  500. * compatible with little-endian systems.
  501. */
  502. roaring_bitmap_t *roaring_bitmap_deserialize(const void *buf);
  503. /**
  504. * Use with `roaring_bitmap_serialize()`.
  505. *
  506. * (See `roaring_bitmap_portable_deserialize_safe()` if you want a format that's
  507. * compatible with Java and Go implementations).
  508. *
  509. * This function is endian-sensitive. If you have a big-endian system (e.g., a
  510. * mainframe IBM s390x), the data format is going to be big-endian and not
  511. * compatible with little-endian systems.
  512. *
  513. * The difference with `roaring_bitmap_deserialize()` is that this function
  514. * checks that the input buffer is a valid bitmap. If the buffer is too small,
  515. * NULL is returned.
  516. */
  517. roaring_bitmap_t *roaring_bitmap_deserialize_safe(const void *buf,
  518. size_t maxbytes);
  519. /**
  520. * How many bytes are required to serialize this bitmap (NOT compatible
  521. * with Java and Go versions)
  522. */
  523. size_t roaring_bitmap_size_in_bytes(const roaring_bitmap_t *r);
  524. /**
  525. * Read bitmap from a serialized buffer.
  526. * In case of failure, NULL is returned.
  527. *
  528. * This function is unsafe in the sense that if there is no valid serialized
  529. * bitmap at the pointer, then many bytes could be read, possibly causing a
  530. * buffer overflow. See also roaring_bitmap_portable_deserialize_safe().
  531. *
  532. * This is meant to be compatible with the Java and Go versions:
  533. * https://github.com/RoaringBitmap/RoaringFormatSpec
  534. *
  535. * This function is endian-sensitive. If you have a big-endian system (e.g., a
  536. * mainframe IBM s390x), the data format is going to be big-endian and not
  537. * compatible with little-endian systems.
  538. */
  539. roaring_bitmap_t *roaring_bitmap_portable_deserialize(const char *buf);
  540. /**
  541. * Read bitmap from a serialized buffer safely (reading up to maxbytes).
  542. * In case of failure, NULL is returned.
  543. *
  544. * This is meant to be compatible with the Java and Go versions:
  545. * https://github.com/RoaringBitmap/RoaringFormatSpec
  546. *
  547. * The function itself is safe in the sense that it will not cause buffer
  548. * overflows. However, for correct operations, it is assumed that the bitmap
  549. * read was once serialized from a valid bitmap (i.e., it follows the format
  550. * specification). If you provided an incorrect input (garbage), then the bitmap
  551. * read may not be in a valid state and following operations may not lead to
  552. * sensible results. In particular, the serialized array containers need to be
  553. * in sorted order, and the run containers should be in sorted non-overlapping
  554. * order. This is is guaranteed to happen when serializing an existing bitmap,
  555. * but not for random inputs.
  556. *
  557. * You may use roaring_bitmap_internal_validate to check the validity of the
  558. * bitmap prior to using it. You may also use other strategies to check for
  559. * corrupted inputs (e.g., checksums).
  560. *
  561. * This function is endian-sensitive. If you have a big-endian system (e.g., a
  562. * mainframe IBM s390x), the data format is going to be big-endian and not
  563. * compatible with little-endian systems.
  564. */
  565. roaring_bitmap_t *roaring_bitmap_portable_deserialize_safe(const char *buf,
  566. size_t maxbytes);
  567. /**
  568. * Read bitmap from a serialized buffer.
  569. * In case of failure, NULL is returned.
  570. *
  571. * Bitmap returned by this function can be used in all readonly contexts.
  572. * Bitmap must be freed as usual, by calling roaring_bitmap_free().
  573. * Underlying buffer must not be freed or modified while it backs any bitmaps.
  574. *
  575. * The function is unsafe in the following ways:
  576. * 1) It may execute unaligned memory accesses.
  577. * 2) A buffer overflow may occur if buf does not point to a valid serialized
  578. * bitmap.
  579. *
  580. * This is meant to be compatible with the Java and Go versions:
  581. * https://github.com/RoaringBitmap/RoaringFormatSpec
  582. *
  583. * This function is endian-sensitive. If you have a big-endian system (e.g., a
  584. * mainframe IBM s390x), the data format is going to be big-endian and not
  585. * compatible with little-endian systems.
  586. */
  587. roaring_bitmap_t *roaring_bitmap_portable_deserialize_frozen(const char *buf);
  588. /**
  589. * Check how many bytes would be read (up to maxbytes) at this pointer if there
  590. * is a bitmap, returns zero if there is no valid bitmap.
  591. *
  592. * This is meant to be compatible with the Java and Go versions:
  593. * https://github.com/RoaringBitmap/RoaringFormatSpec
  594. */
  595. size_t roaring_bitmap_portable_deserialize_size(const char *buf,
  596. size_t maxbytes);
  597. /**
  598. * How many bytes are required to serialize this bitmap.
  599. *
  600. * This is meant to be compatible with the Java and Go versions:
  601. * https://github.com/RoaringBitmap/RoaringFormatSpec
  602. */
  603. size_t roaring_bitmap_portable_size_in_bytes(const roaring_bitmap_t *r);
  604. /**
  605. * Write a bitmap to a char buffer. The output buffer should refer to at least
  606. * `roaring_bitmap_portable_size_in_bytes(r)` bytes of allocated memory.
  607. *
  608. * Returns how many bytes were written which should match
  609. * `roaring_bitmap_portable_size_in_bytes(r)`.
  610. *
  611. * This is meant to be compatible with the Java and Go versions:
  612. * https://github.com/RoaringBitmap/RoaringFormatSpec
  613. *
  614. * This function is endian-sensitive. If you have a big-endian system (e.g., a
  615. * mainframe IBM s390x), the data format is going to be big-endian and not
  616. * compatible with little-endian systems.
  617. */
  618. size_t roaring_bitmap_portable_serialize(const roaring_bitmap_t *r, char *buf);
  619. /*
  620. * "Frozen" serialization format imitates memory layout of roaring_bitmap_t.
  621. * Deserialized bitmap is a constant view of the underlying buffer.
  622. * This significantly reduces amount of allocations and copying required during
  623. * deserialization.
  624. * It can be used with memory mapped files.
  625. * Example can be found in benchmarks/frozen_benchmark.c
  626. *
  627. * [#####] const roaring_bitmap_t *
  628. * | | |
  629. * +----+ | +-+
  630. * | | |
  631. * [#####################################] underlying buffer
  632. *
  633. * Note that because frozen serialization format imitates C memory layout
  634. * of roaring_bitmap_t, it is not fixed. It is different on big/little endian
  635. * platforms and can be changed in future.
  636. */
  637. /**
  638. * Returns number of bytes required to serialize bitmap using frozen format.
  639. */
  640. size_t roaring_bitmap_frozen_size_in_bytes(const roaring_bitmap_t *r);
  641. /**
  642. * Serializes bitmap using frozen format.
  643. * Buffer size must be at least roaring_bitmap_frozen_size_in_bytes().
  644. *
  645. * This function is endian-sensitive. If you have a big-endian system (e.g., a
  646. * mainframe IBM s390x), the data format is going to be big-endian and not
  647. * compatible with little-endian systems.
  648. */
  649. void roaring_bitmap_frozen_serialize(const roaring_bitmap_t *r, char *buf);
  650. /**
  651. * Creates constant bitmap that is a view of a given buffer.
  652. * Buffer data should have been written by `roaring_bitmap_frozen_serialize()`
  653. * Its beginning must also be aligned by 32 bytes.
  654. * Length must be equal exactly to `roaring_bitmap_frozen_size_in_bytes()`.
  655. * In case of failure, NULL is returned.
  656. *
  657. * Bitmap returned by this function can be used in all readonly contexts.
  658. * Bitmap must be freed as usual, by calling roaring_bitmap_free().
  659. * Underlying buffer must not be freed or modified while it backs any bitmaps.
  660. *
  661. * This function is endian-sensitive. If you have a big-endian system (e.g., a
  662. * mainframe IBM s390x), the data format is going to be big-endian and not
  663. * compatible with little-endian systems.
  664. */
  665. const roaring_bitmap_t *roaring_bitmap_frozen_view(const char *buf,
  666. size_t length);
  667. /**
  668. * Iterate over the bitmap elements. The function iterator is called once for
  669. * all the values with ptr (can be NULL) as the second parameter of each call.
  670. *
  671. * `roaring_iterator` is simply a pointer to a function that returns bool
  672. * (true means that the iteration should continue while false means that it
  673. * should stop), and takes (uint32_t,void*) as inputs.
  674. *
  675. * Returns true if the roaring_iterator returned true throughout (so that all
  676. * data points were necessarily visited).
  677. *
  678. * Iteration is ordered: from the smallest to the largest elements.
  679. */
  680. bool roaring_iterate(const roaring_bitmap_t *r, roaring_iterator iterator,
  681. void *ptr);
  682. bool roaring_iterate64(const roaring_bitmap_t *r, roaring_iterator64 iterator,
  683. uint64_t high_bits, void *ptr);
  684. /**
  685. * Return true if the two bitmaps contain the same elements.
  686. */
  687. bool roaring_bitmap_equals(const roaring_bitmap_t *r1,
  688. const roaring_bitmap_t *r2);
  689. /**
  690. * Return true if all the elements of r1 are also in r2.
  691. */
  692. bool roaring_bitmap_is_subset(const roaring_bitmap_t *r1,
  693. const roaring_bitmap_t *r2);
  694. /**
  695. * Return true if all the elements of r1 are also in r2, and r2 is strictly
  696. * greater than r1.
  697. */
  698. bool roaring_bitmap_is_strict_subset(const roaring_bitmap_t *r1,
  699. const roaring_bitmap_t *r2);
  700. /**
  701. * (For expert users who seek high performance.)
  702. *
  703. * Computes the union between two bitmaps and returns new bitmap. The caller is
  704. * responsible for memory management.
  705. *
  706. * The lazy version defers some computations such as the maintenance of the
  707. * cardinality counts. Thus you must call `roaring_bitmap_repair_after_lazy()`
  708. * after executing "lazy" computations.
  709. *
  710. * It is safe to repeatedly call roaring_bitmap_lazy_or_inplace on the result.
  711. *
  712. * `bitsetconversion` is a flag which determines whether container-container
  713. * operations force a bitset conversion.
  714. */
  715. roaring_bitmap_t *roaring_bitmap_lazy_or(const roaring_bitmap_t *r1,
  716. const roaring_bitmap_t *r2,
  717. const bool bitsetconversion);
  718. /**
  719. * (For expert users who seek high performance.)
  720. *
  721. * Inplace version of roaring_bitmap_lazy_or, modifies r1.
  722. *
  723. * `bitsetconversion` is a flag which determines whether container-container
  724. * operations force a bitset conversion.
  725. */
  726. void roaring_bitmap_lazy_or_inplace(roaring_bitmap_t *r1,
  727. const roaring_bitmap_t *r2,
  728. const bool bitsetconversion);
  729. /**
  730. * (For expert users who seek high performance.)
  731. *
  732. * Execute maintenance on a bitmap created from `roaring_bitmap_lazy_or()`
  733. * or modified with `roaring_bitmap_lazy_or_inplace()`.
  734. */
  735. void roaring_bitmap_repair_after_lazy(roaring_bitmap_t *r1);
  736. /**
  737. * Computes the symmetric difference between two bitmaps and returns new bitmap.
  738. * The caller is responsible for memory management.
  739. *
  740. * The lazy version defers some computations such as the maintenance of the
  741. * cardinality counts. Thus you must call `roaring_bitmap_repair_after_lazy()`
  742. * after executing "lazy" computations.
  743. *
  744. * It is safe to repeatedly call `roaring_bitmap_lazy_xor_inplace()` on
  745. * the result.
  746. */
  747. roaring_bitmap_t *roaring_bitmap_lazy_xor(const roaring_bitmap_t *r1,
  748. const roaring_bitmap_t *r2);
  749. /**
  750. * (For expert users who seek high performance.)
  751. *
  752. * Inplace version of roaring_bitmap_lazy_xor, modifies r1. r1 != r2
  753. */
  754. void roaring_bitmap_lazy_xor_inplace(roaring_bitmap_t *r1,
  755. const roaring_bitmap_t *r2);
  756. /**
  757. * Compute the negation of the bitmap in the interval [range_start, range_end).
  758. * The number of negated values is range_end - range_start.
  759. * Areas outside the range are passed through unchanged.
  760. */
  761. roaring_bitmap_t *roaring_bitmap_flip(const roaring_bitmap_t *r1,
  762. uint64_t range_start, uint64_t range_end);
  763. /**
  764. * compute (in place) the negation of the roaring bitmap within a specified
  765. * interval: [range_start, range_end). The number of negated values is
  766. * range_end - range_start.
  767. * Areas outside the range are passed through unchanged.
  768. */
  769. void roaring_bitmap_flip_inplace(roaring_bitmap_t *r1, uint64_t range_start,
  770. uint64_t range_end);
  771. /**
  772. * Selects the element at index 'rank' where the smallest element is at index 0.
  773. * If the size of the roaring bitmap is strictly greater than rank, then this
  774. * function returns true and sets element to the element of given rank.
  775. * Otherwise, it returns false.
  776. */
  777. bool roaring_bitmap_select(const roaring_bitmap_t *r, uint32_t rank,
  778. uint32_t *element);
  779. /**
  780. * roaring_bitmap_rank returns the number of integers that are smaller or equal
  781. * to x. Thus if x is the first element, this function will return 1. If
  782. * x is smaller than the smallest element, this function will return 0.
  783. *
  784. * The indexing convention differs between roaring_bitmap_select and
  785. * roaring_bitmap_rank: roaring_bitmap_select refers to the smallest value
  786. * as having index 0, whereas roaring_bitmap_rank returns 1 when ranking
  787. * the smallest value.
  788. */
  789. uint64_t roaring_bitmap_rank(const roaring_bitmap_t *r, uint32_t x);
  790. /**
  791. * roaring_bitmap_rank_many is an `Bulk` version of `roaring_bitmap_rank`
  792. * it puts rank value of each element in `[begin .. end)` to `ans[]`
  793. *
  794. * the values in `[begin .. end)` must be sorted in Ascending order;
  795. * Caller is responsible to ensure that there is enough memory allocated, e.g.
  796. *
  797. * ans = malloc((end-begin) * sizeof(uint64_t));
  798. */
  799. void roaring_bitmap_rank_many(const roaring_bitmap_t *r, const uint32_t *begin,
  800. const uint32_t *end, uint64_t *ans);
  801. /**
  802. * Returns the index of x in the given roaring bitmap.
  803. * If the roaring bitmap doesn't contain x , this function will return -1.
  804. * The difference with rank function is that this function will return -1 when x
  805. * is not the element of roaring bitmap, but the rank function will return a
  806. * non-negative number.
  807. */
  808. int64_t roaring_bitmap_get_index(const roaring_bitmap_t *r, uint32_t x);
  809. /**
  810. * Returns the smallest value in the set, or UINT32_MAX if the set is empty.
  811. */
  812. uint32_t roaring_bitmap_minimum(const roaring_bitmap_t *r);
  813. /**
  814. * Returns the greatest value in the set, or 0 if the set is empty.
  815. */
  816. uint32_t roaring_bitmap_maximum(const roaring_bitmap_t *r);
  817. /**
  818. * (For advanced users.)
  819. *
  820. * Collect statistics about the bitmap, see roaring_types.h for
  821. * a description of roaring_statistics_t
  822. */
  823. void roaring_bitmap_statistics(const roaring_bitmap_t *r,
  824. roaring_statistics_t *stat);
  825. /**
  826. * Perform internal consistency checks. Returns true if the bitmap is
  827. * consistent. It may be useful to call this after deserializing bitmaps from
  828. * untrusted sources. If roaring_bitmap_internal_validate returns true, then the
  829. * bitmap should be consistent and can be trusted not to cause crashes or memory
  830. * corruption.
  831. *
  832. * Note that some operations intentionally leave bitmaps in an inconsistent
  833. * state temporarily, for example, `roaring_bitmap_lazy_*` functions, until
  834. * `roaring_bitmap_repair_after_lazy` is called.
  835. *
  836. * If reason is non-null, it will be set to a string describing the first
  837. * inconsistency found if any.
  838. */
  839. bool roaring_bitmap_internal_validate(const roaring_bitmap_t *r,
  840. const char **reason);
  841. /*********************
  842. * What follows is code use to iterate through values in a roaring bitmap
  843. roaring_bitmap_t *r =...
  844. roaring_uint32_iterator_t i;
  845. roaring_iterator_create(r, &i);
  846. while(i.has_value) {
  847. printf("value = %d\n", i.current_value);
  848. roaring_uint32_iterator_advance(&i);
  849. }
  850. Obviously, if you modify the underlying bitmap, the iterator
  851. becomes invalid. So don't.
  852. */
  853. /**
  854. * A struct used to keep iterator state. Users should only access
  855. * `current_value` and `has_value`, the rest of the type should be treated as
  856. * opaque.
  857. */
  858. typedef struct roaring_uint32_iterator_s {
  859. const roaring_bitmap_t *parent; // Owner
  860. const ROARING_CONTAINER_T *container; // Current container
  861. uint8_t typecode; // Typecode of current container
  862. int32_t container_index; // Current container index
  863. uint32_t highbits; // High 16 bits of the current value
  864. roaring_container_iterator_t container_it;
  865. uint32_t current_value;
  866. bool has_value;
  867. } roaring_uint32_iterator_t;
  868. /**
  869. * Initialize an iterator object that can be used to iterate through the values.
  870. * If there is a value, then this iterator points to the first value and
  871. * `it->has_value` is true. The value is in `it->current_value`.
  872. */
  873. void roaring_iterator_init(const roaring_bitmap_t *r,
  874. roaring_uint32_iterator_t *newit);
  875. /** DEPRECATED, use `roaring_iterator_init`. */
  876. CROARING_DEPRECATED static inline void roaring_init_iterator(
  877. const roaring_bitmap_t *r, roaring_uint32_iterator_t *newit) {
  878. roaring_iterator_init(r, newit);
  879. }
  880. /**
  881. * Initialize an iterator object that can be used to iterate through the values.
  882. * If there is a value, then this iterator points to the last value and
  883. * `it->has_value` is true. The value is in `it->current_value`.
  884. */
  885. void roaring_iterator_init_last(const roaring_bitmap_t *r,
  886. roaring_uint32_iterator_t *newit);
  887. /** DEPRECATED, use `roaring_iterator_init_last`. */
  888. CROARING_DEPRECATED static inline void roaring_init_iterator_last(
  889. const roaring_bitmap_t *r, roaring_uint32_iterator_t *newit) {
  890. roaring_iterator_init_last(r, newit);
  891. }
  892. /**
  893. * Create an iterator object that can be used to iterate through the values.
  894. * Caller is responsible for calling `roaring_free_iterator()`.
  895. *
  896. * The iterator is initialized (this function calls `roaring_iterator_init()`)
  897. * If there is a value, then this iterator points to the first value and
  898. * `it->has_value` is true. The value is in `it->current_value`.
  899. */
  900. roaring_uint32_iterator_t *roaring_iterator_create(const roaring_bitmap_t *r);
  901. /** DEPRECATED, use `roaring_iterator_create`. */
  902. CROARING_DEPRECATED static inline roaring_uint32_iterator_t *
  903. roaring_create_iterator(const roaring_bitmap_t *r) {
  904. return roaring_iterator_create(r);
  905. }
  906. /**
  907. * Advance the iterator. If there is a new value, then `it->has_value` is true.
  908. * The new value is in `it->current_value`. Values are traversed in increasing
  909. * orders. For convenience, returns `it->has_value`.
  910. *
  911. * Once `it->has_value` is false, `roaring_uint32_iterator_advance` should not
  912. * be called on the iterator again. Calling `roaring_uint32_iterator_previous`
  913. * is allowed.
  914. */
  915. bool roaring_uint32_iterator_advance(roaring_uint32_iterator_t *it);
  916. /** DEPRECATED, use `roaring_uint32_iterator_advance`. */
  917. CROARING_DEPRECATED static inline bool roaring_advance_uint32_iterator(
  918. roaring_uint32_iterator_t *it) {
  919. return roaring_uint32_iterator_advance(it);
  920. }
  921. /**
  922. * Decrement the iterator. If there's a new value, then `it->has_value` is true.
  923. * The new value is in `it->current_value`. Values are traversed in decreasing
  924. * order. For convenience, returns `it->has_value`.
  925. *
  926. * Once `it->has_value` is false, `roaring_uint32_iterator_previous` should not
  927. * be called on the iterator again. Calling `roaring_uint32_iterator_advance` is
  928. * allowed.
  929. */
  930. bool roaring_uint32_iterator_previous(roaring_uint32_iterator_t *it);
  931. /** DEPRECATED, use `roaring_uint32_iterator_previous`. */
  932. CROARING_DEPRECATED static inline bool roaring_previous_uint32_iterator(
  933. roaring_uint32_iterator_t *it) {
  934. return roaring_uint32_iterator_previous(it);
  935. }
  936. /**
  937. * Move the iterator to the first value >= `val`. If there is a such a value,
  938. * then `it->has_value` is true. The new value is in `it->current_value`.
  939. * For convenience, returns `it->has_value`.
  940. */
  941. bool roaring_uint32_iterator_move_equalorlarger(roaring_uint32_iterator_t *it,
  942. uint32_t val);
  943. /** DEPRECATED, use `roaring_uint32_iterator_move_equalorlarger`. */
  944. CROARING_DEPRECATED static inline bool
  945. roaring_move_uint32_iterator_equalorlarger(roaring_uint32_iterator_t *it,
  946. uint32_t val) {
  947. return roaring_uint32_iterator_move_equalorlarger(it, val);
  948. }
  949. /**
  950. * Creates a copy of an iterator.
  951. * Caller must free it.
  952. */
  953. roaring_uint32_iterator_t *roaring_uint32_iterator_copy(
  954. const roaring_uint32_iterator_t *it);
  955. /** DEPRECATED, use `roaring_uint32_iterator_copy`. */
  956. CROARING_DEPRECATED static inline roaring_uint32_iterator_t *
  957. roaring_copy_uint32_iterator(const roaring_uint32_iterator_t *it) {
  958. return roaring_uint32_iterator_copy(it);
  959. }
  960. /**
  961. * Free memory following `roaring_iterator_create()`
  962. */
  963. void roaring_uint32_iterator_free(roaring_uint32_iterator_t *it);
  964. /** DEPRECATED, use `roaring_uint32_iterator_free`. */
  965. CROARING_DEPRECATED static inline void roaring_free_uint32_iterator(
  966. roaring_uint32_iterator_t *it) {
  967. roaring_uint32_iterator_free(it);
  968. }
  969. /*
  970. * Reads next ${count} values from iterator into user-supplied ${buf}.
  971. * Returns the number of read elements.
  972. * This number can be smaller than ${count}, which means that iterator is
  973. * drained.
  974. *
  975. * This function satisfies semantics of iteration and can be used together with
  976. * other iterator functions.
  977. * - first value is copied from ${it}->current_value
  978. * - after function returns, iterator is positioned at the next element
  979. */
  980. uint32_t roaring_uint32_iterator_read(roaring_uint32_iterator_t *it,
  981. uint32_t *buf, uint32_t count);
  982. /** DEPRECATED, use `roaring_uint32_iterator_read`. */
  983. CROARING_DEPRECATED static inline uint32_t roaring_read_uint32_iterator(
  984. roaring_uint32_iterator_t *it, uint32_t *buf, uint32_t count) {
  985. return roaring_uint32_iterator_read(it, buf, count);
  986. }
  987. #ifdef __cplusplus
  988. }
  989. }
  990. } // extern "C" { namespace roaring { namespace api {
  991. #endif
  992. #endif /* ROARING_H */
  993. #ifdef __cplusplus
  994. /**
  995. * Best practices for C++ headers is to avoid polluting global scope.
  996. * But for C compatibility when just `roaring.h` is included building as
  997. * C++, default to global access for the C public API.
  998. *
  999. * BUT when `roaring.hh` is included instead, it sets this flag. That way
  1000. * explicit namespacing must be used to get the C functions.
  1001. *
  1002. * This is outside the include guard so that if you include BOTH headers,
  1003. * the order won't matter; you still get the global definitions.
  1004. */
  1005. #if !defined(ROARING_API_NOT_IN_GLOBAL_NAMESPACE)
  1006. using namespace ::roaring::api;
  1007. #endif
  1008. #endif