array.h 18 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521
  1. /*
  2. * array.h
  3. *
  4. */
  5. #ifndef INCLUDE_CONTAINERS_ARRAY_H_
  6. #define INCLUDE_CONTAINERS_ARRAY_H_
  7. #include <string.h>
  8. #include <roaring/roaring_types.h> // roaring_iterator
  9. // Include other headers after roaring_types.h
  10. #include <roaring/array_util.h> // binarySearch()/memequals() for inlining
  11. #include <roaring/containers/container_defs.h> // container_t, perfparameters
  12. #include <roaring/portability.h>
  13. #ifdef __cplusplus
  14. extern "C" {
  15. namespace roaring {
  16. // Note: in pure C++ code, you should avoid putting `using` in header files
  17. using api::roaring_iterator;
  18. using api::roaring_iterator64;
  19. namespace internal {
  20. #endif
  21. /* Containers with DEFAULT_MAX_SIZE or less integers should be arrays */
  22. enum { DEFAULT_MAX_SIZE = 4096 };
  23. /* struct array_container - sparse representation of a bitmap
  24. *
  25. * @cardinality: number of indices in `array` (and the bitmap)
  26. * @capacity: allocated size of `array`
  27. * @array: sorted list of integers
  28. */
  29. STRUCT_CONTAINER(array_container_s) {
  30. int32_t cardinality;
  31. int32_t capacity;
  32. uint16_t *array;
  33. };
  34. typedef struct array_container_s array_container_t;
  35. #define CAST_array(c) CAST(array_container_t *, c) // safer downcast
  36. #define const_CAST_array(c) CAST(const array_container_t *, c)
  37. #define movable_CAST_array(c) movable_CAST(array_container_t **, c)
  38. /* Create a new array with default. Return NULL in case of failure. See also
  39. * array_container_create_given_capacity. */
  40. array_container_t *array_container_create(void);
  41. /* Create a new array with a specified capacity size. Return NULL in case of
  42. * failure. */
  43. array_container_t *array_container_create_given_capacity(int32_t size);
  44. /* Create a new array containing all values in [min,max). */
  45. array_container_t *array_container_create_range(uint32_t min, uint32_t max);
  46. /*
  47. * Shrink the capacity to the actual size, return the number of bytes saved.
  48. */
  49. int array_container_shrink_to_fit(array_container_t *src);
  50. /* Free memory owned by `array'. */
  51. void array_container_free(array_container_t *array);
  52. /* Duplicate container */
  53. array_container_t *array_container_clone(const array_container_t *src);
  54. /* Get the cardinality of `array'. */
  55. ALLOW_UNALIGNED
  56. static inline int array_container_cardinality(const array_container_t *array) {
  57. return array->cardinality;
  58. }
  59. static inline bool array_container_nonzero_cardinality(
  60. const array_container_t *array) {
  61. return array->cardinality > 0;
  62. }
  63. /* Copy one container into another. We assume that they are distinct. */
  64. void array_container_copy(const array_container_t *src, array_container_t *dst);
  65. /* Add all the values in [min,max) (included) at a distance k*step from min.
  66. The container must have a size less or equal to DEFAULT_MAX_SIZE after this
  67. addition. */
  68. void array_container_add_from_range(array_container_t *arr, uint32_t min,
  69. uint32_t max, uint16_t step);
  70. static inline bool array_container_empty(const array_container_t *array) {
  71. return array->cardinality == 0;
  72. }
  73. /* check whether the cardinality is equal to the capacity (this does not mean
  74. * that it contains 1<<16 elements) */
  75. static inline bool array_container_full(const array_container_t *array) {
  76. return array->cardinality == array->capacity;
  77. }
  78. /* Compute the union of `src_1' and `src_2' and write the result to `dst'
  79. * It is assumed that `dst' is distinct from both `src_1' and `src_2'. */
  80. void array_container_union(const array_container_t *src_1,
  81. const array_container_t *src_2,
  82. array_container_t *dst);
  83. /* symmetric difference, see array_container_union */
  84. void array_container_xor(const array_container_t *array_1,
  85. const array_container_t *array_2,
  86. array_container_t *out);
  87. /* Computes the intersection of src_1 and src_2 and write the result to
  88. * dst. It is assumed that dst is distinct from both src_1 and src_2. */
  89. void array_container_intersection(const array_container_t *src_1,
  90. const array_container_t *src_2,
  91. array_container_t *dst);
  92. /* Check whether src_1 and src_2 intersect. */
  93. bool array_container_intersect(const array_container_t *src_1,
  94. const array_container_t *src_2);
  95. /* computers the size of the intersection between two arrays.
  96. */
  97. int array_container_intersection_cardinality(const array_container_t *src_1,
  98. const array_container_t *src_2);
  99. /* computes the intersection of array1 and array2 and write the result to
  100. * array1.
  101. * */
  102. void array_container_intersection_inplace(array_container_t *src_1,
  103. const array_container_t *src_2);
  104. /*
  105. * Write out the 16-bit integers contained in this container as a list of 32-bit
  106. * integers using base
  107. * as the starting value (it might be expected that base has zeros in its 16
  108. * least significant bits).
  109. * The function returns the number of values written.
  110. * The caller is responsible for allocating enough memory in out.
  111. */
  112. int array_container_to_uint32_array(void *vout, const array_container_t *cont,
  113. uint32_t base);
  114. /* Compute the number of runs */
  115. int32_t array_container_number_of_runs(const array_container_t *ac);
  116. /*
  117. * Print this container using printf (useful for debugging).
  118. */
  119. void array_container_printf(const array_container_t *v);
  120. /*
  121. * Print this container using printf as a comma-separated list of 32-bit
  122. * integers starting at base.
  123. */
  124. void array_container_printf_as_uint32_array(const array_container_t *v,
  125. uint32_t base);
  126. bool array_container_validate(const array_container_t *v, const char **reason);
  127. /**
  128. * Return the serialized size in bytes of a container having cardinality "card".
  129. */
  130. static inline int32_t array_container_serialized_size_in_bytes(int32_t card) {
  131. return card * 2 + 2;
  132. }
  133. /**
  134. * Increase capacity to at least min.
  135. * Whether the existing data needs to be copied over depends on the "preserve"
  136. * parameter. If preserve is false, then the new content will be uninitialized,
  137. * otherwise the old content is copied.
  138. */
  139. void array_container_grow(array_container_t *container, int32_t min,
  140. bool preserve);
  141. bool array_container_iterate(const array_container_t *cont, uint32_t base,
  142. roaring_iterator iterator, void *ptr);
  143. bool array_container_iterate64(const array_container_t *cont, uint32_t base,
  144. roaring_iterator64 iterator, uint64_t high_bits,
  145. void *ptr);
  146. /**
  147. * Writes the underlying array to buf, outputs how many bytes were written.
  148. * This is meant to be byte-by-byte compatible with the Java and Go versions of
  149. * Roaring.
  150. * The number of bytes written should be
  151. * array_container_size_in_bytes(container).
  152. *
  153. */
  154. int32_t array_container_write(const array_container_t *container, char *buf);
  155. /**
  156. * Reads the instance from buf, outputs how many bytes were read.
  157. * This is meant to be byte-by-byte compatible with the Java and Go versions of
  158. * Roaring.
  159. * The number of bytes read should be array_container_size_in_bytes(container).
  160. * You need to provide the (known) cardinality.
  161. */
  162. int32_t array_container_read(int32_t cardinality, array_container_t *container,
  163. const char *buf);
  164. /**
  165. * Return the serialized size in bytes of a container (see
  166. * bitset_container_write)
  167. * This is meant to be compatible with the Java and Go versions of Roaring and
  168. * assumes
  169. * that the cardinality of the container is already known.
  170. *
  171. */
  172. ALLOW_UNALIGNED
  173. static inline int32_t array_container_size_in_bytes(
  174. const array_container_t *container) {
  175. return container->cardinality * sizeof(uint16_t);
  176. }
  177. /**
  178. * Return true if the two arrays have the same content.
  179. */
  180. ALLOW_UNALIGNED
  181. static inline bool array_container_equals(const array_container_t *container1,
  182. const array_container_t *container2) {
  183. if (container1->cardinality != container2->cardinality) {
  184. return false;
  185. }
  186. return memequals(container1->array, container2->array,
  187. container1->cardinality * 2);
  188. }
  189. /**
  190. * Return true if container1 is a subset of container2.
  191. */
  192. bool array_container_is_subset(const array_container_t *container1,
  193. const array_container_t *container2);
  194. /**
  195. * If the element of given rank is in this container, supposing that the first
  196. * element has rank start_rank, then the function returns true and sets element
  197. * accordingly.
  198. * Otherwise, it returns false and update start_rank.
  199. */
  200. static inline bool array_container_select(const array_container_t *container,
  201. uint32_t *start_rank, uint32_t rank,
  202. uint32_t *element) {
  203. int card = array_container_cardinality(container);
  204. if (*start_rank + card <= rank) {
  205. *start_rank += card;
  206. return false;
  207. } else {
  208. *element = container->array[rank - *start_rank];
  209. return true;
  210. }
  211. }
  212. /* Computes the difference of array1 and array2 and write the result
  213. * to array out.
  214. * Array out does not need to be distinct from array_1
  215. */
  216. void array_container_andnot(const array_container_t *array_1,
  217. const array_container_t *array_2,
  218. array_container_t *out);
  219. /* Append x to the set. Assumes that the value is larger than any preceding
  220. * values. */
  221. static inline void array_container_append(array_container_t *arr,
  222. uint16_t pos) {
  223. const int32_t capacity = arr->capacity;
  224. if (array_container_full(arr)) {
  225. array_container_grow(arr, capacity + 1, true);
  226. }
  227. arr->array[arr->cardinality++] = pos;
  228. }
  229. /**
  230. * Add value to the set if final cardinality doesn't exceed max_cardinality.
  231. * Return code:
  232. * 1 -- value was added
  233. * 0 -- value was already present
  234. * -1 -- value was not added because cardinality would exceed max_cardinality
  235. */
  236. static inline int array_container_try_add(array_container_t *arr,
  237. uint16_t value,
  238. int32_t max_cardinality) {
  239. const int32_t cardinality = arr->cardinality;
  240. // best case, we can append.
  241. if ((array_container_empty(arr) || arr->array[cardinality - 1] < value) &&
  242. cardinality < max_cardinality) {
  243. array_container_append(arr, value);
  244. return 1;
  245. }
  246. const int32_t loc = binarySearch(arr->array, cardinality, value);
  247. if (loc >= 0) {
  248. return 0;
  249. } else if (cardinality < max_cardinality) {
  250. if (array_container_full(arr)) {
  251. array_container_grow(arr, arr->capacity + 1, true);
  252. }
  253. const int32_t insert_idx = -loc - 1;
  254. memmove(arr->array + insert_idx + 1, arr->array + insert_idx,
  255. (cardinality - insert_idx) * sizeof(uint16_t));
  256. arr->array[insert_idx] = value;
  257. arr->cardinality++;
  258. return 1;
  259. } else {
  260. return -1;
  261. }
  262. }
  263. /* Add value to the set. Returns true if x was not already present. */
  264. static inline bool array_container_add(array_container_t *arr, uint16_t value) {
  265. return array_container_try_add(arr, value, INT32_MAX) == 1;
  266. }
  267. /* Remove x from the set. Returns true if x was present. */
  268. static inline bool array_container_remove(array_container_t *arr,
  269. uint16_t pos) {
  270. const int32_t idx = binarySearch(arr->array, arr->cardinality, pos);
  271. const bool is_present = idx >= 0;
  272. if (is_present) {
  273. memmove(arr->array + idx, arr->array + idx + 1,
  274. (arr->cardinality - idx - 1) * sizeof(uint16_t));
  275. arr->cardinality--;
  276. }
  277. return is_present;
  278. }
  279. /* Check whether x is present. */
  280. inline bool array_container_contains(const array_container_t *arr,
  281. uint16_t pos) {
  282. // return binarySearch(arr->array, arr->cardinality, pos) >= 0;
  283. // binary search with fallback to linear search for short ranges
  284. int32_t low = 0;
  285. const uint16_t *carr = (const uint16_t *)arr->array;
  286. int32_t high = arr->cardinality - 1;
  287. // while (high - low >= 0) {
  288. while (high >= low + 16) {
  289. int32_t middleIndex = (low + high) >> 1;
  290. uint16_t middleValue = carr[middleIndex];
  291. if (middleValue < pos) {
  292. low = middleIndex + 1;
  293. } else if (middleValue > pos) {
  294. high = middleIndex - 1;
  295. } else {
  296. return true;
  297. }
  298. }
  299. for (int i = low; i <= high; i++) {
  300. uint16_t v = carr[i];
  301. if (v == pos) {
  302. return true;
  303. }
  304. if (v > pos) return false;
  305. }
  306. return false;
  307. }
  308. void array_container_offset(const array_container_t *c, container_t **loc,
  309. container_t **hic, uint16_t offset);
  310. //* Check whether a range of values from range_start (included) to range_end
  311. //(excluded) is present. */
  312. static inline bool array_container_contains_range(const array_container_t *arr,
  313. uint32_t range_start,
  314. uint32_t range_end) {
  315. const int32_t range_count = range_end - range_start;
  316. const uint16_t rs_included = (uint16_t)range_start;
  317. const uint16_t re_included = (uint16_t)(range_end - 1);
  318. // Empty range is always included
  319. if (range_count <= 0) {
  320. return true;
  321. }
  322. if (range_count > arr->cardinality) {
  323. return false;
  324. }
  325. const int32_t start =
  326. binarySearch(arr->array, arr->cardinality, rs_included);
  327. // If this sorted array contains all items in the range:
  328. // * the start item must be found
  329. // * the last item in range range_count must exist, and be the expected end
  330. // value
  331. return (start >= 0) && (arr->cardinality >= start + range_count) &&
  332. (arr->array[start + range_count - 1] == re_included);
  333. }
  334. /* Returns the smallest value (assumes not empty) */
  335. inline uint16_t array_container_minimum(const array_container_t *arr) {
  336. if (arr->cardinality == 0) return 0;
  337. return arr->array[0];
  338. }
  339. /* Returns the largest value (assumes not empty) */
  340. inline uint16_t array_container_maximum(const array_container_t *arr) {
  341. if (arr->cardinality == 0) return 0;
  342. return arr->array[arr->cardinality - 1];
  343. }
  344. /* Returns the number of values equal or smaller than x */
  345. inline int array_container_rank(const array_container_t *arr, uint16_t x) {
  346. const int32_t idx = binarySearch(arr->array, arr->cardinality, x);
  347. const bool is_present = idx >= 0;
  348. if (is_present) {
  349. return idx + 1;
  350. } else {
  351. return -idx - 1;
  352. }
  353. }
  354. /* bulk version of array_container_rank(); return number of consumed elements
  355. */
  356. inline uint32_t array_container_rank_many(const array_container_t *arr,
  357. uint64_t start_rank,
  358. const uint32_t *begin,
  359. const uint32_t *end, uint64_t *ans) {
  360. const uint16_t high = (uint16_t)((*begin) >> 16);
  361. uint32_t pos = 0;
  362. const uint32_t *iter = begin;
  363. for (; iter != end; iter++) {
  364. uint32_t x = *iter;
  365. uint16_t xhigh = (uint16_t)(x >> 16);
  366. if (xhigh != high) return iter - begin; // stop at next container
  367. const int32_t idx =
  368. binarySearch(arr->array + pos, arr->cardinality - pos, (uint16_t)x);
  369. const bool is_present = idx >= 0;
  370. if (is_present) {
  371. *(ans++) = start_rank + pos + (idx + 1);
  372. pos = idx + 1;
  373. } else {
  374. *(ans++) = start_rank + pos + (-idx - 1);
  375. }
  376. }
  377. return iter - begin;
  378. }
  379. /* Returns the index of x , if not exsist return -1 */
  380. inline int array_container_get_index(const array_container_t *arr, uint16_t x) {
  381. const int32_t idx = binarySearch(arr->array, arr->cardinality, x);
  382. const bool is_present = idx >= 0;
  383. if (is_present) {
  384. return idx;
  385. } else {
  386. return -1;
  387. }
  388. }
  389. /* Returns the index of the first value equal or larger than x, or -1 */
  390. inline int array_container_index_equalorlarger(const array_container_t *arr,
  391. uint16_t x) {
  392. const int32_t idx = binarySearch(arr->array, arr->cardinality, x);
  393. const bool is_present = idx >= 0;
  394. if (is_present) {
  395. return idx;
  396. } else {
  397. int32_t candidate = -idx - 1;
  398. if (candidate < arr->cardinality) return candidate;
  399. return -1;
  400. }
  401. }
  402. /*
  403. * Adds all values in range [min,max] using hint:
  404. * nvals_less is the number of array values less than $min
  405. * nvals_greater is the number of array values greater than $max
  406. */
  407. static inline void array_container_add_range_nvals(array_container_t *array,
  408. uint32_t min, uint32_t max,
  409. int32_t nvals_less,
  410. int32_t nvals_greater) {
  411. int32_t union_cardinality = nvals_less + (max - min + 1) + nvals_greater;
  412. if (union_cardinality > array->capacity) {
  413. array_container_grow(array, union_cardinality, true);
  414. }
  415. memmove(&(array->array[union_cardinality - nvals_greater]),
  416. &(array->array[array->cardinality - nvals_greater]),
  417. nvals_greater * sizeof(uint16_t));
  418. for (uint32_t i = 0; i <= max - min; i++) {
  419. array->array[nvals_less + i] = (uint16_t)(min + i);
  420. }
  421. array->cardinality = union_cardinality;
  422. }
  423. /**
  424. * Adds all values in range [min,max]. This function is currently unused
  425. * and left as a documentation.
  426. */
  427. /*static inline void array_container_add_range(array_container_t *array,
  428. uint32_t min, uint32_t max) {
  429. int32_t nvals_greater = count_greater(array->array, array->cardinality,
  430. max); int32_t nvals_less = count_less(array->array, array->cardinality -
  431. nvals_greater, min); array_container_add_range_nvals(array, min, max,
  432. nvals_less, nvals_greater);
  433. }*/
  434. /*
  435. * Removes all elements array[pos] .. array[pos+count-1]
  436. */
  437. static inline void array_container_remove_range(array_container_t *array,
  438. uint32_t pos, uint32_t count) {
  439. if (count != 0) {
  440. memmove(&(array->array[pos]), &(array->array[pos + count]),
  441. (array->cardinality - pos - count) * sizeof(uint16_t));
  442. array->cardinality -= count;
  443. }
  444. }
  445. #ifdef __cplusplus
  446. }
  447. }
  448. } // extern "C" { namespace roaring { namespace internal {
  449. #endif
  450. #endif /* INCLUDE_CONTAINERS_ARRAY_H_ */