array_util.h 8.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274
  1. #ifndef CROARING_ARRAY_UTIL_H
  2. #define CROARING_ARRAY_UTIL_H
  3. #include <stddef.h> // for size_t
  4. #include <stdint.h>
  5. #include <roaring/portability.h>
  6. #if CROARING_IS_X64
  7. #ifndef CROARING_COMPILER_SUPPORTS_AVX512
  8. #error "CROARING_COMPILER_SUPPORTS_AVX512 needs to be defined."
  9. #endif // CROARING_COMPILER_SUPPORTS_AVX512
  10. #endif
  11. #if defined(__GNUC__) && !defined(__clang__)
  12. #pragma GCC diagnostic push
  13. #pragma GCC diagnostic ignored "-Wuninitialized"
  14. #pragma GCC diagnostic ignored "-Wmaybe-uninitialized"
  15. #endif
  16. #ifdef __cplusplus
  17. extern "C" {
  18. namespace roaring {
  19. namespace internal {
  20. #endif
  21. /*
  22. * Good old binary search.
  23. * Assumes that array is sorted, has logarithmic complexity.
  24. * if the result is x, then:
  25. * if ( x>0 ) you have array[x] = ikey
  26. * if ( x<0 ) then inserting ikey at position -x-1 in array (insuring that
  27. * array[-x-1]=ikey) keys the array sorted.
  28. */
  29. inline int32_t binarySearch(const uint16_t *array, int32_t lenarray,
  30. uint16_t ikey) {
  31. int32_t low = 0;
  32. int32_t high = lenarray - 1;
  33. while (low <= high) {
  34. int32_t middleIndex = (low + high) >> 1;
  35. uint16_t middleValue = array[middleIndex];
  36. if (middleValue < ikey) {
  37. low = middleIndex + 1;
  38. } else if (middleValue > ikey) {
  39. high = middleIndex - 1;
  40. } else {
  41. return middleIndex;
  42. }
  43. }
  44. return -(low + 1);
  45. }
  46. /**
  47. * Galloping search
  48. * Assumes that array is sorted, has logarithmic complexity.
  49. * if the result is x, then if x = length, you have that all values in array
  50. * between pos and length are smaller than min. otherwise returns the first
  51. * index x such that array[x] >= min.
  52. */
  53. static inline int32_t advanceUntil(const uint16_t *array, int32_t pos,
  54. int32_t length, uint16_t min) {
  55. int32_t lower = pos + 1;
  56. if ((lower >= length) || (array[lower] >= min)) {
  57. return lower;
  58. }
  59. int32_t spansize = 1;
  60. while ((lower + spansize < length) && (array[lower + spansize] < min)) {
  61. spansize <<= 1;
  62. }
  63. int32_t upper = (lower + spansize < length) ? lower + spansize : length - 1;
  64. if (array[upper] == min) {
  65. return upper;
  66. }
  67. if (array[upper] < min) {
  68. // means
  69. // array
  70. // has no
  71. // item
  72. // >= min
  73. // pos = array.length;
  74. return length;
  75. }
  76. // we know that the next-smallest span was too small
  77. lower += (spansize >> 1);
  78. int32_t mid = 0;
  79. while (lower + 1 != upper) {
  80. mid = (lower + upper) >> 1;
  81. if (array[mid] == min) {
  82. return mid;
  83. } else if (array[mid] < min) {
  84. lower = mid;
  85. } else {
  86. upper = mid;
  87. }
  88. }
  89. return upper;
  90. }
  91. /**
  92. * Returns number of elements which are less than ikey.
  93. * Array elements must be unique and sorted.
  94. */
  95. static inline int32_t count_less(const uint16_t *array, int32_t lenarray,
  96. uint16_t ikey) {
  97. if (lenarray == 0) return 0;
  98. int32_t pos = binarySearch(array, lenarray, ikey);
  99. return pos >= 0 ? pos : -(pos + 1);
  100. }
  101. /**
  102. * Returns number of elements which are greater than ikey.
  103. * Array elements must be unique and sorted.
  104. */
  105. static inline int32_t count_greater(const uint16_t *array, int32_t lenarray,
  106. uint16_t ikey) {
  107. if (lenarray == 0) return 0;
  108. int32_t pos = binarySearch(array, lenarray, ikey);
  109. if (pos >= 0) {
  110. return lenarray - (pos + 1);
  111. } else {
  112. return lenarray - (-pos - 1);
  113. }
  114. }
  115. /**
  116. * From Schlegel et al., Fast Sorted-Set Intersection using SIMD Instructions
  117. * Optimized by D. Lemire on May 3rd 2013
  118. *
  119. * C should have capacity greater than the minimum of s_1 and s_b + 8
  120. * where 8 is sizeof(__m128i)/sizeof(uint16_t).
  121. */
  122. int32_t intersect_vector16(const uint16_t *__restrict__ A, size_t s_a,
  123. const uint16_t *__restrict__ B, size_t s_b,
  124. uint16_t *C);
  125. int32_t intersect_vector16_inplace(uint16_t *__restrict__ A, size_t s_a,
  126. const uint16_t *__restrict__ B, size_t s_b);
  127. /**
  128. * Take an array container and write it out to a 32-bit array, using base
  129. * as the offset.
  130. */
  131. int array_container_to_uint32_array_vector16(void *vout, const uint16_t *array,
  132. size_t cardinality, uint32_t base);
  133. #if CROARING_COMPILER_SUPPORTS_AVX512
  134. int avx512_array_container_to_uint32_array(void *vout, const uint16_t *array,
  135. size_t cardinality, uint32_t base);
  136. #endif
  137. /**
  138. * Compute the cardinality of the intersection using SSE4 instructions
  139. */
  140. int32_t intersect_vector16_cardinality(const uint16_t *__restrict__ A,
  141. size_t s_a,
  142. const uint16_t *__restrict__ B,
  143. size_t s_b);
  144. /* Computes the intersection between one small and one large set of uint16_t.
  145. * Stores the result into buffer and return the number of elements. */
  146. int32_t intersect_skewed_uint16(const uint16_t *smallarray, size_t size_s,
  147. const uint16_t *largearray, size_t size_l,
  148. uint16_t *buffer);
  149. /* Computes the size of the intersection between one small and one large set of
  150. * uint16_t. */
  151. int32_t intersect_skewed_uint16_cardinality(const uint16_t *smallarray,
  152. size_t size_s,
  153. const uint16_t *largearray,
  154. size_t size_l);
  155. /* Check whether the size of the intersection between one small and one large
  156. * set of uint16_t is non-zero. */
  157. bool intersect_skewed_uint16_nonempty(const uint16_t *smallarray, size_t size_s,
  158. const uint16_t *largearray,
  159. size_t size_l);
  160. /**
  161. * Generic intersection function.
  162. */
  163. int32_t intersect_uint16(const uint16_t *A, const size_t lenA,
  164. const uint16_t *B, const size_t lenB, uint16_t *out);
  165. /**
  166. * Compute the size of the intersection (generic).
  167. */
  168. int32_t intersect_uint16_cardinality(const uint16_t *A, const size_t lenA,
  169. const uint16_t *B, const size_t lenB);
  170. /**
  171. * Checking whether the size of the intersection is non-zero.
  172. */
  173. bool intersect_uint16_nonempty(const uint16_t *A, const size_t lenA,
  174. const uint16_t *B, const size_t lenB);
  175. /**
  176. * Generic union function.
  177. */
  178. size_t union_uint16(const uint16_t *set_1, size_t size_1, const uint16_t *set_2,
  179. size_t size_2, uint16_t *buffer);
  180. /**
  181. * Generic XOR function.
  182. */
  183. int32_t xor_uint16(const uint16_t *array_1, int32_t card_1,
  184. const uint16_t *array_2, int32_t card_2, uint16_t *out);
  185. /**
  186. * Generic difference function (ANDNOT).
  187. */
  188. int difference_uint16(const uint16_t *a1, int length1, const uint16_t *a2,
  189. int length2, uint16_t *a_out);
  190. /**
  191. * Generic intersection function.
  192. */
  193. size_t intersection_uint32(const uint32_t *A, const size_t lenA,
  194. const uint32_t *B, const size_t lenB, uint32_t *out);
  195. /**
  196. * Generic intersection function, returns just the cardinality.
  197. */
  198. size_t intersection_uint32_card(const uint32_t *A, const size_t lenA,
  199. const uint32_t *B, const size_t lenB);
  200. /**
  201. * Generic union function.
  202. */
  203. size_t union_uint32(const uint32_t *set_1, size_t size_1, const uint32_t *set_2,
  204. size_t size_2, uint32_t *buffer);
  205. /**
  206. * A fast SSE-based union function.
  207. */
  208. uint32_t union_vector16(const uint16_t *__restrict__ set_1, uint32_t size_1,
  209. const uint16_t *__restrict__ set_2, uint32_t size_2,
  210. uint16_t *__restrict__ buffer);
  211. /**
  212. * A fast SSE-based XOR function.
  213. */
  214. uint32_t xor_vector16(const uint16_t *__restrict__ array1, uint32_t length1,
  215. const uint16_t *__restrict__ array2, uint32_t length2,
  216. uint16_t *__restrict__ output);
  217. /**
  218. * A fast SSE-based difference function.
  219. */
  220. int32_t difference_vector16(const uint16_t *__restrict__ A, size_t s_a,
  221. const uint16_t *__restrict__ B, size_t s_b,
  222. uint16_t *C);
  223. /**
  224. * Generic union function, returns just the cardinality.
  225. */
  226. size_t union_uint32_card(const uint32_t *set_1, size_t size_1,
  227. const uint32_t *set_2, size_t size_2);
  228. /**
  229. * combines union_uint16 and union_vector16 optimally
  230. */
  231. size_t fast_union_uint16(const uint16_t *set_1, size_t size_1,
  232. const uint16_t *set_2, size_t size_2,
  233. uint16_t *buffer);
  234. bool memequals(const void *s1, const void *s2, size_t n);
  235. #ifdef __cplusplus
  236. }
  237. }
  238. } // extern "C" { namespace roaring { namespace internal {
  239. #endif
  240. #if defined(__GNUC__) && !defined(__clang__)
  241. #pragma GCC diagnostic pop
  242. #endif
  243. #endif