@@ -189,11 +189,8 @@ void run_container_offset(const run_container_t *c, container_t **loc,
/* Free memory. */
void run_container_free(run_container_t *run) {
- if (run->runs !=
- NULL) { // Jon Strabala reports that some tools complain otherwise
- roaring_free(run->runs);
- run->runs = NULL; // pedantic
- }
+ if (run == NULL) return;
+ roaring_free(run->runs);
@@ -211,10 +208,7 @@ void run_container_grow(run_container_t *run, int32_t min, bool copy) {
run->capacity * sizeof(rle16_t));
if (run->runs == NULL) roaring_free(oldruns);
} else {
- // Jon Strabala reports that some tools complain otherwise
- if (run->runs != NULL) {
- roaring_free(run->runs);
- }
+ roaring_free(run->runs);
run->runs = (rle16_t *)roaring_malloc(run->capacity * sizeof(rle16_t));
// We may have run->runs == NULL.
@@ -636,24 +630,6 @@ void run_container_andnot(const run_container_t *src_1,
-int run_container_to_uint32_array(void *vout, const run_container_t *cont,
- uint32_t base) {
- int outpos = 0;
- uint32_t *out = (uint32_t *)vout;
- for (int i = 0; i < cont->n_runs; ++i) {
- uint32_t run_start = base + cont->runs[i].value;
- uint16_t le = cont->runs[i].length;
- for (int j = 0; j <= le; ++j) {
- uint32_t val = run_start + j;
- memcpy(out + outpos, &val,
- sizeof(uint32_t)); // should be compiled as a MOV on x64
- outpos++;
- }
- }
- return outpos;
* Print this container using printf (useful for debugging).
@@ -1026,6 +1002,47 @@ static inline int _avx2_run_container_cardinality(const run_container_t *run) {
return sum;
+int _avx2_run_container_to_uint32_array(void *vout, const run_container_t *cont,
+ uint32_t base) {
+ int outpos = 0;
+ uint32_t *out = (uint32_t *)vout;
+ for (int i = 0; i < cont->n_runs; ++i) {
+ uint32_t run_start = base + cont->runs[i].value;
+ uint16_t le = cont->runs[i].length;
+ if (le < 8) {
+ for (int j = 0; j <= le; ++j) {
+ uint32_t val = run_start + j;
+ memcpy(out + outpos, &val,
+ sizeof(uint32_t)); // should be compiled as a MOV on x64
+ outpos++;
+ }
+ } else {
+ int j = 0;
+ __m256i run_start_v = _mm256_set1_epi32(run_start);
+ // [8,8,8,8....]
+ __m256i inc = _mm256_set1_epi32(8);
+ // used to generate the sequence:
+ // [0, 1, 2, 3...], [8, 9, 10,...]
+ __m256i delta = _mm256_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7);
+ for (j = 0; j + 8 <= le; j += 8) {
+ __m256i val_v = _mm256_add_epi32(run_start_v, delta);
+ _mm256_storeu_si256((__m256i *)(out + outpos), val_v);
+ delta = _mm256_add_epi32(inc, delta);
+ outpos += 8;
+ }
+ for (; j <= le; ++j) {
+ uint32_t val = run_start + j;
+ memcpy(out + outpos, &val,
+ sizeof(uint32_t)); // should be compiled as a MOV on x64
+ outpos++;
+ }
+ }
+ }
+ return outpos;
/* Get the cardinality of `run'. Requires an actual computation. */
@@ -1055,6 +1072,34 @@ int run_container_cardinality(const run_container_t *run) {
return _scalar_run_container_cardinality(run);
+int _scalar_run_container_to_uint32_array(void *vout,
+ const run_container_t *cont,
+ uint32_t base) {
+ int outpos = 0;
+ uint32_t *out = (uint32_t *)vout;
+ for (int i = 0; i < cont->n_runs; ++i) {
+ uint32_t run_start = base + cont->runs[i].value;
+ uint16_t le = cont->runs[i].length;
+ for (int j = 0; j <= le; ++j) {
+ uint32_t val = run_start + j;
+ memcpy(out + outpos, &val,
+ sizeof(uint32_t)); // should be compiled as a MOV on x64
+ outpos++;
+ }
+ }
+ return outpos;
+int run_container_to_uint32_array(void *vout, const run_container_t *cont,
+ uint32_t base) {
+ if (croaring_hardware_support() & ROARING_SUPPORTS_AVX2) {
+ return _avx2_run_container_to_uint32_array(vout, cont, base);
+ } else {
+ return _scalar_run_container_to_uint32_array(vout, cont, base);
+ }
/* Get the cardinality of `run'. Requires an actual computation. */
@@ -1071,6 +1116,25 @@ int run_container_cardinality(const run_container_t *run) {
return sum;
+int run_container_to_uint32_array(void *vout, const run_container_t *cont,
+ uint32_t base) {
+ int outpos = 0;
+ uint32_t *out = (uint32_t *)vout;
+ for (int i = 0; i < cont->n_runs; ++i) {
+ uint32_t run_start = base + cont->runs[i].value;
+ uint16_t le = cont->runs[i].length;
+ for (int j = 0; j <= le; ++j) {
+ uint32_t val = run_start + j;
+ memcpy(out + outpos, &val,
+ sizeof(uint32_t)); // should be compiled as a MOV on x64
+ outpos++;
+ }
+ }
+ return outpos;
#ifdef __cplusplus