|
@@ -189,11 +189,8 @@ void run_container_offset(const run_container_t *c, container_t **loc,
|
|
|
|
|
|
/* Free memory. */
|
|
|
void run_container_free(run_container_t *run) {
|
|
|
- if (run->runs !=
|
|
|
- NULL) { // Jon Strabala reports that some tools complain otherwise
|
|
|
- roaring_free(run->runs);
|
|
|
- run->runs = NULL; // pedantic
|
|
|
- }
|
|
|
+ if (run == NULL) return; // a NULL container is accepted: nothing to release
|
|
|
+ roaring_free(run->runs); // no NULL guard any more; presumably roaring_free(NULL) is a no-op like free(NULL) - TODO confirm
|
|
|
roaring_free(run);
|
|
|
}
|
|
|
|
|
@@ -211,10 +208,7 @@ void run_container_grow(run_container_t *run, int32_t min, bool copy) {
|
|
|
run->capacity * sizeof(rle16_t));
|
|
|
if (run->runs == NULL) roaring_free(oldruns);
|
|
|
} else {
|
|
|
- // Jon Strabala reports that some tools complain otherwise
|
|
|
- if (run->runs != NULL) {
|
|
|
- roaring_free(run->runs);
|
|
|
- }
|
|
|
+ roaring_free(run->runs);
|
|
|
run->runs = (rle16_t *)roaring_malloc(run->capacity * sizeof(rle16_t));
|
|
|
}
|
|
|
// We may have run->runs == NULL.
|
|
@@ -636,24 +630,6 @@ void run_container_andnot(const run_container_t *src_1,
|
|
|
}
|
|
|
}
|
|
|
|
|
|
-ALLOW_UNALIGNED
|
|
|
-int run_container_to_uint32_array(void *vout, const run_container_t *cont,
|
|
|
- uint32_t base) {
|
|
|
- int outpos = 0;
|
|
|
- uint32_t *out = (uint32_t *)vout;
|
|
|
- for (int i = 0; i < cont->n_runs; ++i) {
|
|
|
- uint32_t run_start = base + cont->runs[i].value;
|
|
|
- uint16_t le = cont->runs[i].length;
|
|
|
- for (int j = 0; j <= le; ++j) {
|
|
|
- uint32_t val = run_start + j;
|
|
|
- memcpy(out + outpos, &val,
|
|
|
- sizeof(uint32_t)); // should be compiled as a MOV on x64
|
|
|
- outpos++;
|
|
|
- }
|
|
|
- }
|
|
|
- return outpos;
|
|
|
-}
|
|
|
-
|
|
|
/*
|
|
|
* Print this container using printf (useful for debugging).
|
|
|
*/
|
|
@@ -1026,6 +1002,47 @@ static inline int _avx2_run_container_cardinality(const run_container_t *run) {
|
|
|
return sum;
|
|
|
}
|
|
|
|
|
|
+ALLOW_UNALIGNED
|
|
|
+int _avx2_run_container_to_uint32_array(void *vout, const run_container_t *cont,
|
|
|
+ uint32_t base) { // returns the number of 32-bit values written to vout
|
|
|
+ int outpos = 0; // values written so far; also the return value
|
|
|
+ uint32_t *out = (uint32_t *)vout; // only written through memcpy / unaligned stores below
|
|
|
+ // Expand every run into the consecutive values base + value + 0 .. base + value + length.
|
|
|
+ for (int i = 0; i < cont->n_runs; ++i) {
|
|
|
+ uint32_t run_start = base + cont->runs[i].value;
|
|
|
+ uint16_t le = cont->runs[i].length; // the run yields le + 1 values (loops below are inclusive)
|
|
|
+ if (le < 8) { // short run: plain scalar expansion
|
|
|
+ for (int j = 0; j <= le; ++j) {
|
|
|
+ uint32_t val = run_start + j;
|
|
|
+ memcpy(out + outpos, &val,
|
|
|
+ sizeof(uint32_t)); // should be compiled as a MOV on x64
|
|
|
+ outpos++;
|
|
|
+ }
|
|
|
+ } else {
|
|
|
+ int j = 0; // offset inside the run, shared by the vector loop and the scalar tail
|
|
|
+ __m256i run_start_v = _mm256_set1_epi32(run_start);
|
|
|
+ // [8,8,8,8....]
|
|
|
+ __m256i inc = _mm256_set1_epi32(8);
|
|
|
+ // per-lane offsets, advanced by 8 after every store:
|
|
|
+ // [0, 1, 2, 3...], [8, 9, 10,...]
|
|
|
+ __m256i delta = _mm256_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7);
|
|
|
+ for (j = 0; j + 7 <= le; j += 8) { // a store covers offsets j..j+7, so j + 7 <= le is the exact bound
|
|
|
+ __m256i val_v = _mm256_add_epi32(run_start_v, delta);
|
|
|
+ _mm256_storeu_si256((__m256i *)(out + outpos), val_v);
|
|
|
+ delta = _mm256_add_epi32(inc, delta);
|
|
|
+ outpos += 8;
|
|
|
+ }
|
|
|
+ for (; j <= le; ++j) { // scalar tail: at most 7 remaining values
|
|
|
+ uint32_t val = run_start + j;
|
|
|
+ memcpy(out + outpos, &val,
|
|
|
+ sizeof(uint32_t)); // should be compiled as a MOV on x64
|
|
|
+ outpos++;
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+ return outpos;
|
|
|
+}
|
|
|
+
|
|
|
CROARING_UNTARGET_AVX2
|
|
|
|
|
|
/* Get the cardinality of `run'. Requires an actual computation. */
|
|
@@ -1055,6 +1072,34 @@ int run_container_cardinality(const run_container_t *run) {
|
|
|
return _scalar_run_container_cardinality(run);
|
|
|
}
|
|
|
}
|
|
|
+
|
|
|
+int _scalar_run_container_to_uint32_array(void *vout, // scalar variant: the body uses no SIMD intrinsics
|
|
|
+ const run_container_t *cont,
|
|
|
+ uint32_t base) { // returns the number of 32-bit values written to vout
|
|
|
+ int outpos = 0; // values written so far; also the return value
|
|
|
+ uint32_t *out = (uint32_t *)vout; // only written through memcpy below
|
|
|
+ for (int i = 0; i < cont->n_runs; ++i) { // one iteration per run
|
|
|
+ uint32_t run_start = base + cont->runs[i].value; // first value of the run, offset by base
|
|
|
+ uint16_t le = cont->runs[i].length; // the run yields le + 1 values
|
|
|
+ for (int j = 0; j <= le; ++j) { // inclusive bound
|
|
|
+ uint32_t val = run_start + j;
|
|
|
+ memcpy(out + outpos, &val,
|
|
|
+ sizeof(uint32_t)); // should be compiled as a MOV on x64
|
|
|
+ outpos++;
|
|
|
+ }
|
|
|
+ }
|
|
|
+ return outpos;
|
|
|
+}
|
|
|
+
|
|
|
+int run_container_to_uint32_array(void *vout, const run_container_t *cont,
|
|
|
+ uint32_t base) { // forwards to one of two implementations and returns its result
|
|
|
+ if (croaring_hardware_support() & ROARING_SUPPORTS_AVX2) { // presumably a runtime CPU-feature query - TODO confirm
|
|
|
+ return _avx2_run_container_to_uint32_array(vout, cont, base); // AVX2 bit set
|
|
|
+ } else {
|
|
|
+ return _scalar_run_container_to_uint32_array(vout, cont, base); // AVX2 bit clear
|
|
|
+ }
|
|
|
+}
|
|
|
+
|
|
|
#else
|
|
|
|
|
|
/* Get the cardinality of `run'. Requires an actual computation. */
|
|
@@ -1071,6 +1116,25 @@ int run_container_cardinality(const run_container_t *run) {
|
|
|
|
|
|
return sum;
|
|
|
}
|
|
|
+
|
|
|
+ALLOW_UNALIGNED
|
|
|
+int run_container_to_uint32_array(void *vout, const run_container_t *cont, // variant in the #else branch; the matching #if is outside this view
|
|
|
+ uint32_t base) { // returns the number of 32-bit values written to vout
|
|
|
+ int outpos = 0; // values written so far; also the return value
|
|
|
+ uint32_t *out = (uint32_t *)vout; // only written through memcpy below
|
|
|
+ for (int i = 0; i < cont->n_runs; ++i) { // one iteration per run
|
|
|
+ uint32_t run_start = base + cont->runs[i].value; // first value of the run, offset by base
|
|
|
+ uint16_t le = cont->runs[i].length; // the run yields le + 1 values
|
|
|
+ for (int j = 0; j <= le; ++j) { // inclusive bound
|
|
|
+ uint32_t val = run_start + j;
|
|
|
+ memcpy(out + outpos, &val,
|
|
|
+ sizeof(uint32_t)); // should be compiled as a MOV on x64
|
|
|
+ outpos++;
|
|
|
+ }
|
|
|
+ }
|
|
|
+ return outpos;
|
|
|
+}
|
|
|
+
|
|
|
#endif
|
|
|
|
|
|
#ifdef __cplusplus
|