/* * portability.h * */ /** * All macros should be prefixed with either CROARING or ROARING. * The library uses both ROARING_... * as well as CROAIRING_ as prefixes. The ROARING_ prefix is for * macros that are provided by the build system or that are closely * related to the format. The header macros may also use ROARING_. * The CROARING_ prefix is for internal macros that a user is unlikely * to ever interact with. */ #ifndef CROARING_INCLUDE_PORTABILITY_H_ #define CROARING_INCLUDE_PORTABILITY_H_ #ifndef _GNU_SOURCE #define _GNU_SOURCE 1 #endif // _GNU_SOURCE #ifndef __STDC_FORMAT_MACROS #define __STDC_FORMAT_MACROS 1 #endif // __STDC_FORMAT_MACROS #ifdef _MSC_VER #define CROARING_VISUAL_STUDIO 1 /** * We want to differentiate carefully between * clang under visual studio and regular visual * studio. */ #ifdef __clang__ // clang under visual studio #define CROARING_CLANG_VISUAL_STUDIO 1 #else // just regular visual studio (best guess) #define CROARING_REGULAR_VISUAL_STUDIO 1 #endif // __clang__ #endif // _MSC_VER #ifndef CROARING_VISUAL_STUDIO #define CROARING_VISUAL_STUDIO 0 #endif #ifndef CROARING_CLANG_VISUAL_STUDIO #define CROARING_CLANG_VISUAL_STUDIO 0 #endif #ifndef CROARING_REGULAR_VISUAL_STUDIO #define CROARING_REGULAR_VISUAL_STUDIO 0 #endif #if defined(_POSIX_C_SOURCE) && (_POSIX_C_SOURCE < 200809L) #undef _POSIX_C_SOURCE #endif #ifndef _POSIX_C_SOURCE #define _POSIX_C_SOURCE 200809L #endif // !(defined(_POSIX_C_SOURCE)) || (_POSIX_C_SOURCE < 200809L) #if !(defined(_XOPEN_SOURCE)) || (_XOPEN_SOURCE < 700) #define _XOPEN_SOURCE 700 #endif // !(defined(_XOPEN_SOURCE)) || (_XOPEN_SOURCE < 700) #ifdef __illumos__ #define __EXTENSIONS__ #endif #include #include #include // will provide posix_memalign with _POSIX_C_SOURCE as defined above #ifdef __GLIBC__ #include // this should never be needed but there are some reports that it is needed. #endif #ifdef __cplusplus extern "C" { // portability definitions are in global scope, not a namespace #endif #if defined(__SIZEOF_LONG_LONG__) && __SIZEOF_LONG_LONG__ != 8 #error This code assumes 64-bit long longs (by use of the GCC intrinsics). Your system is not currently supported. #endif #if CROARING_REGULAR_VISUAL_STUDIO #ifndef __restrict__ #define __restrict__ __restrict #endif // __restrict__ #endif // CROARING_REGULAR_VISUAL_STUDIO #if defined(__x86_64__) || defined(_M_X64) // we have an x64 processor #define CROARING_IS_X64 1 #if defined(_MSC_VER) && (_MSC_VER < 1910) // Old visual studio systems won't support AVX2 well. #undef CROARING_IS_X64 #endif #if defined(__clang_major__) && (__clang_major__ <= 8) && !defined(__AVX2__) // Older versions of clang have a bug affecting us // https://stackoverflow.com/questions/57228537/how-does-one-use-pragma-clang-attribute-push-with-c-namespaces #undef CROARING_IS_X64 #endif #ifdef ROARING_DISABLE_X64 #undef CROARING_IS_X64 #endif // we include the intrinsic header #if !CROARING_REGULAR_VISUAL_STUDIO /* Non-Microsoft C/C++-compatible compiler */ #include // on some recent GCC, this will declare posix_memalign #if CROARING_CLANG_VISUAL_STUDIO /** * You are not supposed, normally, to include these * headers directly. Instead you should either include intrin.h * or x86intrin.h. However, when compiling with clang * under Windows (i.e., when _MSC_VER is set), these headers * only get included *if* the corresponding features are detected * from macros: * e.g., if __AVX2__ is set... in turn, we normally set these * macros by compiling against the corresponding architecture * (e.g., arch:AVX2, -mavx2, etc.) which compiles the whole * software with these advanced instructions. These headers would * normally guard against such usage, but we carefully included * (or ) before, so the headers * are fooled. */ // To avoid reordering imports: // clang-format off #include // for _blsr_u64 #include // for __lzcnt64 #include // for most things (AVX2, AVX512, _popcnt64) #include #include #include #include #include #if _MSC_VER >= 1920 // Important: we need the AVX-512 headers: #include #include #include #include #include #include #include #include // clang-format on #endif // _MSC_VER >= 1920 // unfortunately, we may not get _blsr_u64, but, thankfully, clang // has it as a macro. #ifndef _blsr_u64 // we roll our own #define _blsr_u64(n) ((n - 1) & n) #endif // _blsr_u64 #endif // SIMDJSON_CLANG_VISUAL_STUDIO #endif // CROARING_REGULAR_VISUAL_STUDIO #endif // defined(__x86_64__) || defined(_M_X64) #if !defined(CROARING_USENEON) && !defined(DISABLENEON) && defined(__ARM_NEON) #define CROARING_USENEON #endif #if defined(CROARING_USENEON) #include #endif #if !CROARING_REGULAR_VISUAL_STUDIO /* Non-Microsoft C/C++-compatible compiler, assumes that it supports inline * assembly */ #define CROARING_INLINE_ASM 1 #endif // _MSC_VER #if CROARING_REGULAR_VISUAL_STUDIO /* Microsoft C/C++-compatible compiler */ #include #ifndef __clang__ // if one compiles with MSVC *with* clang, then these // intrinsics are defined!!! #define CROARING_INTRINSICS 1 // sadly there is no way to check whether we are missing these intrinsics // specifically. /* wrappers for Visual Studio built-ins that look like gcc built-ins * __builtin_ctzll */ /** result might be undefined when input_num is zero */ inline int roaring_trailing_zeroes(unsigned long long input_num) { unsigned long index; #ifdef _WIN64 // highly recommended!!! _BitScanForward64(&index, input_num); #else // if we must support 32-bit Windows if ((uint32_t)input_num != 0) { _BitScanForward(&index, (uint32_t)input_num); } else { _BitScanForward(&index, (uint32_t)(input_num >> 32)); index += 32; } #endif // _WIN64 return index; } /* wrappers for Visual Studio built-ins that look like gcc built-ins * __builtin_clzll */ /** result might be undefined when input_num is zero */ inline int roaring_leading_zeroes(unsigned long long input_num) { unsigned long index; #ifdef _WIN64 // highly recommended!!! _BitScanReverse64(&index, input_num); #else // if we must support 32-bit Windows if (input_num > 0xFFFFFFFF) { _BitScanReverse(&index, (uint32_t)(input_num >> 32)); index += 32; } else { _BitScanReverse(&index, (uint32_t)(input_num)); } #endif // _WIN64 return 63 - index; } /* Use #define so this is effective even under /Ob0 (no inline) */ #define roaring_unreachable __assume(0) #endif // __clang__ #endif // CROARING_REGULAR_VISUAL_STUDIO #ifndef CROARING_INTRINSICS #define CROARING_INTRINSICS 1 #define roaring_unreachable __builtin_unreachable() /** result might be undefined when input_num is zero */ inline int roaring_trailing_zeroes(unsigned long long input_num) { return __builtin_ctzll(input_num); } /** result might be undefined when input_num is zero */ inline int roaring_leading_zeroes(unsigned long long input_num) { return __builtin_clzll(input_num); } #endif #if CROARING_REGULAR_VISUAL_STUDIO #define ALIGNED(x) __declspec(align(x)) #elif defined(__GNUC__) || defined(__clang__) #define ALIGNED(x) __attribute__((aligned(x))) #else #warning "Warning. Unrecognized compiler." #define ALIGNED(x) #endif #if defined(__GNUC__) || defined(__clang__) #define CROARING_WARN_UNUSED __attribute__((warn_unused_result)) #else #define CROARING_WARN_UNUSED #endif #define IS_BIG_ENDIAN (*(uint16_t *)"\0\xff" < 0x100) #ifdef CROARING_USENEON // we can always compute the popcount fast. #elif (defined(_M_ARM) || defined(_M_ARM64)) && \ ((defined(_WIN64) || defined(_WIN32)) && \ defined(CROARING_REGULAR_VISUAL_STUDIO) && \ CROARING_REGULAR_VISUAL_STUDIO) // we will need this function: static inline int roaring_hamming_backup(uint64_t x) { uint64_t c1 = UINT64_C(0x5555555555555555); uint64_t c2 = UINT64_C(0x3333333333333333); uint64_t c4 = UINT64_C(0x0F0F0F0F0F0F0F0F); x -= (x >> 1) & c1; x = ((x >> 2) & c2) + (x & c2); x = (x + (x >> 4)) & c4; x *= UINT64_C(0x0101010101010101); return x >> 56; } #endif static inline int roaring_hamming(uint64_t x) { #if defined(_WIN64) && defined(CROARING_REGULAR_VISUAL_STUDIO) && \ CROARING_REGULAR_VISUAL_STUDIO #ifdef CROARING_USENEON return vaddv_u8(vcnt_u8(vcreate_u8(input_num))); #elif defined(_M_ARM64) return roaring_hamming_backup(x); // (int) _CountOneBits64(x); is unavailable #else // _M_ARM64 return (int)__popcnt64(x); #endif // _M_ARM64 #elif defined(_WIN32) && defined(CROARING_REGULAR_VISUAL_STUDIO) && \ CROARING_REGULAR_VISUAL_STUDIO #ifdef _M_ARM return roaring_hamming_backup(x); // _CountOneBits is unavailable #else // _M_ARM return (int)__popcnt((unsigned int)x) + (int)__popcnt((unsigned int)(x >> 32)); #endif // _M_ARM #else return __builtin_popcountll(x); #endif } #ifndef UINT64_C #define UINT64_C(c) (c##ULL) #endif // UINT64_C #ifndef UINT32_C #define UINT32_C(c) (c##UL) #endif // UINT32_C #ifdef __cplusplus } // extern "C" { #endif // __cplusplus // this is almost standard? #undef STRINGIFY_IMPLEMENTATION_ #undef STRINGIFY #define STRINGIFY_IMPLEMENTATION_(a) #a #define STRINGIFY(a) STRINGIFY_IMPLEMENTATION_(a) // Our fast kernels require 64-bit systems. // // On 32-bit x86, we lack 64-bit popcnt, lzcnt, blsr instructions. // Furthermore, the number of SIMD registers is reduced. // // On 32-bit ARM, we would have smaller registers. // // The library should still have the fallback kernel. It is // slower, but it should run everywhere. // // Enable valid runtime implementations, and select // CROARING_BUILTIN_IMPLEMENTATION // // We are going to use runtime dispatch. #if CROARING_IS_X64 #ifdef __clang__ // clang does not have GCC push pop // warning: clang attribute push can't be used within a namespace in clang up // til 8.0 so CROARING_TARGET_REGION and CROARING_UNTARGET_REGION must be // *outside* of a namespace. #define CROARING_TARGET_REGION(T) \ _Pragma(STRINGIFY(clang attribute push(__attribute__((target(T))), \ apply_to = function))) #define CROARING_UNTARGET_REGION _Pragma("clang attribute pop") #elif defined(__GNUC__) // GCC is easier #define CROARING_TARGET_REGION(T) \ _Pragma("GCC push_options") _Pragma(STRINGIFY(GCC target(T))) #define CROARING_UNTARGET_REGION _Pragma("GCC pop_options") #endif // clang then gcc #endif // CROARING_IS_X64 // Default target region macros don't do anything. #ifndef CROARING_TARGET_REGION #define CROARING_TARGET_REGION(T) #define CROARING_UNTARGET_REGION #endif #define CROARING_TARGET_AVX2 \ CROARING_TARGET_REGION("avx2,bmi,pclmul,lzcnt,popcnt") #define CROARING_TARGET_AVX512 \ CROARING_TARGET_REGION( \ "avx2,bmi,bmi2,pclmul,lzcnt,popcnt,avx512f,avx512dq,avx512bw," \ "avx512vbmi2,avx512bitalg,avx512vpopcntdq") #define CROARING_UNTARGET_AVX2 CROARING_UNTARGET_REGION #define CROARING_UNTARGET_AVX512 CROARING_UNTARGET_REGION #ifdef __AVX2__ // No need for runtime dispatching. // It is unnecessary and harmful to old clang to tag regions. #undef CROARING_TARGET_AVX2 #define CROARING_TARGET_AVX2 #undef CROARING_UNTARGET_AVX2 #define CROARING_UNTARGET_AVX2 #endif #if defined(__AVX512F__) && defined(__AVX512DQ__) && defined(__AVX512BW__) && \ defined(__AVX512VBMI2__) && defined(__AVX512BITALG__) && \ defined(__AVX512VPOPCNTDQ__) // No need for runtime dispatching. // It is unnecessary and harmful to old clang to tag regions. #undef CROARING_TARGET_AVX512 #define CROARING_TARGET_AVX512 #undef CROARING_UNTARGET_AVX512 #define CROARING_UNTARGET_AVX512 #endif // Allow unaligned memory access #if defined(__GNUC__) || defined(__clang__) #define ALLOW_UNALIGNED __attribute__((no_sanitize("alignment"))) #else #define ALLOW_UNALIGNED #endif #if defined(__BYTE_ORDER__) && defined(__ORDER_BIG_ENDIAN__) #define CROARING_IS_BIG_ENDIAN (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__) #elif defined(_WIN32) #define CROARING_IS_BIG_ENDIAN 0 #else #if defined(__APPLE__) || \ defined(__FreeBSD__) // defined __BYTE_ORDER__ && defined // __ORDER_BIG_ENDIAN__ #include #elif defined(sun) || \ defined(__sun) // defined(__APPLE__) || defined(__FreeBSD__) #error #include #else // defined(__APPLE__) || defined(__FreeBSD__) #ifdef __has_include #if __has_include() #include #endif //__has_include() #endif //__has_include #endif // defined(__APPLE__) || defined(__FreeBSD__) #ifndef !defined(__BYTE_ORDER__) || !defined(__ORDER_LITTLE_ENDIAN__) #define CROARING_IS_BIG_ENDIAN 0 #endif #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ #define CROARING_IS_BIG_ENDIAN 0 #else // __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ #define CROARING_IS_BIG_ENDIAN 1 #endif // __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ #endif // Host <-> big endian conversion. #if CROARING_IS_BIG_ENDIAN #define croaring_htobe64(x) (x) #elif defined(_WIN32) || defined(_WIN64) // CROARING_IS_BIG_ENDIAN #include #define croaring_htobe64(x) _byteswap_uint64(x) #elif defined(__APPLE__) // CROARING_IS_BIG_ENDIAN #include #define croaring_htobe64(x) OSSwapInt64(x) #elif defined(__has_include) && \ __has_include( \ ) && (defined(__linux__) || defined(__FreeBSD__)) // CROARING_IS_BIG_ENDIAN #include #if defined(__linux__) #define croaring_htobe64(x) bswap_64(x) #elif defined(__FreeBSD__) #define croaring_htobe64(x) bswap64(x) #else #warning "Unknown platform, report as an error" #endif #else // CROARING_IS_BIG_ENDIAN // Gets compiled to bswap or equivalent on most compilers. #define croaring_htobe64(x) \ (((x & 0x00000000000000FFULL) << 56) | \ ((x & 0x000000000000FF00ULL) << 40) | \ ((x & 0x0000000000FF0000ULL) << 24) | \ ((x & 0x00000000FF000000ULL) << 8) | ((x & 0x000000FF00000000ULL) >> 8) | \ ((x & 0x0000FF0000000000ULL) >> 24) | \ ((x & 0x00FF000000000000ULL) >> 40) | \ ((x & 0xFF00000000000000ULL) >> 56)) #endif // CROARING_IS_BIG_ENDIAN #define croaring_be64toh(x) croaring_htobe64(x) // End of host <-> big endian conversion. // Defines for the possible CROARING atomic implementations #define CROARING_ATOMIC_IMPL_NONE 1 #define CROARING_ATOMIC_IMPL_CPP 2 #define CROARING_ATOMIC_IMPL_C 3 #define CROARING_ATOMIC_IMPL_C_WINDOWS 4 // If the use has forced a specific implementation, use that, otherwise, // figure out the best implementation we can use. #if !defined(CROARING_ATOMIC_IMPL) #if defined(__cplusplus) && __cplusplus >= 201103L #ifdef __has_include #if __has_include() #define CROARING_ATOMIC_IMPL CROARING_ATOMIC_IMPL_CPP #endif //__has_include() #else // We lack __has_include to check: #define CROARING_ATOMIC_IMPL CROARING_ATOMIC_IMPL_CPP #endif //__has_include #elif __STDC_VERSION__ >= 201112L && !defined(__STDC_NO_ATOMICS__) #define CROARING_ATOMIC_IMPL CROARING_ATOMIC_IMPL_C #elif CROARING_REGULAR_VISUAL_STUDIO // https://www.technetworkhub.com/c11-atomics-in-visual-studio-2022-version-17/ #define CROARING_ATOMIC_IMPL CROARING_ATOMIC_IMPL_C_WINDOWS #endif #endif // !defined(CROARING_ATOMIC_IMPL) #if CROARING_ATOMIC_IMPL == CROARING_ATOMIC_IMPL_C #include typedef _Atomic(uint32_t) croaring_refcount_t; static inline void croaring_refcount_inc(croaring_refcount_t *val) { // Increasing the reference counter can always be done with // memory_order_relaxed: New references to an object can only be formed from // an existing reference, and passing an existing reference from one thread // to another must already provide any required synchronization. atomic_fetch_add_explicit(val, 1, memory_order_relaxed); } static inline bool croaring_refcount_dec(croaring_refcount_t *val) { // It is important to enforce any possible access to the object in one // thread (through an existing reference) to happen before deleting the // object in a different thread. This is achieved by a "release" operation // after dropping a reference (any access to the object through this // reference must obviously happened before), and an "acquire" operation // before deleting the object. bool is_zero = atomic_fetch_sub_explicit(val, 1, memory_order_release) == 1; if (is_zero) { atomic_thread_fence(memory_order_acquire); } return is_zero; } static inline uint32_t croaring_refcount_get(const croaring_refcount_t *val) { return atomic_load_explicit(val, memory_order_relaxed); } #elif CROARING_ATOMIC_IMPL == CROARING_ATOMIC_IMPL_CPP #include typedef std::atomic croaring_refcount_t; static inline void croaring_refcount_inc(croaring_refcount_t *val) { val->fetch_add(1, std::memory_order_relaxed); } static inline bool croaring_refcount_dec(croaring_refcount_t *val) { // See above comments on the c11 atomic implementation for memory ordering bool is_zero = val->fetch_sub(1, std::memory_order_release) == 1; if (is_zero) { std::atomic_thread_fence(std::memory_order_acquire); } return is_zero; } static inline uint32_t croaring_refcount_get(const croaring_refcount_t *val) { return val->load(std::memory_order_relaxed); } #elif CROARING_ATOMIC_IMPL == CROARING_ATOMIC_IMPL_C_WINDOWS #include #pragma intrinsic(_InterlockedIncrement) #pragma intrinsic(_InterlockedDecrement) // _InterlockedIncrement and _InterlockedDecrement take a (signed) long, and // overflow is defined to wrap, so we can pretend it is a uint32_t for our case typedef volatile long croaring_refcount_t; static inline void croaring_refcount_inc(croaring_refcount_t *val) { _InterlockedIncrement(val); } static inline bool croaring_refcount_dec(croaring_refcount_t *val) { return _InterlockedDecrement(val) == 0; } static inline uint32_t croaring_refcount_get(const croaring_refcount_t *val) { // Per // https://learn.microsoft.com/en-us/windows/win32/sync/interlocked-variable-access // > Simple reads and writes to properly-aligned 32-bit variables are atomic // > operations. In other words, you will not end up with only one portion // > of the variable updated; all bits are updated in an atomic fashion. return *val; } #elif CROARING_ATOMIC_IMPL == CROARING_ATOMIC_IMPL_NONE #include typedef uint32_t croaring_refcount_t; static inline void croaring_refcount_inc(croaring_refcount_t *val) { *val += 1; } static inline bool croaring_refcount_dec(croaring_refcount_t *val) { assert(*val > 0); *val -= 1; return val == 0; } static inline uint32_t croaring_refcount_get(const croaring_refcount_t *val) { return *val; } #else #error "Unknown atomic implementation" #endif #if defined(__GNUC__) || defined(__clang__) #define CROARING_DEPRECATED __attribute__((deprecated)) #elif defined(_MSC_VER) #define CROARING_DEPRECATED __declspec(deprecated) #else #define CROARING_DEPRECATED #endif // defined(__GNUC__) || defined(__clang__) // We want to initialize structs to zero portably (C and C++), without // warnings. We can do mystruct s = CROARING_ZERO_INITIALIZER; #if __cplusplus #define CROARING_ZERO_INITIALIZER \ {} #else #define CROARING_ZERO_INITIALIZER \ { 0 } #endif // We need portability.h to be included first, // but we also always want isadetection.h to be // included (right after). // See https://github.com/RoaringBitmap/CRoaring/issues/394 // There is no scenario where we want portability.h to // be included, but not isadetection.h: the latter is a // strict requirement. #include // include it last! #endif /* INCLUDE_PORTABILITY_H_ */