conf.h 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261
  1. /**
  2. Copyright (c) 2016-2019, Powturbo
  3. All rights reserved.
  4. Redistribution and use in source and binary forms, with or without
  5. modification, are permitted provided that the following conditions are
  6. met:
  7. 1. Redistributions of source code must retain the above copyright
  8. notice, this list of conditions and the following disclaimer.
  9. 2. Redistributions in binary form must reproduce the above copyright
  10. notice, this list of conditions and the following disclaimer in the
  11. documentation and/or other materials provided with the distribution.
  12. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
  13. IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
  14. TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
  15. PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  16. HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  17. SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
  18. TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
  19. PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
  20. LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
  21. NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
  22. SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  23. - homepage : https://sites.google.com/site/powturbo/
  24. - github : https://github.com/powturbo
  25. - twitter : https://twitter.com/powturbo
  26. - email : powturbo [_AT_] gmail [_DOT_] com
  27. **/
  28. // conf.h - config & common
  29. #ifndef CONF_H
  30. #define CONF_H
  31. //------------------------- Compiler ------------------------------------------
  32. #if defined(__GNUC__)
  33. #include <stdint.h>
  34. #define ALIGNED(t,v,n) t v __attribute__ ((aligned (n)))
  35. #define ALWAYS_INLINE inline __attribute__((always_inline))
  36. #define NOINLINE __attribute__((noinline))
  37. #define _PACKED __attribute__ ((packed))
  38. #define likely(x) __builtin_expect((x),1)
  39. #define unlikely(x) __builtin_expect((x),0)
  40. #define popcnt32(_x_) __builtin_popcount(_x_)
  41. #define popcnt64(_x_) __builtin_popcountll(_x_)
  42. #if defined(__i386__) || defined(__x86_64__)
  43. //__bsr32: 1:0,2:1,3:1,4:2,5:2,6:2,7:2,8:3,9:3,10:3,11:3,12:3,13:3,14:3,15:3,16:4,17:4,18:4,19:4,20:4,21:4,22:4,23:4,24:4,25:4,26:4,27:4,28:4,29:4,30:4,31:4,32:5
  44. // bsr32: 0:0,1:1,2:2,3:2,4:3,5:3,6:3,7:3,8:4,9:4,10:4,11:4,12:4,13:4,14:4,15:4,16:5,17:5,18:5,19:5,20:5,21:5,22:5,23:5,24:5,25:5,26:5,27:5,28:5,29:5,30:5,31:5,32:6,
  45. static inline int __bsr32( int x) { asm("bsr %1,%0" : "=r" (x) : "rm" (x) ); return x; }
  46. static inline int bsr32( int x) { int b = -1; asm("bsrl %1,%0" : "+r" (b) : "rm" (x) ); return b + 1; }
  47. static inline int bsr64(uint64_t x) { return x?64 - __builtin_clzll(x):0; }
  48. static inline unsigned rol32(unsigned x, int s) { asm ("roll %%cl,%0" :"=r" (x) :"0" (x),"c" (s)); return x; }
  49. static inline unsigned ror32(unsigned x, int s) { asm ("rorl %%cl,%0" :"=r" (x) :"0" (x),"c" (s)); return x; }
  50. static inline uint64_t rol64(uint64_t x, int s) { asm ("rolq %%cl,%0" :"=r" (x) :"0" (x),"c" (s)); return x; }
  51. static inline uint64_t ror64(uint64_t x, int s) { asm ("rorq %%cl,%0" :"=r" (x) :"0" (x),"c" (s)); return x; }
  52. #else
  53. static inline int __bsr32(unsigned x ) { return 31 - __builtin_clz( x); }
  54. static inline int bsr32(int x ) { return x?32 - __builtin_clz( x):0; }
  55. static inline int bsr64(uint64_t x) { return x?64 - __builtin_clzll(x):0; }
  56. static inline unsigned rol32(unsigned x, int s) { return x << s | x >> (32 - s); }
  57. static inline unsigned ror32(unsigned x, int s) { return x >> s | x << (32 - s); }
  58. static inline unsigned rol64(unsigned x, int s) { return x << s | x >> (64 - s); }
  59. static inline unsigned ror64(unsigned x, int s) { return x >> s | x << (64 - s); }
  60. #endif
  61. #define ctz64(_x_) __builtin_ctzll(_x_)
  62. #define ctz32(_x_) __builtin_ctz(_x_) // 0:32 ctz32(1<<a) = a (a=1..31)
  63. #define clz64(_x_) __builtin_clzll(_x_)
  64. #define clz32(_x_) __builtin_clz(_x_)
  65. //#define bswap8(x) (x)
  66. #if __GNUC__ > 4 || __GNUC__ == 4 && __GNUC_MINOR__ >= 8
  67. #define bswap16(x) __builtin_bswap16(x)
  68. #else
  69. static inline unsigned short bswap16(unsigned short x) { return __builtin_bswap32(x << 16); }
  70. #endif
  71. #define bswap32(x) __builtin_bswap32(x)
  72. #define bswap64(x) __builtin_bswap64(x)
  73. #elif _MSC_VER //----------------------------------------------------
  74. #include <windows.h>
  75. #include <intrin.h>
  76. #if _MSC_VER < 1600
  77. #error #include "vs/stdint.h"
  78. #define __builtin_prefetch(x,a)
  79. #define inline __inline
  80. #else
  81. #include <stdint.h>
  82. #define __builtin_prefetch(x,a) _mm_prefetch(x, _MM_HINT_NTA)
  83. #endif
  84. #define ALIGNED(t,v,n) __declspec(align(n)) t v
  85. #define ALWAYS_INLINE __forceinline
  86. #define NOINLINE __declspec(noinline)
  87. #define THREADLOCAL __declspec(thread)
  88. #define likely(x) (x)
  89. #define unlikely(x) (x)
  90. static inline int __bsr32(unsigned x) { unsigned long z=0; _BitScanReverse(&z, x); return z; }
  91. static inline int bsr32( unsigned x) { unsigned long z; _BitScanReverse(&z, x); return x?z+1:0; }
  92. static inline int ctz32( unsigned x) { unsigned long z; _BitScanForward(&z, x); return x?z:32; }
  93. static inline int clz32( unsigned x) { unsigned long z; _BitScanReverse(&z, x); return x?31-z:32; }
  #if !defined(_M_ARM64) && !defined(_M_X64)
// Replacements for the 64-bit bit-scan intrinsics on targets other than
// _M_X64 / _M_ARM64, built from the 32-bit intrinsics on each half of x.
// Both return non-zero iff x != 0 and store the bit index in *ret; *ret is left
// untouched when x == 0. Only the half that decides the result is scanned, so
// no indeterminate index is ever read.
static inline unsigned char _BitScanForward64(unsigned long* ret, uint64_t x) {
  unsigned long lo = (unsigned long)x, hi = (unsigned long)(x >> 32), idx = 0;
  if (lo)      { _BitScanForward(&idx, lo); *ret = idx; }
  else if (hi) { _BitScanForward(&idx, hi); *ret = 32 + idx; }
  return x != 0;
}
// Declared `inline` like its sibling so an unused copy in a translation unit
// does not trigger unused-function diagnostics.
static inline unsigned char _BitScanReverse64(unsigned long* ret, uint64_t x) {
  unsigned long lo = (unsigned long)x, hi = (unsigned long)(x >> 32), idx = 0;
  if (hi)      { _BitScanReverse(&idx, hi); *ret = idx + 32; }
  else if (lo) { _BitScanReverse(&idx, lo); *ret = idx; }
  return x != 0;
}
  #endif
  104. static inline int bsr64(uint64_t x) { unsigned long z=0; _BitScanReverse64(&z, x); return x?z+1:0; }
  105. static inline int ctz64(uint64_t x) { unsigned long z; _BitScanForward64(&z, x); return x?z:64; }
  106. static inline int clz64(uint64_t x) { unsigned long z; _BitScanReverse64(&z, x); return x?63-z:64; }
  107. #define rol32(x,s) _lrotl(x, s)
  108. #define ror32(x,s) _lrotr(x, s)
  109. #define bswap16(x) _byteswap_ushort(x)
  110. #define bswap32(x) _byteswap_ulong(x)
  111. #define bswap64(x) _byteswap_uint64(x)
  112. #define popcnt32(x) __popcnt(x)
  113. #ifdef _WIN64
  114. #define popcnt64(x) __popcnt64(x)
  115. #else
  116. #define popcnt64(x) (popcnt32(x) + popcnt32(x>>32))
  117. #endif
  118. #define sleep(x) Sleep(x/1000)
  119. #define fseeko _fseeki64
  120. #define ftello _ftelli64
  121. #define strcasecmp _stricmp
  122. #define strncasecmp _strnicmp
  123. #define strtoull _strtoui64
  124. static inline double round(double num) { return (num > 0.0) ? floor(num + 0.5) : ceil(num - 0.5); }
  125. #endif
  126. #define bsr8(_x_) bsr32(_x_)
  127. #define bsr16(_x_) bsr32(_x_)
  128. #define ctz8(_x_) ctz32(_x_)
  129. #define ctz16(_x_) ctz32(_x_)
  130. #define clz8(_x_) (clz32(_x_)-24)
  131. #define clz16(_x_) (clz32(_x_)-16)
  132. #define popcnt8(x) popcnt32(x)
  133. #define popcnt16(x) popcnt32(x)
  134. //--------------- Unaligned memory access -------------------------------------
  135. #ifdef UA_MEMCPY
  136. #include <string.h>
  137. static inline unsigned short ctou16(const void *cp) { unsigned short x; memcpy(&x, cp, sizeof(x)); return x; }
  138. static inline unsigned ctou32(const void *cp) { unsigned x; memcpy(&x, cp, sizeof(x)); return x; }
  139. static inline unsigned long long ctou64(const void *cp) { unsigned long long x; memcpy(&x, cp, sizeof(x)); return x; }
  140. static inline size_t ctousz(const void *cp) { size_t x; memcpy(&x, cp, sizeof(x)); return x; }
  141. static inline float ctof32(const void *cp) { float x; memcpy(&x, cp, sizeof(x)); return x; }
  142. static inline double ctof64(const void *cp) { double x; memcpy(&x, cp, sizeof(x)); return x; }
  143. static inline void stou16( void *cp, unsigned short x) { memcpy(cp, &x, sizeof(x)); }
  144. static inline void stou32( void *cp, unsigned x) { memcpy(cp, &x, sizeof(x)); }
  145. static inline void stou64( void *cp, unsigned long long x) { memcpy(cp, &x, sizeof(x)); }
  146. static inline void stousz( void *cp, size_t x) { memcpy(cp, &x, sizeof(x)); }
  147. static inline void stof32( void *cp, float x) { memcpy(cp, &x, sizeof(x)); }
  148. static inline void stof64( void *cp, double x) { memcpy(cp, &x, sizeof(x)); }
  149. #elif defined(__i386__) || defined(__x86_64__) || \
  150. defined(_M_IX86) || defined(_M_AMD64) || _MSC_VER ||\
  151. defined(__powerpc__) || defined(__s390__) ||\
  152. defined(__ARM_FEATURE_UNALIGNED) || defined(__aarch64__) || defined(__arm__) ||\
  153. defined(__ARM_ARCH_4__) || defined(__ARM_ARCH_4T__) || \
  154. defined(__ARM_ARCH_5__) || defined(__ARM_ARCH_5T__) || defined(__ARM_ARCH_5TE__) || defined(__ARM_ARCH_5TEJ__) || \
  155. defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6T2__) || defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6ZK__)
  156. #define ctou16(_cp_) (*(unsigned short *)(_cp_))
  157. #define ctou32(_cp_) (*(unsigned *)(_cp_))
  158. #define ctof32(_cp_) (*(float *)(_cp_))
  159. #if defined(__i386__) || defined(__x86_64__) || defined(__powerpc__) || defined(__s390__) || defined(_MSC_VER)
  160. #define ctou64(_cp_) (*(uint64_t *)(_cp_))
  161. #define ctof64(_cp_) (*(double *)(_cp_))
  162. #elif defined(__ARM_FEATURE_UNALIGNED)
  163. struct _PACKED longu { uint64_t l; };
  164. struct _PACKED doubleu { double d; };
  165. #define ctou64(_cp_) ((struct longu *)(_cp_))->l
  166. #define ctof64(_cp_) ((struct doubleu *)(_cp_))->d
  167. #endif
  168. #elif defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) || defined(__ARM_ARCH_7M__) || defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7S__)
  169. struct _PACKED shortu { unsigned short s; };
  170. struct _PACKED unsignedu { unsigned u; };
  171. struct _PACKED longu { uint64_t l; };
  172. struct _PACKED floatu { float f; };
  173. struct _PACKED doubleu { double d; };
  174. #define ctou16(_cp_) ((struct shortu *)(_cp_))->s
  175. #define ctou32(_cp_) ((struct unsignedu *)(_cp_))->u
  176. #define ctou64(_cp_) ((struct longu *)(_cp_))->l
  177. #define ctof32(_cp_) ((struct floatu *)(_cp_))->f
  178. #define ctof64(_cp_) ((struct doubleu *)(_cp_))->d
  179. #else
  180. #error "unknown cpu"
  181. #endif
  182. #define ctou24(_cp_) (ctou32(_cp_) & 0xffffff)
  183. #define ctou48(_cp_) (ctou64(_cp_) & 0xffffffffffffull)
  184. #define ctou8(_cp_) (*(_cp_))
  185. //--------------------- wordsize ----------------------------------------------
  186. #if defined(__64BIT__) || defined(_LP64) || defined(__LP64__) || defined(_WIN64) ||\
  187. defined(__x86_64__) || defined(_M_X64) ||\
  188. defined(__ia64) || defined(_M_IA64) ||\
  189. defined(__aarch64__) ||\
  190. defined(__mips64) ||\
  191. defined(__powerpc64__) || defined(__ppc64__) || defined(__PPC64__) ||\
  192. defined(__s390x__)
  193. #define __WORDSIZE 64
  194. #else
  195. #define __WORDSIZE 32
  196. #endif
  197. #endif
  198. //---------------------misc ---------------------------------------------------
  199. #define BZHI64(_u_, _b_) ((_u_) & ((1ull<<(_b_))-1))
  200. #define BZHI32(_u_, _b_) ((_u_) & ((1u <<(_b_))-1))
  201. #define BZHI16(_u_, _b_) BZHI32(_u_, _b_)
  202. #define BZHI8(_u_, _b_) BZHI32(_u_, _b_)
  203. #define SIZE_ROUNDUP(_n_, _a_) (((size_t)(_n_) + (size_t)((_a_) - 1)) & ~(size_t)((_a_) - 1))
  204. #define ALIGN_DOWN(__ptr, __a) ((void *)((uintptr_t)(__ptr) & ~(uintptr_t)((__a) - 1)))
  205. #define TEMPLATE2_(_x_, _y_) _x_##_y_
  206. #define TEMPLATE2(_x_, _y_) TEMPLATE2_(_x_,_y_)
  207. #define TEMPLATE3_(_x_,_y_,_z_) _x_##_y_##_z_
  208. #define TEMPLATE3(_x_,_y_,_z_) TEMPLATE3_(_x_, _y_, _z_)
  209. #define CACHE_LINE_SIZE 64
  210. #define PREFETCH_DISTANCE (CACHE_LINE_SIZE*4)
  211. //--- NDEBUG -------
#include <stdio.h>
#include <stdlib.h>
  213. #ifdef _MSC_VER
  214. #ifdef NDEBUG
  215. #define AS(expr, fmt, ...)
  216. #define AC(expr, fmt, ...) do { if(!(expr)) { fprintf(stderr, fmt, ##__VA_ARGS__ ); fflush(stderr); abort(); } } while(0)
  217. #define die(fmt, ...) do { fprintf(stderr, fmt, ##__VA_ARGS__ ); fflush(stderr); exit(-1); } while(0)
  218. #else
  219. #define AS(expr, fmt, ...) do { if(!(expr)) { fflush(stdout);fprintf(stderr, "%s:%s:%d:", __FILE__, __FUNCTION__, __LINE__); fprintf(stderr, fmt, ##__VA_ARGS__ ); fflush(stderr); abort(); } } while(0)
  220. #define AC(expr, fmt, ...) do { if(!(expr)) { fflush(stdout);fprintf(stderr, "%s:%s:%d:", __FILE__, __FUNCTION__, __LINE__); fprintf(stderr, fmt, ##__VA_ARGS__ ); fflush(stderr); abort(); } } while(0)
  221. #define die(fmt, ...) do { fprintf(stderr, "%s:%s:%d:", __FILE__, __FUNCTION__, __LINE__); fprintf(stderr, fmt, ##__VA_ARGS__ ); fflush(stderr); exit(-1); } while(0)
  222. #endif
  223. #else
  224. #ifdef NDEBUG
  225. #define AS(expr, fmt,args...)
  226. #define AC(expr, fmt,args...) do { if(!(expr)) { fprintf(stderr, fmt, ## args ); fflush(stderr); abort(); } } while(0)
  227. #define die(fmt,args...) do { fprintf(stderr, fmt, ## args ); fflush(stderr); exit(-1); } while(0)
  228. #else
  229. #define AS(expr, fmt,args...) do { if(!(expr)) { fflush(stdout);fprintf(stderr, "%s:%s:%d:", __FILE__, __FUNCTION__, __LINE__); fprintf(stderr, fmt, ## args ); fflush(stderr); abort(); } } while(0)
  230. #define AC(expr, fmt,args...) do { if(!(expr)) { fflush(stdout);fprintf(stderr, "%s:%s:%d:", __FILE__, __FUNCTION__, __LINE__); fprintf(stderr, fmt, ## args ); fflush(stderr); abort(); } } while(0)
  231. #define die(fmt,args...) do { fprintf(stderr, "%s:%s:%d:", __FILE__, __FUNCTION__, __LINE__); fprintf(stderr, fmt, ## args ); fflush(stderr); exit(-1); } while(0)
  232. #endif
  233. #endif