tuklib_integer.h 18 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784
  1. ///////////////////////////////////////////////////////////////////////////////
  2. //
  3. /// \file tuklib_integer.h
  4. /// \brief Various integer and bit operations
  5. ///
  6. /// This file provides macros or functions to do some basic integer and bit
  7. /// operations.
  8. ///
  9. /// Native endian inline functions (XX = 16, 32, or 64):
  10. /// - Unaligned native endian reads: readXXne(ptr)
  11. /// - Unaligned native endian writes: writeXXne(ptr, num)
  12. /// - Aligned native endian reads: aligned_readXXne(ptr)
  13. /// - Aligned native endian writes: aligned_writeXXne(ptr, num)
  14. ///
  15. /// Endianness-converting integer operations (these can be macros!)
  16. /// (XX = 16, 32, or 64; Y = b or l):
  17. /// - Byte swapping: bswapXX(num)
  18. /// - Byte order conversions to/from native (byteswaps if Y isn't
  19. /// the native endianness): convXXYe(num)
  20. /// - Unaligned reads: readXXYe(ptr)
  21. /// - Unaligned writes: writeXXYe(ptr, num)
  22. /// - Aligned reads: aligned_readXXYe(ptr)
  23. /// - Aligned writes: aligned_writeXXYe(ptr, num)
  24. ///
  25. /// Since the above can be macros, the arguments should have no side effects
  26. /// because they may be evaluated more than once.
  27. ///
  28. /// Bit scan operations for non-zero 32-bit integers (inline functions):
  29. /// - Bit scan reverse (find highest non-zero bit): bsr32(num)
  30. /// - Count leading zeros: clz32(num)
  31. /// - Count trailing zeros: ctz32(num)
  32. /// - Bit scan forward (simply an alias for ctz32()): bsf32(num)
  33. ///
  34. /// The above bit scan operations return 0-31. If num is zero,
  35. /// the result is undefined.
  36. //
  37. // Authors: Lasse Collin
  38. // Joachim Henke
  39. //
  40. // This file has been put into the public domain.
  41. // You can do whatever you want with this file.
  42. //
  43. ///////////////////////////////////////////////////////////////////////////////
  44. #ifndef TUKLIB_INTEGER_H
  45. #define TUKLIB_INTEGER_H
  46. #include "tuklib_common.h"
  47. #include <string.h>
  48. // Newer Intel C compilers require immintrin.h for _bit_scan_reverse()
  49. // and such functions.
  50. #if defined(__INTEL_COMPILER) && (__INTEL_COMPILER >= 1500)
  51. # include <immintrin.h>
  52. #endif
  53. ///////////////////
  54. // Byte swapping //
  55. ///////////////////
  56. #if defined(HAVE___BUILTIN_BSWAPXX)
  57. // GCC >= 4.8 and Clang
  58. # define bswap16(n) __builtin_bswap16(n)
  59. # define bswap32(n) __builtin_bswap32(n)
  60. # define bswap64(n) __builtin_bswap64(n)
  61. #elif defined(HAVE_BYTESWAP_H)
  62. // glibc, uClibc, dietlibc
  63. # include <byteswap.h>
  64. # ifdef HAVE_BSWAP_16
  65. # define bswap16(num) bswap_16(num)
  66. # endif
  67. # ifdef HAVE_BSWAP_32
  68. # define bswap32(num) bswap_32(num)
  69. # endif
  70. # ifdef HAVE_BSWAP_64
  71. # define bswap64(num) bswap_64(num)
  72. # endif
  73. #elif defined(HAVE_SYS_ENDIAN_H)
  74. // *BSDs and Darwin
  75. # include <sys/endian.h>
  76. #elif defined(HAVE_SYS_BYTEORDER_H)
  77. // Solaris
  78. # include <sys/byteorder.h>
  79. # ifdef BSWAP_16
  80. # define bswap16(num) BSWAP_16(num)
  81. # endif
  82. # ifdef BSWAP_32
  83. # define bswap32(num) BSWAP_32(num)
  84. # endif
  85. # ifdef BSWAP_64
  86. # define bswap64(num) BSWAP_64(num)
  87. # endif
  88. # ifdef BE_16
  89. # define conv16be(num) BE_16(num)
  90. # endif
  91. # ifdef BE_32
  92. # define conv32be(num) BE_32(num)
  93. # endif
  94. # ifdef BE_64
  95. # define conv64be(num) BE_64(num)
  96. # endif
  97. # ifdef LE_16
  98. # define conv16le(num) LE_16(num)
  99. # endif
  100. # ifdef LE_32
  101. # define conv32le(num) LE_32(num)
  102. # endif
  103. # ifdef LE_64
  104. # define conv64le(num) LE_64(num)
  105. # endif
  106. #endif
  107. #ifndef bswap16
  108. # define bswap16(n) (uint16_t)( \
  109. (((n) & 0x00FFU) << 8) \
  110. | (((n) & 0xFF00U) >> 8) \
  111. )
  112. #endif
  113. #ifndef bswap32
  114. # define bswap32(n) (uint32_t)( \
  115. (((n) & UINT32_C(0x000000FF)) << 24) \
  116. | (((n) & UINT32_C(0x0000FF00)) << 8) \
  117. | (((n) & UINT32_C(0x00FF0000)) >> 8) \
  118. | (((n) & UINT32_C(0xFF000000)) >> 24) \
  119. )
  120. #endif
  121. #ifndef bswap64
  122. # define bswap64(n) (uint64_t)( \
  123. (((n) & UINT64_C(0x00000000000000FF)) << 56) \
  124. | (((n) & UINT64_C(0x000000000000FF00)) << 40) \
  125. | (((n) & UINT64_C(0x0000000000FF0000)) << 24) \
  126. | (((n) & UINT64_C(0x00000000FF000000)) << 8) \
  127. | (((n) & UINT64_C(0x000000FF00000000)) >> 8) \
  128. | (((n) & UINT64_C(0x0000FF0000000000)) >> 24) \
  129. | (((n) & UINT64_C(0x00FF000000000000)) >> 40) \
  130. | (((n) & UINT64_C(0xFF00000000000000)) >> 56) \
  131. )
  132. #endif
  133. // Define conversion macros using the basic byte swapping macros.
  134. #ifdef WORDS_BIGENDIAN
  135. # ifndef conv16be
  136. # define conv16be(num) ((uint16_t)(num))
  137. # endif
  138. # ifndef conv32be
  139. # define conv32be(num) ((uint32_t)(num))
  140. # endif
  141. # ifndef conv64be
  142. # define conv64be(num) ((uint64_t)(num))
  143. # endif
  144. # ifndef conv16le
  145. # define conv16le(num) bswap16(num)
  146. # endif
  147. # ifndef conv32le
  148. # define conv32le(num) bswap32(num)
  149. # endif
  150. # ifndef conv64le
  151. # define conv64le(num) bswap64(num)
  152. # endif
  153. #else
  154. # ifndef conv16be
  155. # define conv16be(num) bswap16(num)
  156. # endif
  157. # ifndef conv32be
  158. # define conv32be(num) bswap32(num)
  159. # endif
  160. # ifndef conv64be
  161. # define conv64be(num) bswap64(num)
  162. # endif
  163. # ifndef conv16le
  164. # define conv16le(num) ((uint16_t)(num))
  165. # endif
  166. # ifndef conv32le
  167. # define conv32le(num) ((uint32_t)(num))
  168. # endif
  169. # ifndef conv64le
  170. # define conv64le(num) ((uint64_t)(num))
  171. # endif
  172. #endif
  173. ////////////////////////////////
  174. // Unaligned reads and writes //
  175. ////////////////////////////////
  176. // The traditional way of casting e.g. *(const uint16_t *)uint8_pointer
  177. // is bad even if the uint8_pointer is properly aligned because this kind
  178. // of casts break strict aliasing rules and result in undefined behavior.
  179. // With unaligned pointers it's even worse: compilers may emit vector
  180. // instructions that require aligned pointers even if non-vector
  181. // instructions work with unaligned pointers.
  182. //
  183. // Using memcpy() is the standard compliant way to do unaligned access.
  184. // Many modern compilers inline it so there is no function call overhead.
  185. // For those compilers that don't handle the memcpy() method well, the
  186. // old casting method (that violates strict aliasing) can be requested at
  187. // build time. A third method, casting to a packed struct, would also be
  188. // an option but isn't provided to keep things simpler (it's already a mess).
  189. // Hopefully this is flexible enough in practice.
  190. static inline uint16_t
  191. read16ne(const uint8_t *buf)
  192. {
  193. #if defined(TUKLIB_FAST_UNALIGNED_ACCESS) \
  194. && defined(TUKLIB_USE_UNSAFE_TYPE_PUNNING)
  195. return *(const uint16_t *)buf;
  196. #else
  197. uint16_t num;
  198. memcpy(&num, buf, sizeof(num));
  199. return num;
  200. #endif
  201. }
  202. static inline uint32_t
  203. read32ne(const uint8_t *buf)
  204. {
  205. #if defined(TUKLIB_FAST_UNALIGNED_ACCESS) \
  206. && defined(TUKLIB_USE_UNSAFE_TYPE_PUNNING)
  207. return *(const uint32_t *)buf;
  208. #else
  209. uint32_t num;
  210. memcpy(&num, buf, sizeof(num));
  211. return num;
  212. #endif
  213. }
  214. static inline uint64_t
  215. read64ne(const uint8_t *buf)
  216. {
  217. #if defined(TUKLIB_FAST_UNALIGNED_ACCESS) \
  218. && defined(TUKLIB_USE_UNSAFE_TYPE_PUNNING)
  219. return *(const uint64_t *)buf;
  220. #else
  221. uint64_t num;
  222. memcpy(&num, buf, sizeof(num));
  223. return num;
  224. #endif
  225. }
  226. static inline void
  227. write16ne(uint8_t *buf, uint16_t num)
  228. {
  229. #if defined(TUKLIB_FAST_UNALIGNED_ACCESS) \
  230. && defined(TUKLIB_USE_UNSAFE_TYPE_PUNNING)
  231. *(uint16_t *)buf = num;
  232. #else
  233. memcpy(buf, &num, sizeof(num));
  234. #endif
  235. return;
  236. }
  237. static inline void
  238. write32ne(uint8_t *buf, uint32_t num)
  239. {
  240. #if defined(TUKLIB_FAST_UNALIGNED_ACCESS) \
  241. && defined(TUKLIB_USE_UNSAFE_TYPE_PUNNING)
  242. *(uint32_t *)buf = num;
  243. #else
  244. memcpy(buf, &num, sizeof(num));
  245. #endif
  246. return;
  247. }
  248. static inline void
  249. write64ne(uint8_t *buf, uint64_t num)
  250. {
  251. #if defined(TUKLIB_FAST_UNALIGNED_ACCESS) \
  252. && defined(TUKLIB_USE_UNSAFE_TYPE_PUNNING)
  253. *(uint64_t *)buf = num;
  254. #else
  255. memcpy(buf, &num, sizeof(num));
  256. #endif
  257. return;
  258. }
  259. static inline uint16_t
  260. read16be(const uint8_t *buf)
  261. {
  262. #if defined(WORDS_BIGENDIAN) || defined(TUKLIB_FAST_UNALIGNED_ACCESS)
  263. uint16_t num = read16ne(buf);
  264. return conv16be(num);
  265. #else
  266. uint16_t num = ((uint16_t)buf[0] << 8) | (uint16_t)buf[1];
  267. return num;
  268. #endif
  269. }
  270. static inline uint16_t
  271. read16le(const uint8_t *buf)
  272. {
  273. #if !defined(WORDS_BIGENDIAN) || defined(TUKLIB_FAST_UNALIGNED_ACCESS)
  274. uint16_t num = read16ne(buf);
  275. return conv16le(num);
  276. #else
  277. uint16_t num = ((uint16_t)buf[0]) | ((uint16_t)buf[1] << 8);
  278. return num;
  279. #endif
  280. }
  281. static inline uint32_t
  282. read32be(const uint8_t *buf)
  283. {
  284. #if defined(WORDS_BIGENDIAN) || defined(TUKLIB_FAST_UNALIGNED_ACCESS)
  285. uint32_t num = read32ne(buf);
  286. return conv32be(num);
  287. #else
  288. uint32_t num = (uint32_t)buf[0] << 24;
  289. num |= (uint32_t)buf[1] << 16;
  290. num |= (uint32_t)buf[2] << 8;
  291. num |= (uint32_t)buf[3];
  292. return num;
  293. #endif
  294. }
  295. static inline uint32_t
  296. read32le(const uint8_t *buf)
  297. {
  298. #if !defined(WORDS_BIGENDIAN) || defined(TUKLIB_FAST_UNALIGNED_ACCESS)
  299. uint32_t num = read32ne(buf);
  300. return conv32le(num);
  301. #else
  302. uint32_t num = (uint32_t)buf[0];
  303. num |= (uint32_t)buf[1] << 8;
  304. num |= (uint32_t)buf[2] << 16;
  305. num |= (uint32_t)buf[3] << 24;
  306. return num;
  307. #endif
  308. }
  309. static inline uint64_t
  310. read64be(const uint8_t *buf)
  311. {
  312. #if defined(WORDS_BIGENDIAN) || defined(TUKLIB_FAST_UNALIGNED_ACCESS)
  313. uint64_t num = read64ne(buf);
  314. return conv64be(num);
  315. #else
  316. uint64_t num = (uint64_t)buf[0] << 56;
  317. num |= (uint64_t)buf[1] << 48;
  318. num |= (uint64_t)buf[2] << 40;
  319. num |= (uint64_t)buf[3] << 32;
  320. num |= (uint64_t)buf[4] << 24;
  321. num |= (uint64_t)buf[5] << 16;
  322. num |= (uint64_t)buf[6] << 8;
  323. num |= (uint64_t)buf[7];
  324. return num;
  325. #endif
  326. }
  327. static inline uint64_t
  328. read64le(const uint8_t *buf)
  329. {
  330. #if !defined(WORDS_BIGENDIAN) || defined(TUKLIB_FAST_UNALIGNED_ACCESS)
  331. uint64_t num = read64ne(buf);
  332. return conv64le(num);
  333. #else
  334. uint64_t num = (uint64_t)buf[0];
  335. num |= (uint64_t)buf[1] << 8;
  336. num |= (uint64_t)buf[2] << 16;
  337. num |= (uint64_t)buf[3] << 24;
  338. num |= (uint64_t)buf[4] << 32;
  339. num |= (uint64_t)buf[5] << 40;
  340. num |= (uint64_t)buf[6] << 48;
  341. num |= (uint64_t)buf[7] << 56;
  342. return num;
  343. #endif
  344. }
  345. // NOTE: Possible byte swapping must be done in a macro to allow the compiler
  346. // to optimize byte swapping of constants when using glibc's or *BSD's
  347. // byte swapping macros. The actual write is done in an inline function
  348. // to make type checking of the buf pointer possible.
  349. #if defined(WORDS_BIGENDIAN) || defined(TUKLIB_FAST_UNALIGNED_ACCESS)
  350. # define write16be(buf, num) write16ne(buf, conv16be(num))
  351. # define write32be(buf, num) write32ne(buf, conv32be(num))
  352. # define write64be(buf, num) write64ne(buf, conv64be(num))
  353. #endif
  354. #if !defined(WORDS_BIGENDIAN) || defined(TUKLIB_FAST_UNALIGNED_ACCESS)
  355. # define write16le(buf, num) write16ne(buf, conv16le(num))
  356. # define write32le(buf, num) write32ne(buf, conv32le(num))
  357. # define write64le(buf, num) write64ne(buf, conv64le(num))
  358. #endif
  359. #ifndef write16be
  360. static inline void
  361. write16be(uint8_t *buf, uint16_t num)
  362. {
  363. buf[0] = (uint8_t)(num >> 8);
  364. buf[1] = (uint8_t)num;
  365. return;
  366. }
  367. #endif
  368. #ifndef write16le
  369. static inline void
  370. write16le(uint8_t *buf, uint16_t num)
  371. {
  372. buf[0] = (uint8_t)num;
  373. buf[1] = (uint8_t)(num >> 8);
  374. return;
  375. }
  376. #endif
  377. #ifndef write32be
  378. static inline void
  379. write32be(uint8_t *buf, uint32_t num)
  380. {
  381. buf[0] = (uint8_t)(num >> 24);
  382. buf[1] = (uint8_t)(num >> 16);
  383. buf[2] = (uint8_t)(num >> 8);
  384. buf[3] = (uint8_t)num;
  385. return;
  386. }
  387. #endif
  388. #ifndef write32le
  389. static inline void
  390. write32le(uint8_t *buf, uint32_t num)
  391. {
  392. buf[0] = (uint8_t)num;
  393. buf[1] = (uint8_t)(num >> 8);
  394. buf[2] = (uint8_t)(num >> 16);
  395. buf[3] = (uint8_t)(num >> 24);
  396. return;
  397. }
  398. #endif
  399. //////////////////////////////
  400. // Aligned reads and writes //
  401. //////////////////////////////
  402. // Separate functions for aligned reads and writes are provided since on
  403. // strict-align archs aligned access is much faster than unaligned access.
  404. //
  405. // Just like in the unaligned case, memcpy() is needed to avoid
  406. // strict aliasing violations. However, on archs that don't support
  407. // unaligned access the compiler cannot know that the pointers given
  408. // to memcpy() are aligned which results in slow code. As of C11 there is
  409. // no standard way to tell the compiler that we know that the address is
  410. // aligned but some compilers have language extensions to do that. With
  411. // such language extensions the memcpy() method gives excellent results.
  412. //
  413. // What to do on a strict-align system when no known language extensions
  414. // are available? Falling back to byte-by-byte access would be safe but ruin
  415. // optimizations that have been made specifically with aligned access in mind.
  416. // As a compromise, aligned reads will fall back to non-compliant type punning
  417. // but aligned writes will be byte-by-byte, that is, fast reads are preferred
  418. // over fast writes. This obviously isn't great but hopefully it's a working
  419. // compromise for now.
  420. //
  421. // __builtin_assume_aligned is supported by GCC >= 4.7 and clang >= 3.6.
  422. #ifdef HAVE___BUILTIN_ASSUME_ALIGNED
  423. # define tuklib_memcpy_aligned(dest, src, size) \
  424. memcpy(dest, __builtin_assume_aligned(src, size), size)
  425. #else
  426. # define tuklib_memcpy_aligned(dest, src, size) \
  427. memcpy(dest, src, size)
  428. # ifndef TUKLIB_FAST_UNALIGNED_ACCESS
  429. # define TUKLIB_USE_UNSAFE_ALIGNED_READS 1
  430. # endif
  431. #endif
// Aligned native-endian 16-bit read. The caller guarantees that buf is
// suitably aligned for uint16_t.
static inline uint16_t
aligned_read16ne(const uint8_t *buf)
{
#if defined(TUKLIB_USE_UNSAFE_TYPE_PUNNING) \
		|| defined(TUKLIB_USE_UNSAFE_ALIGNED_READS)
	// Non-conforming type punning: used when explicitly requested, or
	// when __builtin_assume_aligned isn't available on a strict-align
	// target (TUKLIB_USE_UNSAFE_ALIGNED_READS, set above).
	return *(const uint16_t *)buf;
#else
	// tuklib_memcpy_aligned() tells the compiler about the alignment
	// when the language extension is available, so this compiles to a
	// single aligned load.
	uint16_t num;
	tuklib_memcpy_aligned(&num, buf, sizeof(num));
	return num;
#endif
}
// Aligned native-endian 32-bit read. The caller guarantees that buf is
// suitably aligned for uint32_t.
static inline uint32_t
aligned_read32ne(const uint8_t *buf)
{
#if defined(TUKLIB_USE_UNSAFE_TYPE_PUNNING) \
		|| defined(TUKLIB_USE_UNSAFE_ALIGNED_READS)
	// Non-conforming type punning; see aligned_read16ne() for when
	// this branch is taken.
	return *(const uint32_t *)buf;
#else
	uint32_t num;
	tuklib_memcpy_aligned(&num, buf, sizeof(num));
	return num;
#endif
}
// Aligned native-endian 64-bit read. The caller guarantees that buf is
// suitably aligned for uint64_t.
static inline uint64_t
aligned_read64ne(const uint8_t *buf)
{
#if defined(TUKLIB_USE_UNSAFE_TYPE_PUNNING) \
		|| defined(TUKLIB_USE_UNSAFE_ALIGNED_READS)
	// Non-conforming type punning; see aligned_read16ne() for when
	// this branch is taken.
	return *(const uint64_t *)buf;
#else
	uint64_t num;
	tuklib_memcpy_aligned(&num, buf, sizeof(num));
	return num;
#endif
}
// Aligned native-endian 16-bit write. The caller guarantees that buf is
// suitably aligned for uint16_t. Note that unlike the aligned reads,
// writes never use the TUKLIB_USE_UNSAFE_ALIGNED_READS fallback: reads
// were preferred over writes (see the comment block above).
static inline void
aligned_write16ne(uint8_t *buf, uint16_t num)
{
#ifdef TUKLIB_USE_UNSAFE_TYPE_PUNNING
	*(uint16_t *)buf = num;
#else
	tuklib_memcpy_aligned(buf, &num, sizeof(num));
#endif
	return;
}
// Aligned native-endian 32-bit write. The caller guarantees that buf is
// suitably aligned for uint32_t. Type punning only when explicitly
// requested; see aligned_write16ne().
static inline void
aligned_write32ne(uint8_t *buf, uint32_t num)
{
#ifdef TUKLIB_USE_UNSAFE_TYPE_PUNNING
	*(uint32_t *)buf = num;
#else
	tuklib_memcpy_aligned(buf, &num, sizeof(num));
#endif
	return;
}
// Aligned native-endian 64-bit write. The caller guarantees that buf is
// suitably aligned for uint64_t. Type punning only when explicitly
// requested; see aligned_write16ne().
static inline void
aligned_write64ne(uint8_t *buf, uint64_t num)
{
#ifdef TUKLIB_USE_UNSAFE_TYPE_PUNNING
	*(uint64_t *)buf = num;
#else
	tuklib_memcpy_aligned(buf, &num, sizeof(num));
#endif
	return;
}
  498. static inline uint16_t
  499. aligned_read16be(const uint8_t *buf)
  500. {
  501. uint16_t num = aligned_read16ne(buf);
  502. return conv16be(num);
  503. }
  504. static inline uint16_t
  505. aligned_read16le(const uint8_t *buf)
  506. {
  507. uint16_t num = aligned_read16ne(buf);
  508. return conv16le(num);
  509. }
  510. static inline uint32_t
  511. aligned_read32be(const uint8_t *buf)
  512. {
  513. uint32_t num = aligned_read32ne(buf);
  514. return conv32be(num);
  515. }
  516. static inline uint32_t
  517. aligned_read32le(const uint8_t *buf)
  518. {
  519. uint32_t num = aligned_read32ne(buf);
  520. return conv32le(num);
  521. }
  522. static inline uint64_t
  523. aligned_read64be(const uint8_t *buf)
  524. {
  525. uint64_t num = aligned_read64ne(buf);
  526. return conv64be(num);
  527. }
  528. static inline uint64_t
  529. aligned_read64le(const uint8_t *buf)
  530. {
  531. uint64_t num = aligned_read64ne(buf);
  532. return conv64le(num);
  533. }
  534. // These need to be macros like in the unaligned case.
  535. #define aligned_write16be(buf, num) aligned_write16ne((buf), conv16be(num))
  536. #define aligned_write16le(buf, num) aligned_write16ne((buf), conv16le(num))
  537. #define aligned_write32be(buf, num) aligned_write32ne((buf), conv32be(num))
  538. #define aligned_write32le(buf, num) aligned_write32ne((buf), conv32le(num))
  539. #define aligned_write64be(buf, num) aligned_write64ne((buf), conv64be(num))
  540. #define aligned_write64le(buf, num) aligned_write64ne((buf), conv64le(num))
  541. ////////////////////
  542. // Bit operations //
  543. ////////////////////
/// Bit scan reverse: return the index (0-31) of the highest set bit
/// of n. The result is undefined if n is zero.
static inline uint32_t
bsr32(uint32_t n)
{
	// Check for ICC first, since it tends to define __GNUC__ too.
#if defined(__INTEL_COMPILER)
	return _bit_scan_reverse(n);
#elif TUKLIB_GNUC_REQ(3, 4) && UINT_MAX == UINT32_MAX
	// GCC >= 3.4 has __builtin_clz(), which gives good results on
	// multiple architectures. On x86, __builtin_clz() ^ 31U becomes
	// either plain BSR (so the XOR gets optimized away) or LZCNT and
	// XOR (if -march indicates that SSE4a instructions are supported).
	// NOTE: __builtin_clz() counts from the top of unsigned int, so
	// unsigned int must be exactly 32 bits for the XOR trick to work;
	// hence the UINT_MAX == UINT32_MAX condition.
	return (uint32_t)__builtin_clz(n) ^ 31U;
#elif defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__))
	// Inline assembly for older GCC on x86: BSR leaves the index of
	// the highest set bit in the destination register.
	uint32_t i;
	__asm__("bsrl %1, %0" : "=r" (i) : "rm" (n));
	return i;
#elif defined(_MSC_VER)
	// MSVC intrinsic; i receives the bit index directly.
	unsigned long i;
	_BitScanReverse(&i, n);
	return i;
#else
	// Portable fallback: binary search over halves, then quarters,
	// narrowing the position of the highest set bit step by step.
	uint32_t i = 31;

	if ((n & 0xFFFF0000) == 0) {
		n <<= 16;
		i = 15;
	}

	if ((n & 0xFF000000) == 0) {
		n <<= 8;
		i -= 8;
	}

	if ((n & 0xF0000000) == 0) {
		n <<= 4;
		i -= 4;
	}

	if ((n & 0xC0000000) == 0) {
		n <<= 2;
		i -= 2;
	}

	if ((n & 0x80000000) == 0)
		--i;

	return i;
#endif
}
/// Count leading zeros: return the number (0-31) of zero bits above the
/// highest set bit of n. The result is undefined if n is zero.
/// Equivalent to 31 - bsr32(n).
static inline uint32_t
clz32(uint32_t n)
{
	// Check for ICC first, since it tends to define __GNUC__ too.
#if defined(__INTEL_COMPILER)
	// ICC has no clz intrinsic; derive it from bit scan reverse.
	return _bit_scan_reverse(n) ^ 31U;
#elif TUKLIB_GNUC_REQ(3, 4) && UINT_MAX == UINT32_MAX
	// Requires unsigned int to be exactly 32 bits since
	// __builtin_clz() counts from the top of unsigned int.
	return (uint32_t)__builtin_clz(n);
#elif defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__))
	// Inline assembly for older GCC on x86: BSR plus XOR 31
	// converts the bit index into a leading-zero count.
	uint32_t i;
	__asm__("bsrl %1, %0\n\t"
		"xorl $31, %0"
		: "=r" (i) : "rm" (n));
	return i;
#elif defined(_MSC_VER)
	unsigned long i;
	_BitScanReverse(&i, n);
	return i ^ 31U;
#else
	// Portable fallback: binary search, accumulating the count of
	// leading zero bits.
	uint32_t i = 0;

	if ((n & 0xFFFF0000) == 0) {
		n <<= 16;
		i = 16;
	}

	if ((n & 0xFF000000) == 0) {
		n <<= 8;
		i += 8;
	}

	if ((n & 0xF0000000) == 0) {
		n <<= 4;
		i += 4;
	}

	if ((n & 0xC0000000) == 0) {
		n <<= 2;
		i += 2;
	}

	if ((n & 0x80000000) == 0)
		++i;

	return i;
#endif
}
/// Count trailing zeros: return the number (0-31) of zero bits below
/// the lowest set bit of n. The result is undefined if n is zero.
static inline uint32_t
ctz32(uint32_t n)
{
	// Check for ICC first, since it tends to define __GNUC__ too.
#if defined(__INTEL_COMPILER)
	return _bit_scan_forward(n);
#elif TUKLIB_GNUC_REQ(3, 4) && UINT_MAX >= UINT32_MAX
	// Unlike clz32(), ">=" is enough here: __builtin_ctz() counts
	// from the least significant bit, so a wider-than-32-bit
	// unsigned int still gives the correct answer for nonzero n.
	return (uint32_t)__builtin_ctz(n);
#elif defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__))
	// Inline assembly for older GCC on x86: BSF gives the index of
	// the lowest set bit, which equals the trailing-zero count.
	uint32_t i;
	__asm__("bsfl %1, %0" : "=r" (i) : "rm" (n));
	return i;
#elif defined(_MSC_VER)
	unsigned long i;
	_BitScanForward(&i, n);
	return i;
#else
	// Portable fallback: binary search from the low end,
	// accumulating the count of trailing zero bits.
	uint32_t i = 0;

	if ((n & 0x0000FFFF) == 0) {
		n >>= 16;
		i = 16;
	}

	if ((n & 0x000000FF) == 0) {
		n >>= 8;
		i += 8;
	}

	if ((n & 0x0000000F) == 0) {
		n >>= 4;
		i += 4;
	}

	if ((n & 0x00000003) == 0) {
		n >>= 2;
		i += 2;
	}

	if ((n & 0x00000001) == 0)
		++i;

	return i;
#endif
}
  665. #define bsf32 ctz32
  666. #endif