platform.h 7.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245
  1. // Copyright 2010 Google Inc. All rights reserved.
  2. //
  3. // Licensed under the Apache License, Version 2.0 (the "License");
  4. // you may not use this file except in compliance with the License.
  5. // You may obtain a copy of the License at
  6. //
  7. // http://www.apache.org/licenses/LICENSE-2.0
  8. //
  9. // Unless required by applicable law or agreed to in writing, software
  10. // distributed under the License is distributed on an "AS IS" BASIS,
  11. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. // See the License for the specific language governing permissions and
  13. // limitations under the License.
  14. // Detects configuration and defines compiler-specific macros.
  // Also sets user-definable CRCUTIL_USE_* macros to default values.
  16. #ifndef CRCUTIL_PLATFORM_H_
  17. #define CRCUTIL_PLATFORM_H_
  18. // Permanently disable some annoying warnings generated
  19. // by Microsoft CL when compiling Microsoft's headers.
  20. #include "std_headers.h"
  // Selects whether the inline asm version of the code is used.
  // Defaults to 1; predefine CRCUTIL_USE_ASM before including this header
  // to choose another value.
  #ifndef CRCUTIL_USE_ASM
  #define CRCUTIL_USE_ASM 1
  #endif  // CRCUTIL_USE_ASM
  // Target CPU detection.
  //
  // HAVE_I386 and HAVE_AMD64 are defined here (to 1 or 0) unless the build
  // has already defined them, in which case the predefined value is kept.
  //
  // 32-bit x86 is recognized through __i386__ or _M_IX86.
  #if !defined(HAVE_I386)
  #if defined(__i386__) || defined(_M_IX86)
  #define HAVE_I386 1
  #else
  #define HAVE_I386 0
  #endif  // defined(__i386__) || defined(_M_IX86)
  #endif  // !defined(HAVE_I386)

  // 64-bit x86 is recognized through __amd64__ / _M_AMD64 as well as the
  // alternative __x86_64__ / _M_X64 spellings, so that a compiler which
  // provides only one of the two names is still detected.
  #if !defined(HAVE_AMD64)
  #if defined(__amd64__) || defined(__x86_64__) || \
      defined(_M_AMD64) || defined(_M_X64)
  #define HAVE_AMD64 1
  #else
  #define HAVE_AMD64 0
  #endif  // defined(__amd64__) || defined(__x86_64__) || ...
  #endif  // !defined(HAVE_AMD64)
  // SIMD capability flags: HAVE_MMX, HAVE_SSE and HAVE_SSE2.
  //
  // A flag that the build has already defined is left untouched. Otherwise,
  // when targeting x86 or x86-64, a flag is set to 1 if compiling with
  // Microsoft CL, or with GCC when the matching __MMX__ / __SSE__ / __SSE2__
  // macro is defined. In every remaining case the flag is set to 0.
  #if HAVE_AMD64 || HAVE_I386

  #if defined(_MSC_VER)
  // Warning 4668 ('_M_IX86' is not defined as a preprocessor macro) is
  // disabled only while <intrin.h> is being included.
  #pragma warning(push)
  #pragma warning(disable: 4668)
  #include <intrin.h>
  #pragma warning(pop)
  #endif  // defined(_MSC_VER)

  #if !defined(HAVE_MMX) && \
      (defined(_MSC_VER) || (defined(__GNUC__) && defined(__MMX__)))
  #define HAVE_MMX 1
  #endif  // MMX detected

  // The intrinsics header is pulled in only when SSE is auto-detected here.
  #if !defined(HAVE_SSE) && \
      (defined(_MSC_VER) || (defined(__GNUC__) && defined(__SSE__)))
  #include <xmmintrin.h>
  #define HAVE_SSE 1
  #endif  // SSE detected

  // Likewise for SSE2.
  #if !defined(HAVE_SSE2) && \
      (defined(_MSC_VER) || (defined(__GNUC__) && defined(__SSE2__)))
  #include <emmintrin.h>
  #define HAVE_SSE2 1
  #endif  // SSE2 detected

  #endif  // HAVE_AMD64 || HAVE_I386

  // Anything neither predefined nor detected above is unavailable.
  #ifndef HAVE_MMX
  #define HAVE_MMX 0
  #endif  // HAVE_MMX
  #ifndef HAVE_SSE
  #define HAVE_SSE 0
  #endif  // HAVE_SSE
  #ifndef HAVE_SSE2
  #define HAVE_SSE2 0
  #endif  // HAVE_SSE2

  // Error checking: each instruction set requires its predecessors.
  #if !HAVE_MMX && HAVE_SSE
  #error SSE is available but not MMX?
  #endif  // !HAVE_MMX && HAVE_SSE
  #if HAVE_SSE2 && !(HAVE_SSE && HAVE_MMX)
  #error SSE2 is available but not SSE or MMX?
  #endif  // HAVE_SSE2 && !(HAVE_SSE && HAVE_MMX)
  // CRCUTIL_PREFETCH_WIDTH is the byte offset, relative to the current source
  // pointer, at which PREFETCH() issues a prefetch. It defaults to 0, which
  // turns PREFETCH() into a no-op.
  //
  // Measurements behind the default (prefetch length 256, 512, and 4096):
  //   * On newer X5550 CPU, heavily optimized CrcMultiword is 3% faster
  //     without prefetch for inputs smaller than 8MB and less than 1% slower
  //     for 8MB and larger blocks.
  //   * On older Q9650 CPU, the code is 2-3% faster for inputs smaller than
  //     8MB, 4-5% slower when length >= 8MB.
  //
  // At this moment there is no compelling reason to use prefetching.
  #ifndef CRCUTIL_PREFETCH_WIDTH
  #define CRCUTIL_PREFETCH_WIDTH 0
  #endif  // CRCUTIL_PREFETCH_WIDTH

  // PREFETCH(src) expands to an _mm_prefetch() call only when a positive
  // prefetch width is configured and SSE is available; otherwise it expands
  // to nothing.
  #if CRCUTIL_PREFETCH_WIDTH > 0 && HAVE_SSE
  #define PREFETCH(src) \
      _mm_prefetch(reinterpret_cast<const char *>(src) + CRCUTIL_PREFETCH_WIDTH, \
                   _MM_HINT_T0)
  #else
  #define PREFETCH(src)
  #endif  // CRCUTIL_PREFETCH_WIDTH > 0 && HAVE_SSE
  // When the block size exceeds CRCUTIL_MIN_ALIGN_SIZE, the data is aligned
  // before being accessed at word boundary (see generic_crc.cc,
  // ALIGN_ON_WORD_BOUNDARY_IF_NEEDED() macro).
  // Unless predefined, the threshold is 1024 on x86 / x86-64 and 0 elsewhere.
  #if !defined(CRCUTIL_MIN_ALIGN_SIZE) && (HAVE_AMD64 || HAVE_I386)
  #define CRCUTIL_MIN_ALIGN_SIZE (1024)
  #elif !defined(CRCUTIL_MIN_ALIGN_SIZE)
  #define CRCUTIL_MIN_ALIGN_SIZE 0
  #endif  // !defined(CRCUTIL_MIN_ALIGN_SIZE)
  // Use _mm_crc32_u64/32/8 intrinsics?
  // If not, they will be implemented in software.
  //
  // On x86 / x86-64 a predefined CRCUTIL_USE_MM_CRC32 is honored; without one
  // the default is 1 for Microsoft CL and GCC, 0 for other compilers.
  // On every other target the macro is forced to 0, overriding any
  // predefined value.
  #if HAVE_I386 || HAVE_AMD64
  #ifndef CRCUTIL_USE_MM_CRC32
  #if defined(_MSC_VER) || defined(__GNUC__)
  #define CRCUTIL_USE_MM_CRC32 1
  #else
  #define CRCUTIL_USE_MM_CRC32 0
  #endif  // defined(_MSC_VER) || defined(__GNUC__)
  #endif  // CRCUTIL_USE_MM_CRC32
  #else
  #undef CRCUTIL_USE_MM_CRC32
  #define CRCUTIL_USE_MM_CRC32 0
  #endif  // HAVE_I386 || HAVE_AMD64
  // Stringification helpers.
  // TO_STRING_VALUE(text) turns its argument into a string literal exactly
  // as written; TO_STRING(text) macro-expands the argument first and then
  // stringizes the result.
  #define TO_STRING_VALUE(text) #text
  #define TO_STRING(text) TO_STRING_VALUE(text)
  // Shift helpers for counts that may reach the operand width.
  //
  // Compilers give "right shift count >= width of type" warning even though
  // the shift happens only under appropriate "if". The width of the operand
  // is taken as 8 * sizeof(value) bits.
  //
  //   SHIFT_RIGHT_NO_WARNING / SHIFT_LEFT_NO_WARNING
  //       shift by "bits" when it is below the width, by 0 otherwise
  //       (i.e. an out-of-range count leaves "value" unshifted).
  //   SHIFT_RIGHT_SAFE / SHIFT_LEFT_SAFE
  //       same, but an out-of-range count yields 0.
  //
  // "bits" appears several times in the expansions, so it should be free of
  // side effects.
  #define SHIFT_RIGHT_NO_WARNING(value, bits) \
      ((value) >> (((8 * sizeof(value)) > (bits)) ? (bits) : 0))
  #define SHIFT_RIGHT_SAFE(value, bits) \
      (((8 * sizeof(value)) > (bits)) ? SHIFT_RIGHT_NO_WARNING(value, bits) : 0)

  #define SHIFT_LEFT_NO_WARNING(value, bits) \
      ((value) << (((8 * sizeof(value)) > (bits)) ? (bits) : 0))
  #define SHIFT_LEFT_SAFE(value, bits) \
      (((8 * sizeof(value)) > (bits)) ? SHIFT_LEFT_NO_WARNING(value, bits) : 0)
  // GCC-specific macros.
  //
  // GCC_VERSION_AVAILABLE(major, minor) is non-zero when the compiler defines
  // __GNUC__ and reports a version of at least major.minor; it is 0 otherwise.
  //
  // The __GNUC__ test is performed by the #if that surrounds the definition
  // rather than by a "defined(__GNUC__)" inside the macro body: a "defined"
  // operator that results from macro expansion within #if has undefined
  // behavior.
  #if defined(__GNUC__)
  #define GCC_VERSION_AVAILABLE(major, minor) \
      (__GNUC__ > (major) || \
       (__GNUC__ == (major) && __GNUC_MINOR__ >= (minor)))
  #else
  #define GCC_VERSION_AVAILABLE(major, minor) 0
  #endif  // defined(__GNUC__)

  #if defined(__GNUC__)
  // The GenericCrc tables must be properly aligned.
  // Penalty for misalignment? 50% performance degradation.
  // For 128-bit SSE2, the penalty is access violation.
  #define GCC_ALIGN_ATTRIBUTE(n) __attribute__((aligned(n)))

  #if GCC_VERSION_AVAILABLE(4, 4)
  // If not marked as "omit frame pointer",
  // GCC won't be able to find enough registers.
  #define GCC_OMIT_FRAME_POINTER \
      __attribute__((__optimize__(2, "omit-frame-pointer")))
  #endif  // GCC_VERSION_AVAILABLE(4, 4)

  // Provide the Microsoft-style __forceinline spelling under GCC.
  #if !defined(__forceinline)
  #define __forceinline __attribute__((__always_inline__)) inline
  #endif  // !defined(__forceinline)

  // SSE2_MOVQ is the mnemonic, as a string literal, to use for moves between
  // an XMM register and a 64-bit general-purpose register.
  #if defined(__APPLE_CC__)
  // The version of GCC used by Mac OS X Xcode v 5664 does not understand
  // "movq xmm, r64" instruction and requires the use of "movd" (probably
  // because of the bug in GCC which treats "movq/movd xmm,r64 or r64,xmm"
  // the same).
  //
  // Leaving common sense aside, let's peek into Intel's instruction
  // reference manual. This is what the description of the MOVD command says:
  //   MOVD xmm, r/m32 (opcode 66 0F 6E /r)
  //   MOVD r/m32, xmm (opcode 66 0F 7E /r)
  //   MOVQ xmm, r/m64 (opcode 66 REX.W 0F 6E /r)
  //   MOVQ r/m64, xmm (opcode 66 REX.W 0F 7E /r)
  #define SSE2_MOVQ "movd"
  #else
  #define SSE2_MOVQ "movq"
  #endif  // defined(__APPLE_CC__)
  #endif  // defined(__GNUC__)
  // Fallbacks for the compiler-specific macros that are still undefined at
  // this point.
  //
  // GCC_ALIGN_ATTRIBUTE(n) and GCC_OMIT_FRAME_POINTER expand to nothing, and
  // __forceinline becomes plain "inline" (except under Microsoft CL, where
  // it is left alone).
  #ifndef GCC_ALIGN_ATTRIBUTE
  #define GCC_ALIGN_ATTRIBUTE(n)
  #endif  // GCC_ALIGN_ATTRIBUTE

  #ifndef GCC_OMIT_FRAME_POINTER
  #define GCC_OMIT_FRAME_POINTER
  #endif  // GCC_OMIT_FRAME_POINTER

  #if !(defined(_MSC_VER) || defined(__forceinline))
  #define __forceinline inline
  #endif  // !(defined(_MSC_VER) || defined(__forceinline))
  191. #endif // CRCUTIL_PLATFORM_H_