cpu_id.cpp 6.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263
  1. #include "cpu_id.h"
  2. #include "types.h"
  3. #include "platform.h"
  4. #include <util/generic/singleton.h>
  5. #if defined(_win_)
  6. #include <intrin.h>
  7. #include <immintrin.h>
  8. #elif defined(_x86_)
  9. #include <cpuid.h>
  10. #endif
  11. #include <string.h>
  12. #if defined(_x86_) && !defined(_win_)
  13. static ui64 _xgetbv(ui32 xcr) {
  14. ui32 eax;
  15. ui32 edx;
  16. __asm__ volatile(
  17. "xgetbv"
  18. : "=a"(eax), "=d"(edx)
  19. : "c"(xcr));
  20. return (static_cast<ui64>(edx) << 32) | eax;
  21. }
  22. #endif
  23. bool NX86::CpuId(ui32 op, ui32 subOp, ui32* res) noexcept {
  24. #if defined(_x86_)
  25. #if defined(_MSC_VER)
  26. static_assert(sizeof(int) == sizeof(ui32), "ups, something wrong here");
  27. __cpuidex((int*)res, op, subOp);
  28. #else
  29. __cpuid_count(op, subOp, res[0], res[1], res[2], res[3]);
  30. #endif
  31. return true;
  32. #else
  33. (void)op;
  34. (void)subOp;
  35. memset(res, 0, 4 * sizeof(ui32));
  36. return false;
  37. #endif
  38. }
  39. bool NX86::CpuId(ui32 op, ui32* res) noexcept {
  40. #if defined(_x86_)
  41. #if defined(_MSC_VER)
  42. static_assert(sizeof(int) == sizeof(ui32), "ups, something wrong here");
  43. __cpuid((int*)res, op);
  44. #else
  45. __cpuid(op, res[0], res[1], res[2], res[3]);
  46. #endif
  47. return true;
  48. #else
  49. (void)op;
  50. memset(res, 0, 4 * sizeof(ui32));
  51. return false;
  52. #endif
  53. }
  54. namespace {
  55. union TX86CpuInfo {
  56. ui32 Info[4];
  57. struct {
  58. ui32 EAX;
  59. ui32 EBX;
  60. ui32 ECX;
  61. ui32 EDX;
  62. };
  63. inline TX86CpuInfo(ui32 op) noexcept {
  64. NX86::CpuId(op, Info);
  65. }
  66. inline TX86CpuInfo(ui32 op, ui32 subOp) noexcept {
  67. NX86::CpuId(op, subOp, Info);
  68. }
  69. };
  70. static_assert(sizeof(TX86CpuInfo) == 16, "please, fix me");
  71. } // namespace
  72. // https://en.wikipedia.org/wiki/CPUID
  73. bool NX86::HaveRDTSCP() noexcept {
  74. return (TX86CpuInfo(0x80000001).EDX >> 27) & 1u;
  75. }
  76. bool NX86::HaveSSE() noexcept {
  77. return (TX86CpuInfo(0x1).EDX >> 25) & 1u;
  78. }
  79. bool NX86::HaveSSE2() noexcept {
  80. return (TX86CpuInfo(0x1).EDX >> 26) & 1u;
  81. }
  82. bool NX86::HaveSSE3() noexcept {
  83. return TX86CpuInfo(0x1).ECX & 1u;
  84. }
  85. bool NX86::HavePCLMUL() noexcept {
  86. return (TX86CpuInfo(0x1).ECX >> 1) & 1u;
  87. }
  88. bool NX86::HaveSSSE3() noexcept {
  89. return (TX86CpuInfo(0x1).ECX >> 9) & 1u;
  90. }
  91. bool NX86::HaveSSE41() noexcept {
  92. return (TX86CpuInfo(0x1).ECX >> 19) & 1u;
  93. }
  94. bool NX86::HaveSSE42() noexcept {
  95. return (TX86CpuInfo(0x1).ECX >> 20) & 1u;
  96. }
  97. bool NX86::HaveF16C() noexcept {
  98. return (TX86CpuInfo(0x1).ECX >> 29) & 1u;
  99. }
  100. bool NX86::HavePOPCNT() noexcept {
  101. return (TX86CpuInfo(0x1).ECX >> 23) & 1u;
  102. }
  103. bool NX86::HaveAES() noexcept {
  104. return (TX86CpuInfo(0x1).ECX >> 25) & 1u;
  105. }
  106. bool NX86::HaveXSAVE() noexcept {
  107. return (TX86CpuInfo(0x1).ECX >> 26) & 1u;
  108. }
  109. bool NX86::HaveOSXSAVE() noexcept {
  110. return (TX86CpuInfo(0x1).ECX >> 27) & 1u;
  111. }
  112. bool NX86::HaveAVX() noexcept {
  113. #if defined(_x86_)
  114. // http://www.intel.com/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-optimization-manual.pdf
  115. // https://bugs.chromium.org/p/chromium/issues/detail?id=375968
  116. return HaveOSXSAVE() // implies HaveXSAVE()
  117. && (_xgetbv(0) & 6u) == 6u // XMM state and YMM state are enabled by OS
  118. && ((TX86CpuInfo(0x1).ECX >> 28) & 1u); // AVX bit
  119. #else
  120. return false;
  121. #endif
  122. }
  123. bool NX86::HaveFMA() noexcept {
  124. return HaveAVX() && ((TX86CpuInfo(0x1).ECX >> 12) & 1u);
  125. }
  126. bool NX86::HaveAVX2() noexcept {
  127. return HaveAVX() && ((TX86CpuInfo(0x7, 0).EBX >> 5) & 1u);
  128. }
  129. bool NX86::HaveBMI1() noexcept {
  130. return (TX86CpuInfo(0x7, 0).EBX >> 3) & 1u;
  131. }
  132. bool NX86::HaveBMI2() noexcept {
  133. return (TX86CpuInfo(0x7, 0).EBX >> 8) & 1u;
  134. }
  135. bool NX86::HaveAVX512F() noexcept {
  136. #if defined(_x86_)
  137. // https://software.intel.com/en-us/articles/how-to-detect-knl-instruction-support
  138. return HaveOSXSAVE() // implies HaveXSAVE()
  139. && (_xgetbv(0) & 6u) == 6u // XMM state and YMM state are enabled by OS
  140. && ((_xgetbv(0) >> 5) & 7u) == 7u // ZMM state is enabled by OS
  141. && TX86CpuInfo(0x0).EAX >= 0x7 // leaf 7 is present
  142. && ((TX86CpuInfo(0x7, 0).EBX >> 16) & 1u); // AVX512F bit
  143. #else
  144. return false;
  145. #endif
  146. }
  147. bool NX86::HaveAVX512DQ() noexcept {
  148. return HaveAVX512F() && ((TX86CpuInfo(0x7, 0).EBX >> 17) & 1u);
  149. }
  150. bool NX86::HaveRDSEED() noexcept {
  151. return TX86CpuInfo(0x0).EAX >= 0x7 && ((TX86CpuInfo(0x7, 0).EBX >> 18) & 1u);
  152. }
  153. bool NX86::HaveADX() noexcept {
  154. return TX86CpuInfo(0x0).EAX >= 0x7 && ((TX86CpuInfo(0x7, 0).EBX >> 19) & 1u);
  155. }
  156. bool NX86::HaveAVX512IFMA() noexcept {
  157. return HaveAVX512F() && ((TX86CpuInfo(0x7, 0).EBX >> 21) & 1u);
  158. }
  159. bool NX86::HavePCOMMIT() noexcept {
  160. return TX86CpuInfo(0x0).EAX >= 0x7 && ((TX86CpuInfo(0x7, 0).EBX >> 22) & 1u);
  161. }
  162. bool NX86::HaveCLFLUSHOPT() noexcept {
  163. return TX86CpuInfo(0x0).EAX >= 0x7 && ((TX86CpuInfo(0x7, 0).EBX >> 23) & 1u);
  164. }
  165. bool NX86::HaveCLWB() noexcept {
  166. return TX86CpuInfo(0x0).EAX >= 0x7 && ((TX86CpuInfo(0x7, 0).EBX >> 24) & 1u);
  167. }
  168. bool NX86::HaveAVX512PF() noexcept {
  169. return HaveAVX512F() && ((TX86CpuInfo(0x7, 0).EBX >> 26) & 1u);
  170. }
  171. bool NX86::HaveAVX512ER() noexcept {
  172. return HaveAVX512F() && ((TX86CpuInfo(0x7, 0).EBX >> 27) & 1u);
  173. }
  174. bool NX86::HaveAVX512CD() noexcept {
  175. return HaveAVX512F() && ((TX86CpuInfo(0x7, 0).EBX >> 28) & 1u);
  176. }
  177. bool NX86::HaveSHA() noexcept {
  178. return TX86CpuInfo(0x0).EAX >= 0x7 && ((TX86CpuInfo(0x7, 0).EBX >> 29) & 1u);
  179. }
  180. bool NX86::HaveAVX512BW() noexcept {
  181. return HaveAVX512F() && ((TX86CpuInfo(0x7, 0).EBX >> 30) & 1u);
  182. }
  183. bool NX86::HaveAVX512VL() noexcept {
  184. return HaveAVX512F() && ((TX86CpuInfo(0x7, 0).EBX >> 31) & 1u);
  185. }
  186. bool NX86::HavePREFETCHWT1() noexcept {
  187. return TX86CpuInfo(0x0).EAX >= 0x7 && ((TX86CpuInfo(0x7, 0).ECX >> 0) & 1u);
  188. }
  189. bool NX86::HaveAVX512VBMI() noexcept {
  190. return HaveAVX512F() && ((TX86CpuInfo(0x7, 0).ECX >> 1) & 1u);
  191. }
  192. bool NX86::HaveRDRAND() noexcept {
  193. return TX86CpuInfo(0x0).EAX >= 0x7 && ((TX86CpuInfo(0x1).ECX >> 30) & 1u);
  194. }
  195. const char* CpuBrand(ui32* store) noexcept {
  196. memset(store, 0, 12 * sizeof(*store));
  197. #if defined(_x86_)
  198. NX86::CpuId(0x80000002, store);
  199. NX86::CpuId(0x80000003, store + 4);
  200. NX86::CpuId(0x80000004, store + 8);
  201. #endif
  202. return (const char*)store;
  203. }
  204. #define Y_DEF_NAME(X) \
  205. bool NX86::CachedHave##X() noexcept { \
  206. return SingletonWithPriority<TFlagsCache, 0>()->Have##X##_; \
  207. }
  208. Y_CPU_ID_ENUMERATE_OUTLINED_CACHED_DEFINE(Y_DEF_NAME)
  209. #undef Y_DEF_NAME