cpu_detect.cc 10.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342
  1. // Copyright 2022 The Abseil Authors
  2. //
  3. // Licensed under the Apache License, Version 2.0 (the "License");
  4. // you may not use this file except in compliance with the License.
  5. // You may obtain a copy of the License at
  6. //
  7. // https://www.apache.org/licenses/LICENSE-2.0
  8. //
  9. // Unless required by applicable law or agreed to in writing, software
  10. // distributed under the License is distributed on an "AS IS" BASIS,
  11. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. // See the License for the specific language governing permissions and
  13. // limitations under the License.
  14. #include "absl/crc/internal/cpu_detect.h"
  15. #include <cstdint>
  16. #include <string>
  17. #include "absl/base/config.h"
  18. #include "absl/types/optional.h" // IWYU pragma: keep
  19. #if defined(__aarch64__) && defined(__linux__)
  20. #include <asm/hwcap.h>
  21. #include <sys/auxv.h>
  22. #endif
  23. #if defined(__aarch64__) && defined(__APPLE__)
  24. #if defined(__has_include) && __has_include(<arm/cpu_capabilities_public.h>)
  25. #include <arm/cpu_capabilities_public.h>
  26. #endif
  27. #include <sys/sysctl.h>
  28. #include <sys/types.h>
  29. #endif
  30. #if defined(_WIN32) || defined(_WIN64)
  31. #include <intrin.h>
  32. #endif
  33. #if defined(__x86_64__) || defined(_M_X64)
  34. #if ABSL_HAVE_BUILTIN(__cpuid)
  35. // MSVC-equivalent __cpuid intrinsic declaration for clang-like compilers
  36. // for non-Windows build environments.
  37. extern void __cpuid(int[4], int);
  38. #elif !defined(_WIN32) && !defined(_WIN64)
  39. // MSVC defines this function for us.
  40. // https://learn.microsoft.com/en-us/cpp/intrinsics/cpuid-cpuidex
  // Stand-in for the MSVC-style __cpuid intrinsic.
  //
  // Executes the CPUID instruction with EAX = `info_type` and ECX = 0, then
  // stores the resulting EAX, EBX, ECX and EDX values in cpu_info[0] through
  // cpu_info[3], in that order.
  static void __cpuid(int cpu_info[4], int info_type) {
    int eax_out;
    int ebx_out;
    int ecx_out;
    int edx_out;
    __asm__ volatile("cpuid \n\t"
                     : "=a"(eax_out), "=b"(ebx_out), "=c"(ecx_out),
                       "=d"(edx_out)
                     : "a"(info_type), "c"(0));
    cpu_info[0] = eax_out;
    cpu_info[1] = ebx_out;
    cpu_info[2] = ecx_out;
    cpu_info[3] = edx_out;
  }
  47. #endif // !defined(_WIN32) && !defined(_WIN64)
  48. #endif // defined(__x86_64__) || defined(_M_X64)
  49. namespace absl {
  50. ABSL_NAMESPACE_BEGIN
  51. namespace crc_internal {
  52. #if defined(__x86_64__) || defined(_M_X64)
  53. namespace {
  // CPU vendors distinguished by the CPUID vendor string.
  enum class Vendor {
    kUnknown = 0,  // Any vendor string other than the two listed below.
    kIntel = 1,    // Vendor string "GenuineIntel".
    kAmd = 2,      // Vendor string "AuthenticAMD".
  };
  59. Vendor GetVendor() {
  60. // Get the vendor string (issue CPUID with eax = 0).
  61. int cpu_info[4];
  62. __cpuid(cpu_info, 0);
  63. std::string vendor;
  64. vendor.append(reinterpret_cast<char*>(&cpu_info[1]), 4);
  65. vendor.append(reinterpret_cast<char*>(&cpu_info[3]), 4);
  66. vendor.append(reinterpret_cast<char*>(&cpu_info[2]), 4);
  67. if (vendor == "GenuineIntel") {
  68. return Vendor::kIntel;
  69. } else if (vendor == "AuthenticAMD") {
  70. return Vendor::kAmd;
  71. } else {
  72. return Vendor::kUnknown;
  73. }
  74. }
  75. CpuType GetIntelCpuType() {
  76. // To get general information and extended features we send eax = 1 and
  77. // ecx = 0 to cpuid. The response is returned in eax, ebx, ecx and edx.
  78. // (See Intel 64 and IA-32 Architectures Software Developer's Manual
  79. // Volume 2A: Instruction Set Reference, A-M CPUID).
  80. // https://www.intel.com/content/www/us/en/architecture-and-technology/64-ia-32-architectures-software-developer-vol-2a-manual.html
  81. // https://learn.microsoft.com/en-us/cpp/intrinsics/cpuid-cpuidex
  82. int cpu_info[4];
  83. __cpuid(cpu_info, 1);
  84. // Response in eax bits as follows:
  85. // 0-3 (stepping id)
  86. // 4-7 (model number),
  87. // 8-11 (family code),
  88. // 12-13 (processor type),
  89. // 16-19 (extended model)
  90. // 20-27 (extended family)
  91. int family = (cpu_info[0] >> 8) & 0x0f;
  92. int model_num = (cpu_info[0] >> 4) & 0x0f;
  93. int ext_family = (cpu_info[0] >> 20) & 0xff;
  94. int ext_model_num = (cpu_info[0] >> 16) & 0x0f;
  95. int brand_id = cpu_info[1] & 0xff;
  96. // Process the extended family and model info if necessary
  97. if (family == 0x0f) {
  98. family += ext_family;
  99. }
  100. if (family == 0x0f || family == 0x6) {
  101. model_num += (ext_model_num << 4);
  102. }
  103. switch (brand_id) {
  104. case 0: // no brand ID, so parse CPU family/model
  105. switch (family) {
  106. case 6: // Most PentiumIII processors are in this category
  107. switch (model_num) {
  108. case 0x2c: // Westmere: Gulftown
  109. return CpuType::kIntelWestmere;
  110. case 0x2d: // Sandybridge
  111. return CpuType::kIntelSandybridge;
  112. case 0x3e: // Ivybridge
  113. return CpuType::kIntelIvybridge;
  114. case 0x3c: // Haswell (client)
  115. case 0x3f: // Haswell
  116. return CpuType::kIntelHaswell;
  117. case 0x4f: // Broadwell
  118. case 0x56: // BroadwellDE
  119. return CpuType::kIntelBroadwell;
  120. case 0x55: // Skylake Xeon
  121. if ((cpu_info[0] & 0x0f) < 5) { // stepping < 5 is skylake
  122. return CpuType::kIntelSkylakeXeon;
  123. } else { // stepping >= 5 is cascadelake
  124. return CpuType::kIntelCascadelakeXeon;
  125. }
  126. case 0x5e: // Skylake (client)
  127. return CpuType::kIntelSkylake;
  128. default:
  129. return CpuType::kUnknown;
  130. }
  131. default:
  132. return CpuType::kUnknown;
  133. }
  134. default:
  135. return CpuType::kUnknown;
  136. }
  137. }
  138. CpuType GetAmdCpuType() {
  139. // To get general information and extended features we send eax = 1 and
  140. // ecx = 0 to cpuid. The response is returned in eax, ebx, ecx and edx.
  141. // (See Intel 64 and IA-32 Architectures Software Developer's Manual
  142. // Volume 2A: Instruction Set Reference, A-M CPUID).
  143. // https://learn.microsoft.com/en-us/cpp/intrinsics/cpuid-cpuidex
  144. int cpu_info[4];
  145. __cpuid(cpu_info, 1);
  146. // Response in eax bits as follows:
  147. // 0-3 (stepping id)
  148. // 4-7 (model number),
  149. // 8-11 (family code),
  150. // 12-13 (processor type),
  151. // 16-19 (extended model)
  152. // 20-27 (extended family)
  153. int family = (cpu_info[0] >> 8) & 0x0f;
  154. int model_num = (cpu_info[0] >> 4) & 0x0f;
  155. int ext_family = (cpu_info[0] >> 20) & 0xff;
  156. int ext_model_num = (cpu_info[0] >> 16) & 0x0f;
  157. if (family == 0x0f) {
  158. family += ext_family;
  159. model_num += (ext_model_num << 4);
  160. }
  161. switch (family) {
  162. case 0x17:
  163. switch (model_num) {
  164. case 0x0: // Stepping Ax
  165. case 0x1: // Stepping Bx
  166. return CpuType::kAmdNaples;
  167. case 0x30: // Stepping Ax
  168. case 0x31: // Stepping Bx
  169. return CpuType::kAmdRome;
  170. default:
  171. return CpuType::kUnknown;
  172. }
  173. break;
  174. case 0x19:
  175. switch (model_num) {
  176. case 0x0: // Stepping Ax
  177. case 0x1: // Stepping B0
  178. return CpuType::kAmdMilan;
  179. case 0x10: // Stepping A0
  180. case 0x11: // Stepping B0
  181. return CpuType::kAmdGenoa;
  182. case 0x44: // Stepping A0
  183. return CpuType::kAmdRyzenV3000;
  184. default:
  185. return CpuType::kUnknown;
  186. }
  187. break;
  188. default:
  189. return CpuType::kUnknown;
  190. }
  191. }
  192. } // namespace
  193. CpuType GetCpuType() {
  194. switch (GetVendor()) {
  195. case Vendor::kIntel:
  196. return GetIntelCpuType();
  197. case Vendor::kAmd:
  198. return GetAmdCpuType();
  199. default:
  200. return CpuType::kUnknown;
  201. }
  202. }
  // x86-64 implementation: always reports that the Arm CRC32 and PMULL
  // instructions are not supported.
  bool SupportsArmCRC32PMULL() {
    return false;
  }
  204. #elif defined(__aarch64__) && defined(__linux__)
  // Supplies HWCAP_CPUID (bit 11) when the included headers do not define it.
  #if !defined(HWCAP_CPUID)
  #define HWCAP_CPUID (1 << 11)
  #endif

  // Emits an `mrs` instruction that reads the system register named `reg` and
  // stores the result in the lvalue `out`.
  #define ABSL_INTERNAL_AARCH64_ID_REG_READ(reg, out) \
    asm("mrs %0, " #reg : "=r"(out))
  210. CpuType GetCpuType() {
  211. // MIDR_EL1 is not visible to EL0, however the access will be emulated by
  212. // linux if AT_HWCAP has HWCAP_CPUID set.
  213. //
  214. // This method will be unreliable on heterogeneous computing systems (ex:
  215. // big.LITTLE) since the value of MIDR_EL1 will change based on the calling
  216. // thread.
  217. uint64_t hwcaps = getauxval(AT_HWCAP);
  218. if (hwcaps & HWCAP_CPUID) {
  219. uint64_t midr = 0;
  220. ABSL_INTERNAL_AARCH64_ID_REG_READ(MIDR_EL1, midr);
  221. uint32_t implementer = (midr >> 24) & 0xff;
  222. uint32_t part_number = (midr >> 4) & 0xfff;
  223. switch (implementer) {
  224. case 0x41:
  225. switch (part_number) {
  226. case 0xd0c: return CpuType::kArmNeoverseN1;
  227. case 0xd40: return CpuType::kArmNeoverseV1;
  228. case 0xd49: return CpuType::kArmNeoverseN2;
  229. case 0xd4f: return CpuType::kArmNeoverseV2;
  230. default:
  231. return CpuType::kUnknown;
  232. }
  233. break;
  234. case 0xc0:
  235. switch (part_number) {
  236. case 0xac3: return CpuType::kAmpereSiryn;
  237. default:
  238. return CpuType::kUnknown;
  239. }
  240. break;
  241. default:
  242. return CpuType::kUnknown;
  243. }
  244. }
  245. return CpuType::kUnknown;
  246. }
  // AArch64 Linux implementation: returns true only when AT_HWCAP advertises
  // both HWCAP_CRC32 and HWCAP_PMULL. Returns false when either macro is not
  // defined at compile time.
  bool SupportsArmCRC32PMULL() {
  #if defined(HWCAP_CRC32) && defined(HWCAP_PMULL)
    const uint64_t hwcaps = getauxval(AT_HWCAP);
    const bool has_crc32 = (hwcaps & HWCAP_CRC32) != 0;
    const bool has_pmull = (hwcaps & HWCAP_PMULL) != 0;
    return has_crc32 && has_pmull;
  #else
    return false;
  #endif
  }
  255. #elif defined(__aarch64__) && defined(__APPLE__)
  256. CpuType GetCpuType() { return CpuType::kUnknown; }
  257. template <typename T>
  258. static absl::optional<T> ReadSysctlByName(const char* name) {
  259. T val;
  260. size_t val_size = sizeof(T);
  261. int ret = sysctlbyname(name, &val, &val_size, nullptr, 0);
  262. if (ret == -1) {
  263. return absl::nullopt;
  264. }
  265. return val;
  266. }
  267. bool SupportsArmCRC32PMULL() {
  268. // Newer XNU kernels support querying all capabilities in a single
  269. // sysctlbyname.
  270. #if defined(CAP_BIT_CRC32) && defined(CAP_BIT_FEAT_PMULL)
  271. static const absl::optional<uint64_t> caps =
  272. ReadSysctlByName<uint64_t>("hw.optional.arm.caps");
  273. if (caps.has_value()) {
  274. constexpr uint64_t kCrc32AndPmullCaps =
  275. (uint64_t{1} << CAP_BIT_CRC32) | (uint64_t{1} << CAP_BIT_FEAT_PMULL);
  276. return (*caps & kCrc32AndPmullCaps) == kCrc32AndPmullCaps;
  277. }
  278. #endif
  279. // https://developer.apple.com/documentation/kernel/1387446-sysctlbyname/determining_instruction_set_characteristics#3915619
  280. static const absl::optional<int> armv8_crc32 =
  281. ReadSysctlByName<int>("hw.optional.armv8_crc32");
  282. if (armv8_crc32.value_or(0) == 0) {
  283. return false;
  284. }
  285. // https://developer.apple.com/documentation/kernel/1387446-sysctlbyname/determining_instruction_set_characteristics#3918855
  286. static const absl::optional<int> feat_pmull =
  287. ReadSysctlByName<int>("hw.optional.arm.FEAT_PMULL");
  288. if (feat_pmull.value_or(0) == 0) {
  289. return false;
  290. }
  291. return true;
  292. }
  293. #else
  294. CpuType GetCpuType() { return CpuType::kUnknown; }
  // Fallback for platforms without a dedicated implementation above: always
  // reports that the Arm CRC32 and PMULL instructions are not supported.
  bool SupportsArmCRC32PMULL() {
    return false;
  }
  296. #endif
  297. } // namespace crc_internal
  298. ABSL_NAMESPACE_END
  299. } // namespace absl