x86.c 35 KB


  1. //===-- cpu_model/x86.c - Support for __cpu_model builtin --------*- C -*-===//
  2. //
  3. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4. // See https://llvm.org/LICENSE.txt for license information.
  5. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6. //
  7. //===----------------------------------------------------------------------===//
  8. //
  9. // This file is based on LLVM's lib/Support/Host.cpp.
  10. // It implements the operating system Host concept and builtin
  11. // __cpu_model for the compiler_rt library for x86.
  12. //
  13. //===----------------------------------------------------------------------===//
  14. #include "cpu_model.h"
  15. #if !(defined(__i386__) || defined(_M_IX86) || defined(__x86_64__) || \
  16. defined(_M_X64))
  17. #error This file is intended only for x86-based targets
  18. #endif
  19. #if defined(__GNUC__) || defined(__clang__) || defined(_MSC_VER)
  20. #include <assert.h>
  21. #ifdef _MSC_VER
  22. #include <intrin.h>
  23. #endif
  24. enum VendorSignatures {
  25. SIG_INTEL = 0x756e6547, // Genu
  26. SIG_AMD = 0x68747541, // Auth
  27. };
  28. enum ProcessorVendors {
  29. VENDOR_INTEL = 1,
  30. VENDOR_AMD,
  31. VENDOR_OTHER,
  32. VENDOR_MAX
  33. };
  34. enum ProcessorTypes {
  35. INTEL_BONNELL = 1,
  36. INTEL_CORE2,
  37. INTEL_COREI7,
  38. AMDFAM10H,
  39. AMDFAM15H,
  40. INTEL_SILVERMONT,
  41. INTEL_KNL,
  42. AMD_BTVER1,
  43. AMD_BTVER2,
  44. AMDFAM17H,
  45. INTEL_KNM,
  46. INTEL_GOLDMONT,
  47. INTEL_GOLDMONT_PLUS,
  48. INTEL_TREMONT,
  49. AMDFAM19H,
  50. ZHAOXIN_FAM7H,
  51. INTEL_SIERRAFOREST,
  52. INTEL_GRANDRIDGE,
  53. INTEL_CLEARWATERFOREST,
  54. AMDFAM1AH,
  55. CPU_TYPE_MAX
  56. };
  57. enum ProcessorSubtypes {
  58. INTEL_COREI7_NEHALEM = 1,
  59. INTEL_COREI7_WESTMERE,
  60. INTEL_COREI7_SANDYBRIDGE,
  61. AMDFAM10H_BARCELONA,
  62. AMDFAM10H_SHANGHAI,
  63. AMDFAM10H_ISTANBUL,
  64. AMDFAM15H_BDVER1,
  65. AMDFAM15H_BDVER2,
  66. AMDFAM15H_BDVER3,
  67. AMDFAM15H_BDVER4,
  68. AMDFAM17H_ZNVER1,
  69. INTEL_COREI7_IVYBRIDGE,
  70. INTEL_COREI7_HASWELL,
  71. INTEL_COREI7_BROADWELL,
  72. INTEL_COREI7_SKYLAKE,
  73. INTEL_COREI7_SKYLAKE_AVX512,
  74. INTEL_COREI7_CANNONLAKE,
  75. INTEL_COREI7_ICELAKE_CLIENT,
  76. INTEL_COREI7_ICELAKE_SERVER,
  77. AMDFAM17H_ZNVER2,
  78. INTEL_COREI7_CASCADELAKE,
  79. INTEL_COREI7_TIGERLAKE,
  80. INTEL_COREI7_COOPERLAKE,
  81. INTEL_COREI7_SAPPHIRERAPIDS,
  82. INTEL_COREI7_ALDERLAKE,
  83. AMDFAM19H_ZNVER3,
  84. INTEL_COREI7_ROCKETLAKE,
  85. ZHAOXIN_FAM7H_LUJIAZUI,
  86. AMDFAM19H_ZNVER4,
  87. INTEL_COREI7_GRANITERAPIDS,
  88. INTEL_COREI7_GRANITERAPIDS_D,
  89. INTEL_COREI7_ARROWLAKE,
  90. INTEL_COREI7_ARROWLAKE_S,
  91. INTEL_COREI7_PANTHERLAKE,
  92. AMDFAM1AH_ZNVER5,
  93. CPU_SUBTYPE_MAX
  94. };
  95. enum ProcessorFeatures {
  96. FEATURE_CMOV = 0,
  97. FEATURE_MMX,
  98. FEATURE_POPCNT,
  99. FEATURE_SSE,
  100. FEATURE_SSE2,
  101. FEATURE_SSE3,
  102. FEATURE_SSSE3,
  103. FEATURE_SSE4_1,
  104. FEATURE_SSE4_2,
  105. FEATURE_AVX,
  106. FEATURE_AVX2,
  107. FEATURE_SSE4_A,
  108. FEATURE_FMA4,
  109. FEATURE_XOP,
  110. FEATURE_FMA,
  111. FEATURE_AVX512F,
  112. FEATURE_BMI,
  113. FEATURE_BMI2,
  114. FEATURE_AES,
  115. FEATURE_PCLMUL,
  116. FEATURE_AVX512VL,
  117. FEATURE_AVX512BW,
  118. FEATURE_AVX512DQ,
  119. FEATURE_AVX512CD,
  120. FEATURE_AVX512ER,
  121. FEATURE_AVX512PF,
  122. FEATURE_AVX512VBMI,
  123. FEATURE_AVX512IFMA,
  124. FEATURE_AVX5124VNNIW,
  125. FEATURE_AVX5124FMAPS,
  126. FEATURE_AVX512VPOPCNTDQ,
  127. FEATURE_AVX512VBMI2,
  128. FEATURE_GFNI,
  129. FEATURE_VPCLMULQDQ,
  130. FEATURE_AVX512VNNI,
  131. FEATURE_AVX512BITALG,
  132. FEATURE_AVX512BF16,
  133. FEATURE_AVX512VP2INTERSECT,
  134. // FIXME: Below Features has some missings comparing to gcc, it's because gcc
  135. // has some not one-to-one mapped in llvm.
  136. // FEATURE_3DNOW,
  137. // FEATURE_3DNOWP,
  138. FEATURE_ADX = 40,
  139. // FEATURE_ABM,
  140. FEATURE_CLDEMOTE = 42,
  141. FEATURE_CLFLUSHOPT,
  142. FEATURE_CLWB,
  143. FEATURE_CLZERO,
  144. FEATURE_CMPXCHG16B,
  145. // FIXME: Not adding FEATURE_CMPXCHG8B is a workaround to make 'generic' as
  146. // a cpu string with no X86_FEATURE_COMPAT features, which is required in
  147. // current implementantion of cpu_specific/cpu_dispatch FMV feature.
  148. // FEATURE_CMPXCHG8B,
  149. FEATURE_ENQCMD = 48,
  150. FEATURE_F16C,
  151. FEATURE_FSGSBASE,
  152. // FEATURE_FXSAVE,
  153. // FEATURE_HLE,
  154. // FEATURE_IBT,
  155. FEATURE_LAHF_LM = 54,
  156. FEATURE_LM,
  157. FEATURE_LWP,
  158. FEATURE_LZCNT,
  159. FEATURE_MOVBE,
  160. FEATURE_MOVDIR64B,
  161. FEATURE_MOVDIRI,
  162. FEATURE_MWAITX,
  163. // FEATURE_OSXSAVE,
  164. FEATURE_PCONFIG = 63,
  165. FEATURE_PKU,
  166. FEATURE_PREFETCHWT1,
  167. FEATURE_PRFCHW,
  168. FEATURE_PTWRITE,
  169. FEATURE_RDPID,
  170. FEATURE_RDRND,
  171. FEATURE_RDSEED,
  172. FEATURE_RTM,
  173. FEATURE_SERIALIZE,
  174. FEATURE_SGX,
  175. FEATURE_SHA,
  176. FEATURE_SHSTK,
  177. FEATURE_TBM,
  178. FEATURE_TSXLDTRK,
  179. FEATURE_VAES,
  180. FEATURE_WAITPKG,
  181. FEATURE_WBNOINVD,
  182. FEATURE_XSAVE,
  183. FEATURE_XSAVEC,
  184. FEATURE_XSAVEOPT,
  185. FEATURE_XSAVES,
  186. FEATURE_AMX_TILE,
  187. FEATURE_AMX_INT8,
  188. FEATURE_AMX_BF16,
  189. FEATURE_UINTR,
  190. FEATURE_HRESET,
  191. FEATURE_KL,
  192. // FEATURE_AESKLE,
  193. FEATURE_WIDEKL = 92,
  194. FEATURE_AVXVNNI,
  195. FEATURE_AVX512FP16,
  196. FEATURE_X86_64_BASELINE,
  197. FEATURE_X86_64_V2,
  198. FEATURE_X86_64_V3,
  199. FEATURE_X86_64_V4,
  200. FEATURE_AVXIFMA,
  201. FEATURE_AVXVNNIINT8,
  202. FEATURE_AVXNECONVERT,
  203. FEATURE_CMPCCXADD,
  204. FEATURE_AMX_FP16,
  205. FEATURE_PREFETCHI,
  206. FEATURE_RAOINT,
  207. FEATURE_AMX_COMPLEX,
  208. FEATURE_AVXVNNIINT16,
  209. FEATURE_SM3,
  210. FEATURE_SHA512,
  211. FEATURE_SM4,
  212. FEATURE_APXF,
  213. FEATURE_USERMSR,
  214. FEATURE_AVX10_1_256,
  215. FEATURE_AVX10_1_512,
  216. CPU_FEATURE_MAX
  217. };
  218. // The check below for i386 was copied from clang's cpuid.h (__get_cpuid_max).
  219. // Check motivated by bug reports for OpenSSL crashing on CPUs without CPUID
  220. // support. Consequently, for i386, the presence of CPUID is checked first
  221. // via the corresponding eflags bit.
  222. static bool isCpuIdSupported(void) {
  223. #if defined(__GNUC__) || defined(__clang__)
  224. #if defined(__i386__)
  225. int __cpuid_supported;
  226. __asm__(" pushfl\n"
  227. " popl %%eax\n"
  228. " movl %%eax,%%ecx\n"
  229. " xorl $0x00200000,%%eax\n"
  230. " pushl %%eax\n"
  231. " popfl\n"
  232. " pushfl\n"
  233. " popl %%eax\n"
  234. " movl $0,%0\n"
  235. " cmpl %%eax,%%ecx\n"
  236. " je 1f\n"
  237. " movl $1,%0\n"
  238. "1:"
  239. : "=r"(__cpuid_supported)
  240. :
  241. : "eax", "ecx");
  242. if (!__cpuid_supported)
  243. return false;
  244. #endif
  245. return true;
  246. #endif
  247. return true;
  248. }
  249. // This code is copied from lib/Support/Host.cpp.
  250. // Changes to either file should be mirrored in the other.
  251. /// getX86CpuIDAndInfo - Execute the specified cpuid and return the 4 values in
  252. /// the specified arguments. If we can't run cpuid on the host, return true.
  253. static bool getX86CpuIDAndInfo(unsigned value, unsigned *rEAX, unsigned *rEBX,
  254. unsigned *rECX, unsigned *rEDX) {
  255. #if defined(__GNUC__) || defined(__clang__)
  256. #if defined(__x86_64__)
  257. // gcc doesn't know cpuid would clobber ebx/rbx. Preserve it manually.
  258. // FIXME: should we save this for Clang?
  259. __asm__("movq\t%%rbx, %%rsi\n\t"
  260. "cpuid\n\t"
  261. "xchgq\t%%rbx, %%rsi\n\t"
  262. : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX)
  263. : "a"(value));
  264. return false;
  265. #elif defined(__i386__)
  266. __asm__("movl\t%%ebx, %%esi\n\t"
  267. "cpuid\n\t"
  268. "xchgl\t%%ebx, %%esi\n\t"
  269. : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX)
  270. : "a"(value));
  271. return false;
  272. #else
  273. return true;
  274. #endif
  275. #elif defined(_MSC_VER)
  276. // The MSVC intrinsic is portable across x86 and x64.
  277. int registers[4];
  278. __cpuid(registers, value);
  279. *rEAX = registers[0];
  280. *rEBX = registers[1];
  281. *rECX = registers[2];
  282. *rEDX = registers[3];
  283. return false;
  284. #else
  285. return true;
  286. #endif
  287. }
  288. /// getX86CpuIDAndInfoEx - Execute the specified cpuid with subleaf and return
  289. /// the 4 values in the specified arguments. If we can't run cpuid on the host,
  290. /// return true.
  291. static bool getX86CpuIDAndInfoEx(unsigned value, unsigned subleaf,
  292. unsigned *rEAX, unsigned *rEBX, unsigned *rECX,
  293. unsigned *rEDX) {
  294. #if defined(__GNUC__) || defined(__clang__)
  295. #if defined(__x86_64__)
  296. // gcc doesn't know cpuid would clobber ebx/rbx. Preserve it manually.
  297. // FIXME: should we save this for Clang?
  298. __asm__("movq\t%%rbx, %%rsi\n\t"
  299. "cpuid\n\t"
  300. "xchgq\t%%rbx, %%rsi\n\t"
  301. : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX)
  302. : "a"(value), "c"(subleaf));
  303. return false;
  304. #elif defined(__i386__)
  305. __asm__("movl\t%%ebx, %%esi\n\t"
  306. "cpuid\n\t"
  307. "xchgl\t%%ebx, %%esi\n\t"
  308. : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX)
  309. : "a"(value), "c"(subleaf));
  310. return false;
  311. #else
  312. return true;
  313. #endif
  314. #elif defined(_MSC_VER)
  315. int registers[4];
  316. __cpuidex(registers, value, subleaf);
  317. *rEAX = registers[0];
  318. *rEBX = registers[1];
  319. *rECX = registers[2];
  320. *rEDX = registers[3];
  321. return false;
  322. #else
  323. return true;
  324. #endif
  325. }
  326. // Read control register 0 (XCR0). Used to detect features such as AVX.
  327. static bool getX86XCR0(unsigned *rEAX, unsigned *rEDX) {
  328. #if defined(__GNUC__) || defined(__clang__)
  329. // Check xgetbv; this uses a .byte sequence instead of the instruction
  330. // directly because older assemblers do not include support for xgetbv and
  331. // there is no easy way to conditionally compile based on the assembler used.
  332. __asm__(".byte 0x0f, 0x01, 0xd0" : "=a"(*rEAX), "=d"(*rEDX) : "c"(0));
  333. return false;
  334. #elif defined(_MSC_FULL_VER) && defined(_XCR_XFEATURE_ENABLED_MASK)
  335. unsigned long long Result = _xgetbv(_XCR_XFEATURE_ENABLED_MASK);
  336. *rEAX = Result;
  337. *rEDX = Result >> 32;
  338. return false;
  339. #else
  340. return true;
  341. #endif
  342. }
  343. static void detectX86FamilyModel(unsigned EAX, unsigned *Family,
  344. unsigned *Model) {
  345. *Family = (EAX >> 8) & 0xf; // Bits 8 - 11
  346. *Model = (EAX >> 4) & 0xf; // Bits 4 - 7
  347. if (*Family == 6 || *Family == 0xf) {
  348. if (*Family == 0xf)
  349. // Examine extended family ID if family ID is F.
  350. *Family += (EAX >> 20) & 0xff; // Bits 20 - 27
  351. // Examine extended model ID if family ID is 6 or F.
  352. *Model += ((EAX >> 16) & 0xf) << 4; // Bits 16 - 19
  353. }
  354. }
  355. #define testFeature(F) (Features[F / 32] & (1 << (F % 32))) != 0
  356. static const char *getIntelProcessorTypeAndSubtype(unsigned Family,
  357. unsigned Model,
  358. const unsigned *Features,
  359. unsigned *Type,
  360. unsigned *Subtype) {
  361. // We select CPU strings to match the code in Host.cpp, but we don't use them
  362. // in compiler-rt.
  363. const char *CPU = 0;
  364. switch (Family) {
  365. case 6:
  366. switch (Model) {
  367. case 0x0f: // Intel Core 2 Duo processor, Intel Core 2 Duo mobile
  368. // processor, Intel Core 2 Quad processor, Intel Core 2 Quad
  369. // mobile processor, Intel Core 2 Extreme processor, Intel
  370. // Pentium Dual-Core processor, Intel Xeon processor, model
  371. // 0Fh. All processors are manufactured using the 65 nm process.
  372. case 0x16: // Intel Celeron processor model 16h. All processors are
  373. // manufactured using the 65 nm process
  374. CPU = "core2";
  375. *Type = INTEL_CORE2;
  376. break;
  377. case 0x17: // Intel Core 2 Extreme processor, Intel Xeon processor, model
  378. // 17h. All processors are manufactured using the 45 nm process.
  379. //
  380. // 45nm: Penryn , Wolfdale, Yorkfield (XE)
  381. case 0x1d: // Intel Xeon processor MP. All processors are manufactured using
  382. // the 45 nm process.
  383. CPU = "penryn";
  384. *Type = INTEL_CORE2;
  385. break;
  386. case 0x1a: // Intel Core i7 processor and Intel Xeon processor. All
  387. // processors are manufactured using the 45 nm process.
  388. case 0x1e: // Intel(R) Core(TM) i7 CPU 870 @ 2.93GHz.
  389. // As found in a Summer 2010 model iMac.
  390. case 0x1f:
  391. case 0x2e: // Nehalem EX
  392. CPU = "nehalem";
  393. *Type = INTEL_COREI7;
  394. *Subtype = INTEL_COREI7_NEHALEM;
  395. break;
  396. case 0x25: // Intel Core i7, laptop version.
  397. case 0x2c: // Intel Core i7 processor and Intel Xeon processor. All
  398. // processors are manufactured using the 32 nm process.
  399. case 0x2f: // Westmere EX
  400. CPU = "westmere";
  401. *Type = INTEL_COREI7;
  402. *Subtype = INTEL_COREI7_WESTMERE;
  403. break;
  404. case 0x2a: // Intel Core i7 processor. All processors are manufactured
  405. // using the 32 nm process.
  406. case 0x2d:
  407. CPU = "sandybridge";
  408. *Type = INTEL_COREI7;
  409. *Subtype = INTEL_COREI7_SANDYBRIDGE;
  410. break;
  411. case 0x3a:
  412. case 0x3e: // Ivy Bridge EP
  413. CPU = "ivybridge";
  414. *Type = INTEL_COREI7;
  415. *Subtype = INTEL_COREI7_IVYBRIDGE;
  416. break;
  417. // Haswell:
  418. case 0x3c:
  419. case 0x3f:
  420. case 0x45:
  421. case 0x46:
  422. CPU = "haswell";
  423. *Type = INTEL_COREI7;
  424. *Subtype = INTEL_COREI7_HASWELL;
  425. break;
  426. // Broadwell:
  427. case 0x3d:
  428. case 0x47:
  429. case 0x4f:
  430. case 0x56:
  431. CPU = "broadwell";
  432. *Type = INTEL_COREI7;
  433. *Subtype = INTEL_COREI7_BROADWELL;
  434. break;
  435. // Skylake:
  436. case 0x4e: // Skylake mobile
  437. case 0x5e: // Skylake desktop
  438. case 0x8e: // Kaby Lake mobile
  439. case 0x9e: // Kaby Lake desktop
  440. case 0xa5: // Comet Lake-H/S
  441. case 0xa6: // Comet Lake-U
  442. CPU = "skylake";
  443. *Type = INTEL_COREI7;
  444. *Subtype = INTEL_COREI7_SKYLAKE;
  445. break;
  446. // Rocketlake:
  447. case 0xa7:
  448. CPU = "rocketlake";
  449. *Type = INTEL_COREI7;
  450. *Subtype = INTEL_COREI7_ROCKETLAKE;
  451. break;
  452. // Skylake Xeon:
  453. case 0x55:
  454. *Type = INTEL_COREI7;
  455. if (testFeature(FEATURE_AVX512BF16)) {
  456. CPU = "cooperlake";
  457. *Subtype = INTEL_COREI7_COOPERLAKE;
  458. } else if (testFeature(FEATURE_AVX512VNNI)) {
  459. CPU = "cascadelake";
  460. *Subtype = INTEL_COREI7_CASCADELAKE;
  461. } else {
  462. CPU = "skylake-avx512";
  463. *Subtype = INTEL_COREI7_SKYLAKE_AVX512;
  464. }
  465. break;
  466. // Cannonlake:
  467. case 0x66:
  468. CPU = "cannonlake";
  469. *Type = INTEL_COREI7;
  470. *Subtype = INTEL_COREI7_CANNONLAKE;
  471. break;
  472. // Icelake:
  473. case 0x7d:
  474. case 0x7e:
  475. CPU = "icelake-client";
  476. *Type = INTEL_COREI7;
  477. *Subtype = INTEL_COREI7_ICELAKE_CLIENT;
  478. break;
  479. // Tigerlake:
  480. case 0x8c:
  481. case 0x8d:
  482. CPU = "tigerlake";
  483. *Type = INTEL_COREI7;
  484. *Subtype = INTEL_COREI7_TIGERLAKE;
  485. break;
  486. // Alderlake:
  487. case 0x97:
  488. case 0x9a:
  489. // Raptorlake:
  490. case 0xb7:
  491. case 0xba:
  492. case 0xbf:
  493. // Meteorlake:
  494. case 0xaa:
  495. case 0xac:
  496. // Gracemont:
  497. case 0xbe:
  498. CPU = "alderlake";
  499. *Type = INTEL_COREI7;
  500. *Subtype = INTEL_COREI7_ALDERLAKE;
  501. break;
  502. // Arrowlake:
  503. case 0xc5:
  504. CPU = "arrowlake";
  505. *Type = INTEL_COREI7;
  506. *Subtype = INTEL_COREI7_ARROWLAKE;
  507. break;
  508. // Arrowlake S:
  509. case 0xc6:
  510. // Lunarlake:
  511. case 0xbd:
  512. CPU = "arrowlake-s";
  513. *Type = INTEL_COREI7;
  514. *Subtype = INTEL_COREI7_ARROWLAKE_S;
  515. break;
  516. // Pantherlake:
  517. case 0xcc:
  518. CPU = "pantherlake";
  519. *Type = INTEL_COREI7;
  520. *Subtype = INTEL_COREI7_PANTHERLAKE;
  521. break;
  522. // Icelake Xeon:
  523. case 0x6a:
  524. case 0x6c:
  525. CPU = "icelake-server";
  526. *Type = INTEL_COREI7;
  527. *Subtype = INTEL_COREI7_ICELAKE_SERVER;
  528. break;
  529. // Emerald Rapids:
  530. case 0xcf:
  531. // Sapphire Rapids:
  532. case 0x8f:
  533. CPU = "sapphirerapids";
  534. *Type = INTEL_COREI7;
  535. *Subtype = INTEL_COREI7_SAPPHIRERAPIDS;
  536. break;
  537. // Granite Rapids:
  538. case 0xad:
  539. CPU = "graniterapids";
  540. *Type = INTEL_COREI7;
  541. *Subtype = INTEL_COREI7_GRANITERAPIDS;
  542. break;
  543. // Granite Rapids D:
  544. case 0xae:
  545. CPU = "graniterapids-d";
  546. *Type = INTEL_COREI7;
  547. *Subtype = INTEL_COREI7_GRANITERAPIDS_D;
  548. break;
  549. case 0x1c: // Most 45 nm Intel Atom processors
  550. case 0x26: // 45 nm Atom Lincroft
  551. case 0x27: // 32 nm Atom Medfield
  552. case 0x35: // 32 nm Atom Midview
  553. case 0x36: // 32 nm Atom Midview
  554. CPU = "bonnell";
  555. *Type = INTEL_BONNELL;
  556. break;
  557. // Atom Silvermont codes from the Intel software optimization guide.
  558. case 0x37:
  559. case 0x4a:
  560. case 0x4d:
  561. case 0x5a:
  562. case 0x5d:
  563. case 0x4c: // really airmont
  564. CPU = "silvermont";
  565. *Type = INTEL_SILVERMONT;
  566. break;
  567. // Goldmont:
  568. case 0x5c: // Apollo Lake
  569. case 0x5f: // Denverton
  570. CPU = "goldmont";
  571. *Type = INTEL_GOLDMONT;
  572. break; // "goldmont"
  573. case 0x7a:
  574. CPU = "goldmont-plus";
  575. *Type = INTEL_GOLDMONT_PLUS;
  576. break;
  577. case 0x86:
  578. case 0x8a: // Lakefield
  579. case 0x96: // Elkhart Lake
  580. case 0x9c: // Jasper Lake
  581. CPU = "tremont";
  582. *Type = INTEL_TREMONT;
  583. break;
  584. // Sierraforest:
  585. case 0xaf:
  586. CPU = "sierraforest";
  587. *Type = INTEL_SIERRAFOREST;
  588. break;
  589. // Grandridge:
  590. case 0xb6:
  591. CPU = "grandridge";
  592. *Type = INTEL_GRANDRIDGE;
  593. break;
  594. // Clearwaterforest:
  595. case 0xdd:
  596. CPU = "clearwaterforest";
  597. *Type = INTEL_COREI7;
  598. *Subtype = INTEL_CLEARWATERFOREST;
  599. break;
  600. case 0x57:
  601. CPU = "knl";
  602. *Type = INTEL_KNL;
  603. break;
  604. case 0x85:
  605. CPU = "knm";
  606. *Type = INTEL_KNM;
  607. break;
  608. default: // Unknown family 6 CPU.
  609. break;
  610. }
  611. break;
  612. default:
  613. break; // Unknown.
  614. }
  615. return CPU;
  616. }
  617. static const char *getAMDProcessorTypeAndSubtype(unsigned Family,
  618. unsigned Model,
  619. const unsigned *Features,
  620. unsigned *Type,
  621. unsigned *Subtype) {
  622. const char *CPU = 0;
  623. switch (Family) {
  624. case 4:
  625. CPU = "i486";
  626. break;
  627. case 5:
  628. CPU = "pentium";
  629. switch (Model) {
  630. case 6:
  631. case 7:
  632. CPU = "k6";
  633. break;
  634. case 8:
  635. CPU = "k6-2";
  636. break;
  637. case 9:
  638. case 13:
  639. CPU = "k6-3";
  640. break;
  641. case 10:
  642. CPU = "geode";
  643. break;
  644. }
  645. break;
  646. case 6:
  647. if (testFeature(FEATURE_SSE)) {
  648. CPU = "athlon-xp";
  649. break;
  650. }
  651. CPU = "athlon";
  652. break;
  653. case 15:
  654. if (testFeature(FEATURE_SSE3)) {
  655. CPU = "k8-sse3";
  656. break;
  657. }
  658. CPU = "k8";
  659. break;
  660. case 16:
  661. CPU = "amdfam10";
  662. *Type = AMDFAM10H; // "amdfam10"
  663. switch (Model) {
  664. case 2:
  665. *Subtype = AMDFAM10H_BARCELONA;
  666. break;
  667. case 4:
  668. *Subtype = AMDFAM10H_SHANGHAI;
  669. break;
  670. case 8:
  671. *Subtype = AMDFAM10H_ISTANBUL;
  672. break;
  673. }
  674. break;
  675. case 20:
  676. CPU = "btver1";
  677. *Type = AMD_BTVER1;
  678. break;
  679. case 21:
  680. CPU = "bdver1";
  681. *Type = AMDFAM15H;
  682. if (Model >= 0x60 && Model <= 0x7f) {
  683. CPU = "bdver4";
  684. *Subtype = AMDFAM15H_BDVER4;
  685. break; // 60h-7Fh: Excavator
  686. }
  687. if (Model >= 0x30 && Model <= 0x3f) {
  688. CPU = "bdver3";
  689. *Subtype = AMDFAM15H_BDVER3;
  690. break; // 30h-3Fh: Steamroller
  691. }
  692. if ((Model >= 0x10 && Model <= 0x1f) || Model == 0x02) {
  693. CPU = "bdver2";
  694. *Subtype = AMDFAM15H_BDVER2;
  695. break; // 02h, 10h-1Fh: Piledriver
  696. }
  697. if (Model <= 0x0f) {
  698. *Subtype = AMDFAM15H_BDVER1;
  699. break; // 00h-0Fh: Bulldozer
  700. }
  701. break;
  702. case 22:
  703. CPU = "btver2";
  704. *Type = AMD_BTVER2;
  705. break;
  706. case 23:
  707. CPU = "znver1";
  708. *Type = AMDFAM17H;
  709. if ((Model >= 0x30 && Model <= 0x3f) || (Model == 0x47) ||
  710. (Model >= 0x60 && Model <= 0x67) || (Model >= 0x68 && Model <= 0x6f) ||
  711. (Model >= 0x70 && Model <= 0x7f) || (Model >= 0x84 && Model <= 0x87) ||
  712. (Model >= 0x90 && Model <= 0x97) || (Model >= 0x98 && Model <= 0x9f) ||
  713. (Model >= 0xa0 && Model <= 0xaf)) {
  714. // Family 17h Models 30h-3Fh (Starship) Zen 2
  715. // Family 17h Models 47h (Cardinal) Zen 2
  716. // Family 17h Models 60h-67h (Renoir) Zen 2
  717. // Family 17h Models 68h-6Fh (Lucienne) Zen 2
  718. // Family 17h Models 70h-7Fh (Matisse) Zen 2
  719. // Family 17h Models 84h-87h (ProjectX) Zen 2
  720. // Family 17h Models 90h-97h (VanGogh) Zen 2
  721. // Family 17h Models 98h-9Fh (Mero) Zen 2
  722. // Family 17h Models A0h-AFh (Mendocino) Zen 2
  723. CPU = "znver2";
  724. *Subtype = AMDFAM17H_ZNVER2;
  725. break;
  726. }
  727. if ((Model >= 0x10 && Model <= 0x1f) || (Model >= 0x20 && Model <= 0x2f)) {
  728. // Family 17h Models 10h-1Fh (Raven1) Zen
  729. // Family 17h Models 10h-1Fh (Picasso) Zen+
  730. // Family 17h Models 20h-2Fh (Raven2 x86) Zen
  731. *Subtype = AMDFAM17H_ZNVER1;
  732. break;
  733. }
  734. break;
  735. case 25:
  736. CPU = "znver3";
  737. *Type = AMDFAM19H;
  738. if (Model <= 0x0f || (Model >= 0x20 && Model <= 0x2f) ||
  739. (Model >= 0x30 && Model <= 0x3f) || (Model >= 0x40 && Model <= 0x4f) ||
  740. (Model >= 0x50 && Model <= 0x5f)) {
  741. // Family 19h Models 00h-0Fh (Genesis, Chagall) Zen 3
  742. // Family 19h Models 20h-2Fh (Vermeer) Zen 3
  743. // Family 19h Models 30h-3Fh (Badami) Zen 3
  744. // Family 19h Models 40h-4Fh (Rembrandt) Zen 3+
  745. // Family 19h Models 50h-5Fh (Cezanne) Zen 3
  746. *Subtype = AMDFAM19H_ZNVER3;
  747. break;
  748. }
  749. if ((Model >= 0x10 && Model <= 0x1f) || (Model >= 0x60 && Model <= 0x6f) ||
  750. (Model >= 0x70 && Model <= 0x77) || (Model >= 0x78 && Model <= 0x7f) ||
  751. (Model >= 0xa0 && Model <= 0xaf)) {
  752. // Family 19h Models 10h-1Fh (Stones; Storm Peak) Zen 4
  753. // Family 19h Models 60h-6Fh (Raphael) Zen 4
  754. // Family 19h Models 70h-77h (Phoenix, Hawkpoint1) Zen 4
  755. // Family 19h Models 78h-7Fh (Phoenix 2, Hawkpoint2) Zen 4
  756. // Family 19h Models A0h-AFh (Stones-Dense) Zen 4
  757. CPU = "znver4";
  758. *Subtype = AMDFAM19H_ZNVER4;
  759. break; // "znver4"
  760. }
  761. break; // family 19h
  762. case 26:
  763. CPU = "znver5";
  764. *Type = AMDFAM1AH;
  765. if (Model <= 0x77) {
  766. // Models 00h-0Fh (Breithorn).
  767. // Models 10h-1Fh (Breithorn-Dense).
  768. // Models 20h-2Fh (Strix 1).
  769. // Models 30h-37h (Strix 2).
  770. // Models 38h-3Fh (Strix 3).
  771. // Models 40h-4Fh (Granite Ridge).
  772. // Models 50h-5Fh (Weisshorn).
  773. // Models 60h-6Fh (Krackan1).
  774. // Models 70h-77h (Sarlak).
  775. CPU = "znver5";
  776. *Subtype = AMDFAM1AH_ZNVER5;
  777. break; // "znver5"
  778. }
  779. break;
  780. default:
  781. break; // Unknown AMD CPU.
  782. }
  783. return CPU;
  784. }
  785. #undef testFeature
  786. static void getAvailableFeatures(unsigned ECX, unsigned EDX, unsigned MaxLeaf,
  787. unsigned *Features) {
  788. unsigned EAX = 0, EBX = 0;
  789. #define hasFeature(F) ((Features[F / 32] >> (F % 32)) & 1)
  790. #define setFeature(F) Features[F / 32] |= 1U << (F % 32)
  791. if ((EDX >> 15) & 1)
  792. setFeature(FEATURE_CMOV);
  793. if ((EDX >> 23) & 1)
  794. setFeature(FEATURE_MMX);
  795. if ((EDX >> 25) & 1)
  796. setFeature(FEATURE_SSE);
  797. if ((EDX >> 26) & 1)
  798. setFeature(FEATURE_SSE2);
  799. if ((ECX >> 0) & 1)
  800. setFeature(FEATURE_SSE3);
  801. if ((ECX >> 1) & 1)
  802. setFeature(FEATURE_PCLMUL);
  803. if ((ECX >> 9) & 1)
  804. setFeature(FEATURE_SSSE3);
  805. if ((ECX >> 12) & 1)
  806. setFeature(FEATURE_FMA);
  807. if ((ECX >> 13) & 1)
  808. setFeature(FEATURE_CMPXCHG16B);
  809. if ((ECX >> 19) & 1)
  810. setFeature(FEATURE_SSE4_1);
  811. if ((ECX >> 20) & 1)
  812. setFeature(FEATURE_SSE4_2);
  813. if ((ECX >> 22) & 1)
  814. setFeature(FEATURE_MOVBE);
  815. if ((ECX >> 23) & 1)
  816. setFeature(FEATURE_POPCNT);
  817. if ((ECX >> 25) & 1)
  818. setFeature(FEATURE_AES);
  819. if ((ECX >> 29) & 1)
  820. setFeature(FEATURE_F16C);
  821. if ((ECX >> 30) & 1)
  822. setFeature(FEATURE_RDRND);
  823. // If CPUID indicates support for XSAVE, XRESTORE and AVX, and XGETBV
  824. // indicates that the AVX registers will be saved and restored on context
  825. // switch, then we have full AVX support.
  826. const unsigned AVXBits = (1 << 27) | (1 << 28);
  827. bool HasAVXSave = ((ECX & AVXBits) == AVXBits) && !getX86XCR0(&EAX, &EDX) &&
  828. ((EAX & 0x6) == 0x6);
  829. #if defined(__APPLE__)
  830. // Darwin lazily saves the AVX512 context on first use: trust that the OS will
  831. // save the AVX512 context if we use AVX512 instructions, even the bit is not
  832. // set right now.
  833. bool HasAVX512Save = true;
  834. #else
  835. // AVX512 requires additional context to be saved by the OS.
  836. bool HasAVX512Save = HasAVXSave && ((EAX & 0xe0) == 0xe0);
  837. #endif
  838. // AMX requires additional context to be saved by the OS.
  839. const unsigned AMXBits = (1 << 17) | (1 << 18);
  840. bool HasXSave = ((ECX >> 27) & 1) && !getX86XCR0(&EAX, &EDX);
  841. bool HasAMXSave = HasXSave && ((EAX & AMXBits) == AMXBits);
  842. if (HasAVXSave)
  843. setFeature(FEATURE_AVX);
  844. if (((ECX >> 26) & 1) && HasAVXSave)
  845. setFeature(FEATURE_XSAVE);
  846. bool HasLeaf7 =
  847. MaxLeaf >= 0x7 && !getX86CpuIDAndInfoEx(0x7, 0x0, &EAX, &EBX, &ECX, &EDX);
  848. if (HasLeaf7 && ((EBX >> 0) & 1))
  849. setFeature(FEATURE_FSGSBASE);
  850. if (HasLeaf7 && ((EBX >> 2) & 1))
  851. setFeature(FEATURE_SGX);
  852. if (HasLeaf7 && ((EBX >> 3) & 1))
  853. setFeature(FEATURE_BMI);
  854. if (HasLeaf7 && ((EBX >> 5) & 1) && HasAVXSave)
  855. setFeature(FEATURE_AVX2);
  856. if (HasLeaf7 && ((EBX >> 8) & 1))
  857. setFeature(FEATURE_BMI2);
  858. if (HasLeaf7 && ((EBX >> 11) & 1))
  859. setFeature(FEATURE_RTM);
  860. if (HasLeaf7 && ((EBX >> 16) & 1) && HasAVX512Save)
  861. setFeature(FEATURE_AVX512F);
  862. if (HasLeaf7 && ((EBX >> 17) & 1) && HasAVX512Save)
  863. setFeature(FEATURE_AVX512DQ);
  864. if (HasLeaf7 && ((EBX >> 18) & 1))
  865. setFeature(FEATURE_RDSEED);
  866. if (HasLeaf7 && ((EBX >> 19) & 1))
  867. setFeature(FEATURE_ADX);
  868. if (HasLeaf7 && ((EBX >> 21) & 1) && HasAVX512Save)
  869. setFeature(FEATURE_AVX512IFMA);
  870. if (HasLeaf7 && ((EBX >> 24) & 1))
  871. setFeature(FEATURE_CLWB);
  872. if (HasLeaf7 && ((EBX >> 26) & 1) && HasAVX512Save)
  873. setFeature(FEATURE_AVX512PF);
  874. if (HasLeaf7 && ((EBX >> 27) & 1) && HasAVX512Save)
  875. setFeature(FEATURE_AVX512ER);
  876. if (HasLeaf7 && ((EBX >> 28) & 1) && HasAVX512Save)
  877. setFeature(FEATURE_AVX512CD);
  878. if (HasLeaf7 && ((EBX >> 29) & 1))
  879. setFeature(FEATURE_SHA);
  880. if (HasLeaf7 && ((EBX >> 30) & 1) && HasAVX512Save)
  881. setFeature(FEATURE_AVX512BW);
  882. if (HasLeaf7 && ((EBX >> 31) & 1) && HasAVX512Save)
  883. setFeature(FEATURE_AVX512VL);
  884. if (HasLeaf7 && ((ECX >> 0) & 1))
  885. setFeature(FEATURE_PREFETCHWT1);
  886. if (HasLeaf7 && ((ECX >> 1) & 1) && HasAVX512Save)
  887. setFeature(FEATURE_AVX512VBMI);
  888. if (HasLeaf7 && ((ECX >> 4) & 1))
  889. setFeature(FEATURE_PKU);
  890. if (HasLeaf7 && ((ECX >> 5) & 1))
  891. setFeature(FEATURE_WAITPKG);
  892. if (HasLeaf7 && ((ECX >> 6) & 1) && HasAVX512Save)
  893. setFeature(FEATURE_AVX512VBMI2);
  894. if (HasLeaf7 && ((ECX >> 7) & 1))
  895. setFeature(FEATURE_SHSTK);
  896. if (HasLeaf7 && ((ECX >> 8) & 1))
  897. setFeature(FEATURE_GFNI);
  898. if (HasLeaf7 && ((ECX >> 9) & 1) && HasAVXSave)
  899. setFeature(FEATURE_VAES);
  900. if (HasLeaf7 && ((ECX >> 10) & 1) && HasAVXSave)
  901. setFeature(FEATURE_VPCLMULQDQ);
  902. if (HasLeaf7 && ((ECX >> 11) & 1) && HasAVX512Save)
  903. setFeature(FEATURE_AVX512VNNI);
  904. if (HasLeaf7 && ((ECX >> 12) & 1) && HasAVX512Save)
  905. setFeature(FEATURE_AVX512BITALG);
  906. if (HasLeaf7 && ((ECX >> 14) & 1) && HasAVX512Save)
  907. setFeature(FEATURE_AVX512VPOPCNTDQ);
  908. if (HasLeaf7 && ((ECX >> 22) & 1))
  909. setFeature(FEATURE_RDPID);
  910. if (HasLeaf7 && ((ECX >> 23) & 1))
  911. setFeature(FEATURE_KL);
  912. if (HasLeaf7 && ((ECX >> 25) & 1))
  913. setFeature(FEATURE_CLDEMOTE);
  914. if (HasLeaf7 && ((ECX >> 27) & 1))
  915. setFeature(FEATURE_MOVDIRI);
  916. if (HasLeaf7 && ((ECX >> 28) & 1))
  917. setFeature(FEATURE_MOVDIR64B);
  918. if (HasLeaf7 && ((ECX >> 29) & 1))
  919. setFeature(FEATURE_ENQCMD);
  920. if (HasLeaf7 && ((EDX >> 2) & 1) && HasAVX512Save)
  921. setFeature(FEATURE_AVX5124VNNIW);
  922. if (HasLeaf7 && ((EDX >> 3) & 1) && HasAVX512Save)
  923. setFeature(FEATURE_AVX5124FMAPS);
  924. if (HasLeaf7 && ((EDX >> 5) & 1))
  925. setFeature(FEATURE_UINTR);
  926. if (HasLeaf7 && ((EDX >> 8) & 1) && HasAVX512Save)
  927. setFeature(FEATURE_AVX512VP2INTERSECT);
  928. if (HasLeaf7 && ((EDX >> 14) & 1))
  929. setFeature(FEATURE_SERIALIZE);
  930. if (HasLeaf7 && ((EDX >> 16) & 1))
  931. setFeature(FEATURE_TSXLDTRK);
  932. if (HasLeaf7 && ((EDX >> 18) & 1))
  933. setFeature(FEATURE_PCONFIG);
  934. if (HasLeaf7 && ((EDX >> 22) & 1) && HasAMXSave)
  935. setFeature(FEATURE_AMX_BF16);
  936. if (HasLeaf7 && ((EDX >> 23) & 1) && HasAVX512Save)
  937. setFeature(FEATURE_AVX512FP16);
  938. if (HasLeaf7 && ((EDX >> 24) & 1) && HasAMXSave)
  939. setFeature(FEATURE_AMX_TILE);
  940. if (HasLeaf7 && ((EDX >> 25) & 1) && HasAMXSave)
  941. setFeature(FEATURE_AMX_INT8);
  942. // EAX from subleaf 0 is the maximum subleaf supported. Some CPUs don't
  943. // return all 0s for invalid subleaves so check the limit.
  944. bool HasLeaf7Subleaf1 =
  945. HasLeaf7 && EAX >= 1 &&
  946. !getX86CpuIDAndInfoEx(0x7, 0x1, &EAX, &EBX, &ECX, &EDX);
  947. if (HasLeaf7Subleaf1 && ((EAX >> 0) & 1))
  948. setFeature(FEATURE_SHA512);
  949. if (HasLeaf7Subleaf1 && ((EAX >> 1) & 1))
  950. setFeature(FEATURE_SM3);
  951. if (HasLeaf7Subleaf1 && ((EAX >> 2) & 1))
  952. setFeature(FEATURE_SM4);
  953. if (HasLeaf7Subleaf1 && ((EAX >> 3) & 1))
  954. setFeature(FEATURE_RAOINT);
  955. if (HasLeaf7Subleaf1 && ((EAX >> 4) & 1) && HasAVXSave)
  956. setFeature(FEATURE_AVXVNNI);
  957. if (HasLeaf7Subleaf1 && ((EAX >> 5) & 1) && HasAVX512Save)
  958. setFeature(FEATURE_AVX512BF16);
  959. if (HasLeaf7Subleaf1 && ((EAX >> 7) & 1))
  960. setFeature(FEATURE_CMPCCXADD);
  961. if (HasLeaf7Subleaf1 && ((EAX >> 21) & 1) && HasAMXSave)
  962. setFeature(FEATURE_AMX_FP16);
  963. if (HasLeaf7Subleaf1 && ((EAX >> 22) & 1))
  964. setFeature(FEATURE_HRESET);
  965. if (HasLeaf7Subleaf1 && ((EAX >> 23) & 1) && HasAVXSave)
  966. setFeature(FEATURE_AVXIFMA);
  967. if (HasLeaf7Subleaf1 && ((EDX >> 4) & 1) && HasAVXSave)
  968. setFeature(FEATURE_AVXVNNIINT8);
  969. if (HasLeaf7Subleaf1 && ((EDX >> 5) & 1) && HasAVXSave)
  970. setFeature(FEATURE_AVXNECONVERT);
  971. if (HasLeaf7Subleaf1 && ((EDX >> 8) & 1) && HasAMXSave)
  972. setFeature(FEATURE_AMX_COMPLEX);
  973. if (HasLeaf7Subleaf1 && ((EDX >> 10) & 1) && HasAVXSave)
  974. setFeature(FEATURE_AVXVNNIINT16);
  975. if (HasLeaf7Subleaf1 && ((EDX >> 14) & 1))
  976. setFeature(FEATURE_PREFETCHI);
  977. if (HasLeaf7Subleaf1 && ((EDX >> 15) & 1))
  978. setFeature(FEATURE_USERMSR);
  979. if (HasLeaf7Subleaf1 && ((EDX >> 19) & 1))
  980. setFeature(FEATURE_AVX10_1_256);
  981. if (HasLeaf7Subleaf1 && ((EDX >> 21) & 1))
  982. setFeature(FEATURE_APXF);
  983. unsigned MaxLevel;
  984. getX86CpuIDAndInfo(0, &MaxLevel, &EBX, &ECX, &EDX);
  985. bool HasLeafD = MaxLevel >= 0xd &&
  986. !getX86CpuIDAndInfoEx(0xd, 0x1, &EAX, &EBX, &ECX, &EDX);
  987. if (HasLeafD && ((EAX >> 0) & 1) && HasAVXSave)
  988. setFeature(FEATURE_XSAVEOPT);
  989. if (HasLeafD && ((EAX >> 1) & 1) && HasAVXSave)
  990. setFeature(FEATURE_XSAVEC);
  991. if (HasLeafD && ((EAX >> 3) & 1) && HasAVXSave)
  992. setFeature(FEATURE_XSAVES);
  993. bool HasLeaf24 =
  994. MaxLevel >= 0x24 && !getX86CpuIDAndInfo(0x24, &EAX, &EBX, &ECX, &EDX);
  995. if (HasLeaf7Subleaf1 && ((EDX >> 19) & 1) && HasLeaf24 && ((EBX >> 18) & 1))
  996. setFeature(FEATURE_AVX10_1_512);
  997. unsigned MaxExtLevel;
  998. getX86CpuIDAndInfo(0x80000000, &MaxExtLevel, &EBX, &ECX, &EDX);
  999. bool HasExtLeaf1 = MaxExtLevel >= 0x80000001 &&
  1000. !getX86CpuIDAndInfo(0x80000001, &EAX, &EBX, &ECX, &EDX);
  1001. if (HasExtLeaf1) {
  1002. if (ECX & 1)
  1003. setFeature(FEATURE_LAHF_LM);
  1004. if ((ECX >> 5) & 1)
  1005. setFeature(FEATURE_LZCNT);
  1006. if (((ECX >> 6) & 1))
  1007. setFeature(FEATURE_SSE4_A);
  1008. if (((ECX >> 8) & 1))
  1009. setFeature(FEATURE_PRFCHW);
  1010. if (((ECX >> 11) & 1))
  1011. setFeature(FEATURE_XOP);
  1012. if (((ECX >> 15) & 1))
  1013. setFeature(FEATURE_LWP);
  1014. if (((ECX >> 16) & 1))
  1015. setFeature(FEATURE_FMA4);
  1016. if (((ECX >> 21) & 1))
  1017. setFeature(FEATURE_TBM);
  1018. if (((ECX >> 29) & 1))
  1019. setFeature(FEATURE_MWAITX);
  1020. if (((EDX >> 29) & 1))
  1021. setFeature(FEATURE_LM);
  1022. }
  1023. bool HasExtLeaf8 = MaxExtLevel >= 0x80000008 &&
  1024. !getX86CpuIDAndInfo(0x80000008, &EAX, &EBX, &ECX, &EDX);
  1025. if (HasExtLeaf8 && ((EBX >> 0) & 1))
  1026. setFeature(FEATURE_CLZERO);
  1027. if (HasExtLeaf8 && ((EBX >> 9) & 1))
  1028. setFeature(FEATURE_WBNOINVD);
  1029. bool HasLeaf14 = MaxLevel >= 0x14 &&
  1030. !getX86CpuIDAndInfoEx(0x14, 0x0, &EAX, &EBX, &ECX, &EDX);
  1031. if (HasLeaf14 && ((EBX >> 4) & 1))
  1032. setFeature(FEATURE_PTWRITE);
  1033. bool HasLeaf19 =
  1034. MaxLevel >= 0x19 && !getX86CpuIDAndInfo(0x19, &EAX, &EBX, &ECX, &EDX);
  1035. if (HasLeaf7 && HasLeaf19 && ((EBX >> 2) & 1))
  1036. setFeature(FEATURE_WIDEKL);
  1037. if (hasFeature(FEATURE_LM) && hasFeature(FEATURE_SSE2)) {
  1038. setFeature(FEATURE_X86_64_BASELINE);
  1039. if (hasFeature(FEATURE_CMPXCHG16B) && hasFeature(FEATURE_POPCNT) &&
  1040. hasFeature(FEATURE_LAHF_LM) && hasFeature(FEATURE_SSE4_2)) {
  1041. setFeature(FEATURE_X86_64_V2);
  1042. if (hasFeature(FEATURE_AVX2) && hasFeature(FEATURE_BMI) &&
  1043. hasFeature(FEATURE_BMI2) && hasFeature(FEATURE_F16C) &&
  1044. hasFeature(FEATURE_FMA) && hasFeature(FEATURE_LZCNT) &&
  1045. hasFeature(FEATURE_MOVBE)) {
  1046. setFeature(FEATURE_X86_64_V3);
  1047. if (hasFeature(FEATURE_AVX512BW) && hasFeature(FEATURE_AVX512CD) &&
  1048. hasFeature(FEATURE_AVX512DQ) && hasFeature(FEATURE_AVX512VL))
  1049. setFeature(FEATURE_X86_64_V4);
  1050. }
  1051. }
  1052. }
  1053. #undef hasFeature
  1054. #undef setFeature
  1055. }
  1056. #ifndef _WIN32
  1057. __attribute__((visibility("hidden")))
  1058. #endif
  1059. int __cpu_indicator_init(void) CONSTRUCTOR_ATTRIBUTE;
  // CPU identification record. Every field starts out as zero and is filled
  // in by __cpu_indicator_init; a nonzero __cpu_vendor marks the record as
  // already initialized. Hidden visibility is applied on every target except
  // Windows.
  #if !defined(_WIN32)
  __attribute__((visibility("hidden")))
  #endif
  struct __processor_model {
    unsigned __cpu_vendor;      // VENDOR_* value; 0 means "not probed yet".
    unsigned __cpu_type;        // Written by the vendor-specific detection.
    unsigned __cpu_subtype;     // Written by the vendor-specific detection.
    unsigned __cpu_features[1]; // Word 0 of the detected feature bitmap.
  } __cpu_model = {0, 0, 0, {0}};
  1069. #ifndef _WIN32
  1070. __attribute__((visibility("hidden")))
  1071. #endif
  1072. unsigned __cpu_features2[(CPU_FEATURE_MAX - 1) / 32];
  1073. // A constructor function that is sets __cpu_model and __cpu_features2 with
  1074. // the right values. This needs to run only once. This constructor is
  1075. // given the highest priority and it should run before constructors without
  1076. // the priority set. However, it still runs after ifunc initializers and
  1077. // needs to be called explicitly there.
  1078. int CONSTRUCTOR_ATTRIBUTE __cpu_indicator_init(void) {
  1079. unsigned EAX, EBX, ECX, EDX;
  1080. unsigned MaxLeaf = 5;
  1081. unsigned Vendor;
  1082. unsigned Model, Family;
  1083. unsigned Features[(CPU_FEATURE_MAX + 31) / 32] = {0};
  1084. static_assert(sizeof(Features) / sizeof(Features[0]) == 4, "");
  1085. static_assert(sizeof(__cpu_features2) / sizeof(__cpu_features2[0]) == 3, "");
  1086. // This function needs to run just once.
  1087. if (__cpu_model.__cpu_vendor)
  1088. return 0;
  1089. if (!isCpuIdSupported() ||
  1090. getX86CpuIDAndInfo(0, &MaxLeaf, &Vendor, &ECX, &EDX) || MaxLeaf < 1) {
  1091. __cpu_model.__cpu_vendor = VENDOR_OTHER;
  1092. return -1;
  1093. }
  1094. getX86CpuIDAndInfo(1, &EAX, &EBX, &ECX, &EDX);
  1095. detectX86FamilyModel(EAX, &Family, &Model);
  1096. // Find available features.
  1097. getAvailableFeatures(ECX, EDX, MaxLeaf, &Features[0]);
  1098. __cpu_model.__cpu_features[0] = Features[0];
  1099. __cpu_features2[0] = Features[1];
  1100. __cpu_features2[1] = Features[2];
  1101. __cpu_features2[2] = Features[3];
  1102. if (Vendor == SIG_INTEL) {
  1103. // Get CPU type.
  1104. getIntelProcessorTypeAndSubtype(Family, Model, &Features[0],
  1105. &(__cpu_model.__cpu_type),
  1106. &(__cpu_model.__cpu_subtype));
  1107. __cpu_model.__cpu_vendor = VENDOR_INTEL;
  1108. } else if (Vendor == SIG_AMD) {
  1109. // Get CPU type.
  1110. getAMDProcessorTypeAndSubtype(Family, Model, &Features[0],
  1111. &(__cpu_model.__cpu_type),
  1112. &(__cpu_model.__cpu_subtype));
  1113. __cpu_model.__cpu_vendor = VENDOR_AMD;
  1114. } else
  1115. __cpu_model.__cpu_vendor = VENDOR_OTHER;
  1116. assert(__cpu_model.__cpu_vendor < VENDOR_MAX);
  1117. assert(__cpu_model.__cpu_type < CPU_TYPE_MAX);
  1118. assert(__cpu_model.__cpu_subtype < CPU_SUBTYPE_MAX);
  1119. return 0;
  1120. }
  1121. #endif // defined(__GNUC__) || defined(__clang__) || defined(_MSC_VER)