cpu.c 7.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253
  1. // Copyright 2011 Google Inc. All Rights Reserved.
  2. //
  3. // Use of this source code is governed by a BSD-style license
  4. // that can be found in the COPYING file in the root of the source
  5. // tree. An additional intellectual property rights grant can be found
  6. // in the file PATENTS. All contributing project authors may
  7. // be found in the AUTHORS file in the root of the source tree.
  8. // -----------------------------------------------------------------------------
  9. //
  10. // CPU detection
  11. //
  12. // Author: Christian Duvivier (cduvivier@google.com)
  13. #include "./dsp.h"
  14. #if defined(WEBP_HAVE_NEON_RTCD)
  15. #include <stdio.h>
  16. #include <string.h>
  17. #endif
  18. #if defined(WEBP_ANDROID_NEON)
  19. #include <cpu-features.h>
  20. #endif
  21. //------------------------------------------------------------------------------
  22. // SSE2 detection.
  23. //
  24. // apple/darwin gcc-4.0.1 defines __PIC__, but not __pic__ with -fPIC.
  25. #if (defined(__pic__) || defined(__PIC__)) && defined(__i386__)
  26. static WEBP_INLINE void GetCPUInfo(int cpu_info[4], int info_type) {
  27. __asm__ volatile (
  28. "mov %%ebx, %%edi\n"
  29. "cpuid\n"
  30. "xchg %%edi, %%ebx\n"
  31. : "=a"(cpu_info[0]), "=D"(cpu_info[1]), "=c"(cpu_info[2]), "=d"(cpu_info[3])
  32. : "a"(info_type), "c"(0));
  33. }
  34. #elif defined(__x86_64__) && \
  35. (defined(__code_model_medium__) || defined(__code_model_large__)) && \
  36. defined(__PIC__)
  37. static WEBP_INLINE void GetCPUInfo(int cpu_info[4], int info_type) {
  38. __asm__ volatile (
  39. "xchg{q}\t{%%rbx}, %q1\n"
  40. "cpuid\n"
  41. "xchg{q}\t{%%rbx}, %q1\n"
  42. : "=a"(cpu_info[0]), "=&r"(cpu_info[1]), "=c"(cpu_info[2]),
  43. "=d"(cpu_info[3])
  44. : "a"(info_type), "c"(0));
  45. }
  46. #elif defined(__i386__) || defined(__x86_64__)
  47. static WEBP_INLINE void GetCPUInfo(int cpu_info[4], int info_type) {
  48. __asm__ volatile (
  49. "cpuid\n"
  50. : "=a"(cpu_info[0]), "=b"(cpu_info[1]), "=c"(cpu_info[2]), "=d"(cpu_info[3])
  51. : "a"(info_type), "c"(0));
  52. }
  53. #elif defined(_MSC_VER) && (defined(_M_X64) || defined(_M_IX86))
  54. #if defined(_MSC_FULL_VER) && _MSC_FULL_VER >= 150030729 // >= VS2008 SP1
  55. #include <intrin.h>
  56. #define GetCPUInfo(info, type) __cpuidex(info, type, 0) // set ecx=0
  57. #define WEBP_HAVE_MSC_CPUID
  58. #elif _MSC_VER > 1310
  59. #include <intrin.h>
  60. #define GetCPUInfo __cpuid
  61. #define WEBP_HAVE_MSC_CPUID
  62. #endif
  63. #endif
  64. // NaCl has no support for xgetbv or the raw opcode.
  65. #if !defined(__native_client__) && (defined(__i386__) || defined(__x86_64__))
  66. static WEBP_INLINE uint64_t xgetbv(void) {
  67. const uint32_t ecx = 0;
  68. uint32_t eax, edx;
  69. // Use the raw opcode for xgetbv for compatibility with older toolchains.
  70. __asm__ volatile (
  71. ".byte 0x0f, 0x01, 0xd0\n"
  72. : "=a"(eax), "=d"(edx) : "c" (ecx));
  73. return ((uint64_t)edx << 32) | eax;
  74. }
  75. #elif (defined(_M_X64) || defined(_M_IX86)) && \
  76. defined(_MSC_FULL_VER) && _MSC_FULL_VER >= 160040219 // >= VS2010 SP1
  77. #include <immintrin.h>
  78. #define xgetbv() _xgetbv(0)
  79. #elif defined(_MSC_VER) && defined(_M_IX86)
  80. static WEBP_INLINE uint64_t xgetbv(void) {
  81. uint32_t eax_, edx_;
  82. __asm {
  83. xor ecx, ecx // ecx = 0
  84. // Use the raw opcode for xgetbv for compatibility with older toolchains.
  85. __asm _emit 0x0f __asm _emit 0x01 __asm _emit 0xd0
  86. mov eax_, eax
  87. mov edx_, edx
  88. }
  89. return ((uint64_t)edx_ << 32) | eax_;
  90. }
  91. #else
  92. #define xgetbv() 0U // no AVX for older x64 or unrecognized toolchains.
  93. #endif
  94. #if defined(__i386__) || defined(__x86_64__) || defined(WEBP_HAVE_MSC_CPUID)
  95. // helper function for run-time detection of slow SSSE3 platforms
  // Returns 1 when 'info' (the eax value of cpuid leaf 1) identifies a family
  // 0x06 CPU whose model is listed in the slow-model table below, 0 otherwise.
  static int CheckSlowModel(int info) {
    // Table listing display models with longer latencies for the bsr
    // instruction (ie 2 cycles vs 10/16 cycles) and some SSSE3 instructions
    // like pshufb. Refer to Intel 64 and IA-32 Architectures Optimization
    // Reference Manual.
    static const uint8_t kSlowModels[] = {
      0x37, 0x4a, 0x4d,  // Silvermont Microarchitecture
      0x1c, 0x26, 0x27   // Atom Microarchitecture
    };
    const size_t num_models = sizeof(kSlowModels) / sizeof(kSlowModels[0]);
    // Bits 16..19 supply the high nibble of the model, bits 4..7 the low one.
    const uint32_t model_high = (info & 0xf0000) >> 12;
    const uint32_t model_low = (info >> 4) & 0xf;
    const uint32_t model = model_high | model_low;
    const uint32_t family = (info >> 8) & 0xf;
    size_t idx = 0;

    if (family != 0x06) return 0;
    while (idx < num_models) {
      if (kSlowModels[idx] == model) return 1;
      ++idx;
    }
    return 0;
  }
  114. static int x86CPUInfo(CPUFeature feature) {
  115. int max_cpuid_value;
  116. int cpu_info[4];
  117. int is_intel = 0;
  118. // get the highest feature value cpuid supports
  119. GetCPUInfo(cpu_info, 0);
  120. max_cpuid_value = cpu_info[0];
  121. if (max_cpuid_value < 1) {
  122. return 0;
  123. } else {
  124. const int VENDOR_ID_INTEL_EBX = 0x756e6547; // uneG
  125. const int VENDOR_ID_INTEL_EDX = 0x49656e69; // Ieni
  126. const int VENDOR_ID_INTEL_ECX = 0x6c65746e; // letn
  127. is_intel = (cpu_info[1] == VENDOR_ID_INTEL_EBX &&
  128. cpu_info[2] == VENDOR_ID_INTEL_ECX &&
  129. cpu_info[3] == VENDOR_ID_INTEL_EDX); // genuine Intel?
  130. }
  131. GetCPUInfo(cpu_info, 1);
  132. if (feature == kSSE2) {
  133. return !!(cpu_info[3] & (1 << 26));
  134. }
  135. if (feature == kSSE3) {
  136. return !!(cpu_info[2] & (1 << 0));
  137. }
  138. if (feature == kSlowSSSE3) {
  139. if (is_intel && (cpu_info[2] & (1 << 9))) { // SSSE3?
  140. return CheckSlowModel(cpu_info[0]);
  141. }
  142. return 0;
  143. }
  144. if (feature == kSSE4_1) {
  145. return !!(cpu_info[2] & (1 << 19));
  146. }
  147. if (feature == kAVX) {
  148. // bits 27 (OSXSAVE) & 28 (256-bit AVX)
  149. if ((cpu_info[2] & 0x18000000) == 0x18000000) {
  150. // XMM state and YMM state enabled by the OS.
  151. return (xgetbv() & 0x6) == 0x6;
  152. }
  153. }
  154. if (feature == kAVX2) {
  155. if (x86CPUInfo(kAVX) && max_cpuid_value >= 7) {
  156. GetCPUInfo(cpu_info, 7);
  157. return !!(cpu_info[1] & (1 << 5));
  158. }
  159. }
  160. return 0;
  161. }
  162. VP8CPUInfo VP8GetCPUInfo = x86CPUInfo;
  163. #elif defined(WEBP_ANDROID_NEON) // NB: needs to be before generic NEON test.
  164. static int AndroidCPUInfo(CPUFeature feature) {
  165. const AndroidCpuFamily cpu_family = android_getCpuFamily();
  166. const uint64_t cpu_features = android_getCpuFeatures();
  167. if (feature == kNEON) {
  168. return cpu_family == ANDROID_CPU_FAMILY_ARM &&
  169. (cpu_features & ANDROID_CPU_ARM_FEATURE_NEON) != 0;
  170. }
  171. return 0;
  172. }
  173. VP8CPUInfo VP8GetCPUInfo = AndroidCPUInfo;
  174. #elif defined(EMSCRIPTEN) // also needs to be before generic NEON test
  175. // Use compile flags as an indicator of SIMD support instead of a runtime check.
  176. static int wasmCPUInfo(CPUFeature feature) {
  177. switch (feature) {
  178. #ifdef WEBP_HAVE_SSE2
  179. case kSSE2:
  180. return 1;
  181. #endif
  182. #ifdef WEBP_HAVE_SSE41
  183. case kSSE3:
  184. case kSlowSSSE3:
  185. case kSSE4_1:
  186. return 1;
  187. #endif
  188. #ifdef WEBP_HAVE_NEON
  189. case kNEON:
  190. return 1;
  191. #endif
  192. default:
  193. break;
  194. }
  195. return 0;
  196. }
  197. VP8CPUInfo VP8GetCPUInfo = wasmCPUInfo;
  198. #elif defined(WEBP_HAVE_NEON)
  199. // In most cases this function doesn't check for NEON support (it's assumed by
  200. // the configuration), but enables turning off NEON at runtime, for testing
  201. // purposes, by setting VP8DecGetCPUInfo = NULL.
  202. static int armCPUInfo(CPUFeature feature) {
  203. if (feature != kNEON) return 0;
  204. #if defined(__linux__) && defined(WEBP_HAVE_NEON_RTCD)
  205. {
  206. int has_neon = 0;
  207. char line[200];
  208. FILE* const cpuinfo = fopen("/proc/cpuinfo", "r");
  209. if (cpuinfo == NULL) return 0;
  210. while (fgets(line, sizeof(line), cpuinfo)) {
  211. if (!strncmp(line, "Features", 8)) {
  212. if (strstr(line, " neon ") != NULL) {
  213. has_neon = 1;
  214. break;
  215. }
  216. }
  217. }
  218. fclose(cpuinfo);
  219. return has_neon;
  220. }
  221. #else
  222. return 1;
  223. #endif
  224. }
  225. VP8CPUInfo VP8GetCPUInfo = armCPUInfo;
  226. #elif defined(WEBP_USE_MIPS32) || defined(WEBP_USE_MIPS_DSP_R2) || \
  227. defined(WEBP_USE_MSA)
  228. static int mipsCPUInfo(CPUFeature feature) {
  229. if ((feature == kMIPS32) || (feature == kMIPSdspR2) || (feature == kMSA)) {
  230. return 1;
  231. } else {
  232. return 0;
  233. }
  234. }
  235. VP8CPUInfo VP8GetCPUInfo = mipsCPUInfo;
  236. #else
  237. VP8CPUInfo VP8GetCPUInfo = NULL;
  238. #endif