sysinfo.cc 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489
  1. // Copyright 2017 The Abseil Authors.
  2. //
  3. // Licensed under the Apache License, Version 2.0 (the "License");
  4. // you may not use this file except in compliance with the License.
  5. // You may obtain a copy of the License at
  6. //
  7. // https://www.apache.org/licenses/LICENSE-2.0
  8. //
  9. // Unless required by applicable law or agreed to in writing, software
  10. // distributed under the License is distributed on an "AS IS" BASIS,
  11. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. // See the License for the specific language governing permissions and
  13. // limitations under the License.
  14. #include "absl/base/internal/sysinfo.h"
  15. #include "absl/base/attributes.h"
  16. #ifdef _WIN32
  17. #include <windows.h>
  18. #else
  19. #include <fcntl.h>
  20. #include <pthread.h>
  21. #include <sys/stat.h>
  22. #include <sys/types.h>
  23. #include <unistd.h>
  24. #endif
  25. #ifdef __linux__
  26. #include <sys/syscall.h>
  27. #endif
  28. #if defined(__APPLE__) || defined(__FreeBSD__)
  29. #include <sys/sysctl.h>
  30. #endif
  31. #ifdef __FreeBSD__
  32. #include <pthread_np.h>
  33. #endif
  34. #ifdef __NetBSD__
  35. #include <lwp.h>
  36. #endif
  37. #if defined(__myriad2__)
  38. #error #include <rtems.h>
  39. #endif
  40. #include <string.h>
  41. #include <cassert>
  42. #include <cerrno>
  43. #include <cstdint>
  44. #include <cstdio>
  45. #include <cstdlib>
  46. #include <ctime>
  47. #include <limits>
  48. #include <thread> // NOLINT(build/c++11)
  49. #include <utility>
  50. #include <vector>
  51. #include "absl/base/call_once.h"
  52. #include "absl/base/config.h"
  53. #include "absl/base/internal/raw_logging.h"
  54. #include "absl/base/internal/spinlock.h"
  55. #include "absl/base/internal/unscaledcycleclock.h"
  56. #include "absl/base/thread_annotations.h"
  57. namespace absl {
  58. ABSL_NAMESPACE_BEGIN
  59. namespace base_internal {
  60. namespace {
  61. #if defined(_WIN32)
  62. // Returns number of bits set in `bitMask`
  63. DWORD Win32CountSetBits(ULONG_PTR bitMask) {
  64. for (DWORD bitSetCount = 0; ; ++bitSetCount) {
  65. if (bitMask == 0) return bitSetCount;
  66. bitMask &= bitMask - 1;
  67. }
  68. }
  69. // Returns the number of logical CPUs using GetLogicalProcessorInformation(), or
  70. // 0 if the number of processors is not available or can not be computed.
  71. // https://docs.microsoft.com/en-us/windows/win32/api/sysinfoapi/nf-sysinfoapi-getlogicalprocessorinformation
  72. int Win32NumCPUs() {
  73. #pragma comment(lib, "kernel32.lib")
  74. using Info = SYSTEM_LOGICAL_PROCESSOR_INFORMATION;
  75. DWORD info_size = sizeof(Info);
  76. Info* info(static_cast<Info*>(malloc(info_size)));
  77. if (info == nullptr) return 0;
  78. bool success = GetLogicalProcessorInformation(info, &info_size);
  79. if (!success && GetLastError() == ERROR_INSUFFICIENT_BUFFER) {
  80. free(info);
  81. info = static_cast<Info*>(malloc(info_size));
  82. if (info == nullptr) return 0;
  83. success = GetLogicalProcessorInformation(info, &info_size);
  84. }
  85. DWORD logicalProcessorCount = 0;
  86. if (success) {
  87. Info* ptr = info;
  88. DWORD byteOffset = 0;
  89. while (byteOffset + sizeof(Info) <= info_size) {
  90. switch (ptr->Relationship) {
  91. case RelationProcessorCore:
  92. logicalProcessorCount += Win32CountSetBits(ptr->ProcessorMask);
  93. break;
  94. case RelationNumaNode:
  95. case RelationCache:
  96. case RelationProcessorPackage:
  97. // Ignore other entries
  98. break;
  99. default:
  100. // Ignore unknown entries
  101. break;
  102. }
  103. byteOffset += sizeof(Info);
  104. ptr++;
  105. }
  106. }
  107. free(info);
  108. return static_cast<int>(logicalProcessorCount);
  109. }
  110. #endif
  111. } // namespace
  // Returns the number of logical CPUs available, and never less than 1.
  //
  // Each platform branch falls back to 1 when its underlying query cannot
  // produce a positive count, so callers can safely size per-CPU structures or
  // divide by the result.
  static int GetNumCPUs() {
  #if defined(__myriad2__)
    return 1;
  #elif defined(_WIN32)
    const int hardware_concurrency = Win32NumCPUs();
    return hardware_concurrency ? hardware_concurrency : 1;
  #elif defined(_AIX)
    // sysconf() returns a long and signals failure with -1; convert explicitly
    // and never hand a non-positive count to callers.
    const long online_cpus = sysconf(_SC_NPROCESSORS_ONLN);  // NOLINT(runtime/int)
    return online_cpus > 0 ? static_cast<int>(online_cpus) : 1;
  #else
    // Other possibilities:
    //  - Read /sys/devices/system/cpu/online and use cpumask_parse()
    //  - sysconf(_SC_NPROCESSORS_ONLN)
    //
    // hardware_concurrency() is permitted to return 0 when the value is not
    // computable; treat that the same way the Windows branch treats 0.
    const unsigned hardware_concurrency = std::thread::hardware_concurrency();
    return hardware_concurrency > 0 ? static_cast<int>(hardware_concurrency)
                                    : 1;
  #endif
  }
  127. #if defined(_WIN32)
  128. static double GetNominalCPUFrequency() {
  129. #if WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_APP) && \
  130. !WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_DESKTOP)
  131. // UWP apps don't have access to the registry and currently don't provide an
  132. // API informing about CPU nominal frequency.
  133. return 1.0;
  134. #else
  135. #pragma comment(lib, "advapi32.lib") // For Reg* functions.
  136. HKEY key;
  137. // Use the Reg* functions rather than the SH functions because shlwapi.dll
  138. // pulls in gdi32.dll which makes process destruction much more costly.
  139. if (RegOpenKeyExA(HKEY_LOCAL_MACHINE,
  140. "HARDWARE\\DESCRIPTION\\System\\CentralProcessor\\0", 0,
  141. KEY_READ, &key) == ERROR_SUCCESS) {
  142. DWORD type = 0;
  143. DWORD data = 0;
  144. DWORD data_size = sizeof(data);
  145. auto result = RegQueryValueExA(key, "~MHz", nullptr, &type,
  146. reinterpret_cast<LPBYTE>(&data), &data_size);
  147. RegCloseKey(key);
  148. if (result == ERROR_SUCCESS && type == REG_DWORD &&
  149. data_size == sizeof(data)) {
  150. return data * 1e6; // Value is MHz.
  151. }
  152. }
  153. return 1.0;
  154. #endif // WINAPI_PARTITION_APP && !WINAPI_PARTITION_DESKTOP
  155. }
  156. #elif defined(CTL_HW) && defined(HW_CPU_FREQ)
  157. static double GetNominalCPUFrequency() {
  158. unsigned freq;
  159. size_t size = sizeof(freq);
  160. int mib[2] = {CTL_HW, HW_CPU_FREQ};
  161. if (sysctl(mib, 2, &freq, &size, nullptr, 0) == 0) {
  162. return static_cast<double>(freq);
  163. }
  164. return 1.0;
  165. }
  166. #else
  // Reads a base-10 integer from the start of `file`.
  //
  // At most the first 1023 bytes of the file are examined. The call succeeds
  // only if at least one digit was parsed, the parsed value fits in a long, and
  // the number is followed immediately by a newline or by the end of the data.
  //
  // Returns true and stores the parsed value in `*value` on success. Returns
  // false and leaves `*value` untouched if the file cannot be opened or read,
  // is empty, or does not satisfy the conditions above.
  static bool ReadLongFromFile(const char *file, long *value) {
  #if defined(_POSIX_C_SOURCE)
    const int file_mode = (O_RDONLY | O_CLOEXEC);
  #else
    const int file_mode = O_RDONLY;
  #endif
    const int fd = open(file, file_mode);
    if (fd == -1) return false;

    // Zero-fill and read one byte less than the buffer holds so the data is
    // always NUL-terminated for strtol().
    char line[1024];
    memset(line, '\0', sizeof(line));
    ssize_t len;
    do {
      len = read(fd, line, sizeof(line) - 1);
    } while (len < 0 && errno == EINTR);  // Retry reads interrupted by signals.
    close(fd);
    if (len <= 0) return false;

    char *end = nullptr;
    errno = 0;  // strtol() reports overflow only through errno.
    const long parsed = strtol(line, &end, 10);
    // No digits consumed (e.g. a lone "\n" or garbage): not a number.
    if (end == line) return false;
    // Value was clamped to LONG_MIN/LONG_MAX: do not report a bogus result.
    if (errno == ERANGE) return false;
    // Anything other than a newline or end-of-data after the number is junk.
    if (*end != '\n' && *end != '\0') return false;

    *value = parsed;
    return true;
  }
  198. #if defined(ABSL_INTERNAL_UNSCALED_CYCLECLOCK_FREQUENCY_IS_CPU_FREQUENCY)
  199. // Reads a monotonic time source and returns a value in
  200. // nanoseconds. The returned value uses an arbitrary epoch, not the
  201. // Unix epoch.
  202. static int64_t ReadMonotonicClockNanos() {
  203. struct timespec t;
  204. #ifdef CLOCK_MONOTONIC_RAW
  205. int rc = clock_gettime(CLOCK_MONOTONIC_RAW, &t);
  206. #else
  207. int rc = clock_gettime(CLOCK_MONOTONIC, &t);
  208. #endif
  209. if (rc != 0) {
  210. ABSL_INTERNAL_LOG(
  211. FATAL, "clock_gettime() failed: (" + std::to_string(errno) + ")");
  212. }
  213. return int64_t{t.tv_sec} * 1000000000 + t.tv_nsec;
  214. }
  215. class UnscaledCycleClockWrapperForInitializeFrequency {
  216. public:
  217. static int64_t Now() { return base_internal::UnscaledCycleClock::Now(); }
  218. };
  // A monotonic-clock reading paired with the cycle-counter reading taken at
  // approximately the same instant. Both fields start at zero so that a
  // default-constructed pair never carries indeterminate values.
  struct TimeTscPair {
    int64_t time = 0;  // From ReadMonotonicClockNanos().
    int64_t tsc = 0;   // From UnscaledCycleClock::Now().
  };
  223. // Returns a pair of values (monotonic kernel time, TSC ticks) that
  224. // approximately correspond to each other. This is accomplished by
  225. // doing several reads and picking the reading with the lowest
  226. // latency. This approach is used to minimize the probability that
  227. // our thread was preempted between clock reads.
  228. static TimeTscPair GetTimeTscPair() {
  229. int64_t best_latency = std::numeric_limits<int64_t>::max();
  230. TimeTscPair best;
  231. for (int i = 0; i < 10; ++i) {
  232. int64_t t0 = ReadMonotonicClockNanos();
  233. int64_t tsc = UnscaledCycleClockWrapperForInitializeFrequency::Now();
  234. int64_t t1 = ReadMonotonicClockNanos();
  235. int64_t latency = t1 - t0;
  236. if (latency < best_latency) {
  237. best_latency = latency;
  238. best.time = t0;
  239. best.tsc = tsc;
  240. }
  241. }
  242. return best;
  243. }
  244. // Measures and returns the TSC frequency by taking a pair of
  245. // measurements approximately `sleep_nanoseconds` apart.
  246. static double MeasureTscFrequencyWithSleep(int sleep_nanoseconds) {
  247. auto t0 = GetTimeTscPair();
  248. struct timespec ts;
  249. ts.tv_sec = 0;
  250. ts.tv_nsec = sleep_nanoseconds;
  251. while (nanosleep(&ts, &ts) != 0 && errno == EINTR) {}
  252. auto t1 = GetTimeTscPair();
  253. double elapsed_ticks = t1.tsc - t0.tsc;
  254. double elapsed_time = (t1.time - t0.time) * 1e-9;
  255. return elapsed_ticks / elapsed_time;
  256. }
  257. // Measures and returns the TSC frequency by calling
  258. // MeasureTscFrequencyWithSleep(), doubling the sleep interval until the
  259. // frequency measurement stabilizes.
  260. static double MeasureTscFrequency() {
  261. double last_measurement = -1.0;
  262. int sleep_nanoseconds = 1000000; // 1 millisecond.
  263. for (int i = 0; i < 8; ++i) {
  264. double measurement = MeasureTscFrequencyWithSleep(sleep_nanoseconds);
  265. if (measurement * 0.99 < last_measurement &&
  266. last_measurement < measurement * 1.01) {
  267. // Use the current measurement if it is within 1% of the
  268. // previous measurement.
  269. return measurement;
  270. }
  271. last_measurement = measurement;
  272. sleep_nanoseconds *= 2;
  273. }
  274. return last_measurement;
  275. }
  276. #endif // ABSL_INTERNAL_UNSCALED_CYCLECLOCK_FREQUENCY_IS_CPU_FREQUENCY
  277. static double GetNominalCPUFrequency() {
  278. long freq = 0;
  279. // Google's production kernel has a patch to export the TSC
  280. // frequency through sysfs. If the kernel is exporting the TSC
  281. // frequency use that. There are issues where cpuinfo_max_freq
  282. // cannot be relied on because the BIOS may be exporting an invalid
  283. // p-state (on x86) or p-states may be used to put the processor in
  284. // a new mode (turbo mode). Essentially, those frequencies cannot
  285. // always be relied upon. The same reasons apply to /proc/cpuinfo as
  286. // well.
  287. if (ReadLongFromFile("/sys/devices/system/cpu/cpu0/tsc_freq_khz", &freq)) {
  288. return freq * 1e3; // Value is kHz.
  289. }
  290. #if defined(ABSL_INTERNAL_UNSCALED_CYCLECLOCK_FREQUENCY_IS_CPU_FREQUENCY)
  291. // On these platforms, the TSC frequency is the nominal CPU
  292. // frequency. But without having the kernel export it directly
  293. // though /sys/devices/system/cpu/cpu0/tsc_freq_khz, there is no
  294. // other way to reliably get the TSC frequency, so we have to
  295. // measure it ourselves. Some CPUs abuse cpuinfo_max_freq by
  296. // exporting "fake" frequencies for implementing new features. For
  297. // example, Intel's turbo mode is enabled by exposing a p-state
  298. // value with a higher frequency than that of the real TSC
  299. // rate. Because of this, we prefer to measure the TSC rate
  300. // ourselves on i386 and x86-64.
  301. return MeasureTscFrequency();
  302. #else
  303. // If CPU scaling is in effect, we want to use the *maximum*
  304. // frequency, not whatever CPU speed some random processor happens
  305. // to be using now.
  306. if (ReadLongFromFile("/sys/devices/system/cpu/cpu0/cpufreq/cpuinfo_max_freq",
  307. &freq)) {
  308. return freq * 1e3; // Value is kHz.
  309. }
  310. return 1.0;
  311. #endif // !ABSL_INTERNAL_UNSCALED_CYCLECLOCK_FREQUENCY_IS_CPU_FREQUENCY
  312. }
  313. #endif
  314. ABSL_CONST_INIT static once_flag init_num_cpus_once;
  315. ABSL_CONST_INIT static int num_cpus = 0;
  316. // NumCPUs() may be called before main() and before malloc is properly
  317. // initialized, therefore this must not allocate memory.
  318. int NumCPUs() {
  319. base_internal::LowLevelCallOnce(
  320. &init_num_cpus_once, []() { num_cpus = GetNumCPUs(); });
  321. return num_cpus;
  322. }
  323. // A default frequency of 0.0 might be dangerous if it is used in division.
  324. ABSL_CONST_INIT static once_flag init_nominal_cpu_frequency_once;
  325. ABSL_CONST_INIT static double nominal_cpu_frequency = 1.0;
  326. // NominalCPUFrequency() may be called before main() and before malloc is
  327. // properly initialized, therefore this must not allocate memory.
  328. double NominalCPUFrequency() {
  329. base_internal::LowLevelCallOnce(
  330. &init_nominal_cpu_frequency_once,
  331. []() { nominal_cpu_frequency = GetNominalCPUFrequency(); });
  332. return nominal_cpu_frequency;
  333. }
  334. #if defined(_WIN32)
  335. pid_t GetTID() {
  336. return pid_t{GetCurrentThreadId()};
  337. }
  338. #elif defined(__linux__)
  // Some C libraries only provide the __NR_ spelling of the syscall number.
  #ifndef SYS_gettid
  #define SYS_gettid __NR_gettid
  #endif

  // Linux: asks the kernel for the calling thread's ID through the raw gettid
  // system call.
  pid_t GetTID() {
    const auto kernel_tid = syscall(SYS_gettid);
    return static_cast<pid_t>(kernel_tid);
  }
  345. #elif defined(__akaros__)
  346. pid_t GetTID() {
  347. // Akaros has a concept of "vcore context", which is the state the program
  348. // is forced into when we need to make a user-level scheduling decision, or
  349. // run a signal handler. This is analogous to the interrupt context that a
  350. // CPU might enter if it encounters some kind of exception.
  351. //
  352. // There is no current thread context in vcore context, but we need to give
  353. // a reasonable answer if asked for a thread ID (e.g., in a signal handler).
  354. // Thread 0 always exists, so if we are in vcore context, we return that.
  355. //
  356. // Otherwise, we know (since we are using pthreads) that the uthread struct
  357. // current_uthread is pointing to is the first element of a
  358. // struct pthread_tcb, so we extract and return the thread ID from that.
  359. //
  360. // TODO(dcross): Akaros anticipates moving the thread ID to the uthread
  361. // structure at some point. We should modify this code to remove the cast
  362. // when that happens.
  363. if (in_vcore_context())
  364. return 0;
  365. return reinterpret_cast<struct pthread_tcb *>(current_uthread)->id;
  366. }
  367. #elif defined(__myriad2__)
  368. pid_t GetTID() {
  369. uint32_t tid;
  370. rtems_task_ident(RTEMS_SELF, 0, &tid);
  371. return tid;
  372. }
  373. #elif defined(__APPLE__)
  374. pid_t GetTID() {
  375. uint64_t tid;
  376. // `nullptr` here implies this thread. This only fails if the specified
  377. // thread is invalid or the pointer-to-tid is null, so we needn't worry about
  378. // it.
  379. pthread_threadid_np(nullptr, &tid);
  380. return static_cast<pid_t>(tid);
  381. }
  382. #elif defined(__FreeBSD__)
  383. pid_t GetTID() { return static_cast<pid_t>(pthread_getthreadid_np()); }
  384. #elif defined(__OpenBSD__)
  385. pid_t GetTID() { return getthrid(); }
  386. #elif defined(__NetBSD__)
  387. pid_t GetTID() { return static_cast<pid_t>(_lwp_self()); }
  388. #elif defined(__native_client__)
  389. pid_t GetTID() {
  390. auto* thread = pthread_self();
  391. static_assert(sizeof(pid_t) == sizeof(thread),
  392. "In NaCL int expected to be the same size as a pointer");
  393. return reinterpret_cast<pid_t>(thread);
  394. }
  395. #else
  // Generic fallback: derives the thread ID from `pthread_self()`.
  pid_t GetTID() {
    // POSIX does not require `pthread_t` to be an arithmetic type; platforms
    // where it isn't should be handled by a dedicated branch above.
    const pthread_t self = pthread_self();
    return static_cast<pid_t>(self);
  }
  402. #endif
  403. // GetCachedTID() caches the thread ID in thread-local storage (which is a
  404. // userspace construct) to avoid unnecessary system calls. Without this caching,
  405. // it can take roughly 98ns, while it takes roughly 1ns with this caching.
  406. pid_t GetCachedTID() {
  407. #ifdef ABSL_HAVE_THREAD_LOCAL
  408. static thread_local pid_t thread_id = GetTID();
  409. return thread_id;
  410. #else
  411. return GetTID();
  412. #endif // ABSL_HAVE_THREAD_LOCAL
  413. }
  414. } // namespace base_internal
  415. ABSL_NAMESPACE_END
  416. } // namespace absl