#pragma once
#ifdef __GNUC__
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wunused-parameter"
#endif
//===-- llvm/Support/Threading.h - Control multithreading mode --*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file declares helper functions for running LLVM in a multi-threaded
// environment.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_SUPPORT_THREADING_H
#define LLVM_SUPPORT_THREADING_H

#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Config/llvm-config.h" // for LLVM_ON_UNIX
#include "llvm/Support/Compiler.h"
#include <ciso646> // So we can check the C++ standard lib macros.
#include <optional>

#if defined(_MSC_VER)
// MSVC's call_once implementation worked since VS 2015, which is the minimum
// supported version as of this writing.
#define LLVM_THREADING_USE_STD_CALL_ONCE 1
#elif defined(LLVM_ON_UNIX) &&                                                 \
    (defined(_LIBCPP_VERSION) ||                                               \
     !(defined(__NetBSD__) || defined(__OpenBSD__) || defined(__powerpc__)))
// std::call_once from libc++ is used on all Unix platforms. Other
// implementations like libstdc++ are known to have problems on NetBSD,
// OpenBSD and PowerPC.
#define LLVM_THREADING_USE_STD_CALL_ONCE 1
#elif defined(LLVM_ON_UNIX) &&                                                 \
    (defined(__powerpc__) && defined(__LITTLE_ENDIAN__))
#define LLVM_THREADING_USE_STD_CALL_ONCE 1
#else
#define LLVM_THREADING_USE_STD_CALL_ONCE 0
#endif

#if LLVM_THREADING_USE_STD_CALL_ONCE
#include <mutex>
#else
#include "llvm/Support/Atomic.h"
#endif

namespace llvm {
class Twine;

/// Returns true if LLVM is compiled with support for multi-threading, and
/// false otherwise.
constexpr bool llvm_is_multithreaded() { return LLVM_ENABLE_THREADS; }

#if LLVM_THREADING_USE_STD_CALL_ONCE

typedef std::once_flag once_flag;

#else

enum InitStatus { Uninitialized = 0, Wait = 1, Done = 2 };

/// The llvm::once_flag structure
///
/// This type is modeled after std::once_flag to use with llvm::call_once.
/// This structure must be used as an opaque object. It is a struct to force
/// autoinitialization and behave like std::once_flag.
struct once_flag {
  volatile sys::cas_flag status = Uninitialized;
};

#endif

/// Execute the function specified as a parameter once.
///
/// Typical usage:
/// \code
///   void foo() {...};
///   ...
///   static once_flag flag;
///   call_once(flag, foo);
/// \endcode
///
/// \param flag Flag used for tracking whether or not this has run.
/// \param F Function to call once.
template <typename Function, typename... Args>
void call_once(once_flag &flag, Function &&F, Args &&... ArgList) {
#if LLVM_THREADING_USE_STD_CALL_ONCE
  std::call_once(flag, std::forward<Function>(F),
                 std::forward<Args>(ArgList)...);
#else
  // For other platforms we use a generic (if brittle) version based on our
  // atomics.
  sys::cas_flag old_val = sys::CompareAndSwap(&flag.status, Wait, Uninitialized);
  if (old_val == Uninitialized) {
    std::forward<Function>(F)(std::forward<Args>(ArgList)...);
    sys::MemoryFence();
    TsanIgnoreWritesBegin();
    TsanHappensBefore(&flag.status);
    flag.status = Done;
    TsanIgnoreWritesEnd();
  } else {
    // Wait until any thread doing the call has finished.
    sys::cas_flag tmp = flag.status;
    sys::MemoryFence();
    while (tmp != Done) {
      tmp = flag.status;
      sys::MemoryFence();
    }
  }
  TsanHappensAfter(&flag.status);
#endif
}

/// This tells how a thread pool will be used.
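///
/// A minimal illustrative sketch of configuring a strategy by hand (the field
/// values below are examples, not recommendations):
/// \code
///   ThreadPoolStrategy S;          // default: all threads, affinity-aware
///   S.ThreadsRequested = 4;        // suggested upper bound of 4 threads
///   S.UseHyperThreads = false;     // one thread per physical core
///   unsigned N = S.compute_thread_count();
/// \endcode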
class ThreadPoolStrategy {
public:
  // The default value (0) means all available threads should be used,
  // taking the affinity mask into account. If set, this value only represents
  // a suggested high bound, the runtime might choose a lower value (not
  // higher).
  unsigned ThreadsRequested = 0;

  // If SMT is active, use hyper threads. If false, there will be only one
  // std::thread per core.
  bool UseHyperThreads = true;

  // If set, will constrain 'ThreadsRequested' to the number of hardware
  // threads, or hardware cores.
  bool Limit = false;

  /// Retrieves the max available threads for the current strategy. This
  /// accounts for affinity masks and takes advantage of all CPU sockets.
  unsigned compute_thread_count() const;

  /// Assign the current thread to an ideal hardware CPU or NUMA node. In a
  /// multi-socket system, this ensures threads are assigned to all CPU
  /// sockets. \p ThreadPoolNum represents a number bounded by [0,
  /// compute_thread_count()).
  void apply_thread_strategy(unsigned ThreadPoolNum) const;

  /// Finds the CPU socket where a thread should go. Returns 'std::nullopt' if
  /// the thread shall remain on the actual CPU socket.
  std::optional<unsigned> compute_cpu_socket(unsigned ThreadPoolNum) const;
};

/// Build a strategy from a number of threads as a string provided in \p Num.
/// When Num is above the max number of threads specified by the \p Default
/// strategy, we attempt to equally allocate the threads on all CPU sockets.
/// "0" or an empty string will return the \p Default strategy.
/// "all" will use all hardware threads.
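///
/// A hedged usage sketch ("NumStr" is an illustrative variable, e.g. the
/// value of a -j style command-line option, and is not part of this API):
/// \code
///   ThreadPoolStrategy Strategy;
///   if (std::optional<ThreadPoolStrategy> S = get_threadpool_strategy(NumStr))
///     Strategy = *S;   // NumStr was a valid count, "0", "", or "all"
///   // otherwise NumStr was not a recognized thread count
/// \endcode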
std::optional<ThreadPoolStrategy>
get_threadpool_strategy(StringRef Num, ThreadPoolStrategy Default = {});

/// Returns a thread strategy for tasks requiring significant memory or other
/// resources. To be used for workloads where hardware_concurrency() proves to
/// be less efficient. Avoid this strategy if doing lots of I/O. Currently
/// based on physical cores, if available for the host system, otherwise falls
/// back to hardware_concurrency(). Returns 1 when LLVM is configured with
/// LLVM_ENABLE_THREADS = OFF.
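///
/// An illustrative sketch, using only declarations from this header:
/// \code
///   // One thread per physical core, but suggest no more than 8.
///   ThreadPoolStrategy S = heavyweight_hardware_concurrency(8);
///   unsigned NumWorkers = S.compute_thread_count();
/// \endcode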
inline ThreadPoolStrategy
heavyweight_hardware_concurrency(unsigned ThreadCount = 0) {
  ThreadPoolStrategy S;
  S.UseHyperThreads = false;
  S.ThreadsRequested = ThreadCount;
  return S;
}

/// Like heavyweight_hardware_concurrency() above, but builds a strategy
/// based on the rules described for get_threadpool_strategy().
/// If \p Num is invalid, returns a default strategy where one thread per
/// hardware core is used.
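///
/// For example (illustrative; "ThreadsArg" is a hypothetical string taken
/// from a command-line option, not part of this API):
/// \code
///   ThreadPoolStrategy S = heavyweight_hardware_concurrency(ThreadsArg);
/// \endcode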
inline ThreadPoolStrategy heavyweight_hardware_concurrency(StringRef Num) {
  std::optional<ThreadPoolStrategy> S =
      get_threadpool_strategy(Num, heavyweight_hardware_concurrency());
  if (S)
    return *S;
  return heavyweight_hardware_concurrency();
}

/// Returns a default thread strategy where all available hardware resources
/// are to be used, except for those initially excluded by an affinity mask.
/// This function takes affinity into consideration. Returns 1 when LLVM is
/// configured with LLVM_ENABLE_THREADS=OFF.
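///
/// A brief illustrative sketch:
/// \code
///   ThreadPoolStrategy All = hardware_concurrency();  // every permitted thread
///   ThreadPoolStrategy Two = hardware_concurrency(2); // suggest at most 2
/// \endcode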
inline ThreadPoolStrategy hardware_concurrency(unsigned ThreadCount = 0) {
  ThreadPoolStrategy S;
  S.ThreadsRequested = ThreadCount;
  return S;
}

/// Returns an optimal thread strategy to execute the specified number of
/// tasks. This strategy should prevent us from creating too many threads if
/// we occasionally have an unexpectedly small number of tasks.
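///
/// For instance (illustrative):
/// \code
///   // With only 3 tasks pending, do not spin up more threads than that,
///   // even on a machine with many cores.
///   ThreadPoolStrategy S = optimal_concurrency(3);
/// \endcode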
inline ThreadPoolStrategy optimal_concurrency(unsigned TaskCount = 0) {
  ThreadPoolStrategy S;
  S.Limit = true;
  S.ThreadsRequested = TaskCount;
  return S;
}

/// Return the current thread id, as used in various OS system calls.
/// Note that not all platforms guarantee that the value returned will be
/// unique across the entire system, so portable code should not assume
/// this.
uint64_t get_threadid();

/// Get the maximum length of a thread name on this platform.
/// A value of 0 means there is no limit.
uint32_t get_max_thread_name_length();

/// Set the name of the current thread. Setting a thread's name can
/// be helpful for enabling useful diagnostics under a debugger or when
/// logging. The level of support for setting a thread's name varies
/// wildly across operating systems, and we only make a best effort to
/// perform the operation on supported platforms. No indication of success
/// or failure is returned.
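///
/// A hedged example ("Index" is an illustrative loop variable, not part of
/// this API):
/// \code
///   set_thread_name("llvm-worker-" + Twine(Index));
/// \endcode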
void set_thread_name(const Twine &Name);

/// Get the name of the current thread. The level of support for
/// getting a thread's name varies wildly across operating systems, and it
/// is not even guaranteed that if you can successfully set a thread's name
/// that you can later get it back. This function is intended for diagnostic
/// purposes, and as with setting a thread's name no indication of whether
/// the operation succeeded or failed is returned.
void get_thread_name(SmallVectorImpl<char> &Name);

/// Returns a mask that represents on which hardware thread, core, CPU, or
/// NUMA group the calling thread can be executed. On Windows, threads cannot
/// cross CPU socket boundaries.
llvm::BitVector get_thread_affinity_mask();

/// Returns how many physical CPUs or NUMA groups the system has.
unsigned get_cpus();

/// Returns how many physical cores the system has (as opposed to logical
/// cores, as returned by thread::hardware_concurrency(), which includes
/// hyperthreads). Returns -1 if unknown for the current host system.
int get_physical_cores();

enum class ThreadPriority {
  /// Lower the current thread's priority as much as possible. Can be used
  /// for long-running tasks that are not time critical; more energy-
  /// efficient than Low.
  Background = 0,

  /// Lower the current thread's priority such that it does not affect
  /// foreground tasks significantly. This is a good default for long-
  /// running, latency-insensitive tasks to make sure the CPU is not hogged
  /// by this task.
  Low = 1,

  /// Restore the current thread's priority to the default scheduling
  /// priority.
  Default = 2,
};

enum class SetThreadPriorityResult { FAILURE, SUCCESS };
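
/// Set the priority of the calling thread. A hedged usage sketch:
/// \code
///   if (set_thread_priority(ThreadPriority::Background) ==
///       SetThreadPriorityResult::SUCCESS) {
///     // run a long, non-urgent task at reduced priority ...
///   }
/// \endcode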
SetThreadPriorityResult set_thread_priority(ThreadPriority Priority);

} // namespace llvm

#endif // LLVM_SUPPORT_THREADING_H

#ifdef __GNUC__
#pragma GCC diagnostic pop
#endif