Threading.inc 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314
  1. //===- Windows/Threading.inc - Win32 Threading Implementation - -*- C++ -*-===//
  2. //
  3. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4. // See https://llvm.org/LICENSE.txt for license information.
  5. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6. //
  7. //===----------------------------------------------------------------------===//
  8. //
  9. // This file provides the Win32 specific implementation of Threading functions.
  10. //
  11. //===----------------------------------------------------------------------===//
  12. #include "llvm/ADT/SmallString.h"
  13. #include "llvm/ADT/Twine.h"
  14. #include "llvm/Support/Windows/WindowsSupport.h"
  15. #include <process.h>
  16. #include <bitset>
  17. // Windows will at times define MemoryFence.
  18. #ifdef MemoryFence
  19. #undef MemoryFence
  20. #endif
  21. namespace llvm {
  22. HANDLE
  23. llvm_execute_on_thread_impl(unsigned(__stdcall *ThreadFunc)(void *), void *Arg,
  24. llvm::Optional<unsigned> StackSizeInBytes) {
  25. HANDLE hThread = (HANDLE)::_beginthreadex(
  26. NULL, StackSizeInBytes.getValueOr(0), ThreadFunc, Arg, 0, NULL);
  27. if (!hThread) {
  28. ReportLastErrorFatal("_beginthreadex failed");
  29. }
  30. return hThread;
  31. }
  32. void llvm_thread_join_impl(HANDLE hThread) {
  33. if (::WaitForSingleObject(hThread, INFINITE) == WAIT_FAILED) {
  34. ReportLastErrorFatal("WaitForSingleObject failed");
  35. }
  36. }
  37. void llvm_thread_detach_impl(HANDLE hThread) {
  38. if (::CloseHandle(hThread) == FALSE) {
  39. ReportLastErrorFatal("CloseHandle failed");
  40. }
  41. }
  42. DWORD llvm_thread_get_id_impl(HANDLE hThread) {
  43. return ::GetThreadId(hThread);
  44. }
  45. DWORD llvm_thread_get_current_id_impl() {
  46. return ::GetCurrentThreadId();
  47. }
  48. } // namespace llvm
  49. uint64_t llvm::get_threadid() {
  50. return uint64_t(::GetCurrentThreadId());
  51. }
  52. uint32_t llvm::get_max_thread_name_length() { return 0; }
  #if defined(_MSC_VER)
  /// Announces \p Name as the name of the thread with identifier \p Id by
  /// raising the MS_VC_EXCEPTION (0x406D1388) with a THREADNAME_INFO payload.
  /// The exception is caught and discarded right here, so the call has no
  /// other effect on the program.
  static void SetThreadName(DWORD Id, LPCSTR Name) {
    constexpr DWORD MS_VC_EXCEPTION = 0x406D1388;

  #pragma pack(push, 8)
    struct THREADNAME_INFO {
      DWORD dwType;     // Must be 0x1000.
      LPCSTR szName;    // Pointer to thread name
      DWORD dwThreadId; // Thread ID (-1 == current thread)
      DWORD dwFlags;    // Reserved. Do not use.
    };
  #pragma pack(pop)

    // Fields in declaration order: type tag, name, thread id, reserved flags.
    THREADNAME_INFO Payload = {0x1000, Name, Id, 0};
    const DWORD NumArgs = sizeof(Payload) / sizeof(ULONG_PTR);

    __try {
      ::RaiseException(MS_VC_EXCEPTION, 0, NumArgs, (ULONG_PTR *)&Payload);
    }
    __except (EXCEPTION_EXECUTE_HANDLER) {
      // Intentionally empty: the exception only carries the name.
    }
  }
  #endif
  77. void llvm::set_thread_name(const Twine &Name) {
  78. #if defined(_MSC_VER)
  79. // Make sure the input is null terminated.
  80. SmallString<64> Storage;
  81. StringRef NameStr = Name.toNullTerminatedStringRef(Storage);
  82. SetThreadName(::GetCurrentThreadId(), NameStr.data());
  83. #endif
  84. }
  85. void llvm::get_thread_name(SmallVectorImpl<char> &Name) {
  86. // "Name" is not an inherent property of a thread on Windows. In fact, when
  87. // you "set" the name, you are only firing a one-time message to a debugger
  88. // which it interprets as a program setting its threads' name. We may be
  89. // able to get fancy by creating a TLS entry when someone calls
  90. // set_thread_name so that subsequent calls to get_thread_name return this
  91. // value.
  92. Name.clear();
  93. }
  94. SetThreadPriorityResult llvm::set_thread_priority(ThreadPriority Priority) {
  95. // https://docs.microsoft.com/en-us/windows/desktop/api/processthreadsapi/nf-processthreadsapi-setthreadpriority
  96. // Begin background processing mode. The system lowers the resource scheduling
  97. // priorities of the thread so that it can perform background work without
  98. // significantly affecting activity in the foreground.
  99. // End background processing mode. The system restores the resource scheduling
  100. // priorities of the thread as they were before the thread entered background
  101. // processing mode.
  102. return SetThreadPriority(GetCurrentThread(),
  103. Priority == ThreadPriority::Background
  104. ? THREAD_MODE_BACKGROUND_BEGIN
  105. : THREAD_MODE_BACKGROUND_END)
  106. ? SetThreadPriorityResult::SUCCESS
  107. : SetThreadPriorityResult::FAILURE;
  108. }
  /// Describes one Windows processor group as collected by
  /// getProcessorGroups().
  ///
  /// All members carry default initializers so that a freshly declared
  /// ProcessorGroup never holds indeterminate values; in particular
  /// ThreadsPerCore defaults to 1 because it is used as a divisor.
  struct ProcessorGroup {
    /// Index of the group in the order it was enumerated.
    unsigned ID = 0;
    /// Maximum number of logical processors the group can hold.
    unsigned AllThreads = 0;
    /// Number of logical processors that are usable in the group.
    unsigned UsableThreads = 0;
    /// Number of hardware threads per physical core.
    unsigned ThreadsPerCore = 1;
    /// Bit mask of the usable logical processors in the group.
    uint64_t Affinity = 0;

    /// \returns the number of usable physical cores, never less than 1.
    /// A ThreadsPerCore of 0 is treated as 1 to avoid dividing by zero.
    unsigned useableCores() const {
      const unsigned Divisor = ThreadsPerCore ? ThreadsPerCore : 1U;
      return std::max(1U, UsableThreads / Divisor);
    }
  };
  119. template <typename F>
  120. static bool IterateProcInfo(LOGICAL_PROCESSOR_RELATIONSHIP Relationship, F Fn) {
  121. DWORD Len = 0;
  122. BOOL R = ::GetLogicalProcessorInformationEx(Relationship, NULL, &Len);
  123. if (R || GetLastError() != ERROR_INSUFFICIENT_BUFFER) {
  124. return false;
  125. }
  126. auto *Info = (SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX *)calloc(1, Len);
  127. R = ::GetLogicalProcessorInformationEx(Relationship, Info, &Len);
  128. if (R) {
  129. auto *End =
  130. (SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX *)((uint8_t *)Info + Len);
  131. for (auto *Curr = Info; Curr < End;
  132. Curr = (SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX *)((uint8_t *)Curr +
  133. Curr->Size)) {
  134. if (Curr->Relationship != Relationship)
  135. continue;
  136. Fn(Curr);
  137. }
  138. }
  139. free(Info);
  140. return true;
  141. }
  142. static ArrayRef<ProcessorGroup> getProcessorGroups() {
  143. auto computeGroups = []() {
  144. SmallVector<ProcessorGroup, 4> Groups;
  145. auto HandleGroup = [&](SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX *ProcInfo) {
  146. GROUP_RELATIONSHIP &El = ProcInfo->Group;
  147. for (unsigned J = 0; J < El.ActiveGroupCount; ++J) {
  148. ProcessorGroup G;
  149. G.ID = Groups.size();
  150. G.AllThreads = El.GroupInfo[J].MaximumProcessorCount;
  151. G.UsableThreads = El.GroupInfo[J].ActiveProcessorCount;
  152. assert(G.UsableThreads <= 64);
  153. G.Affinity = El.GroupInfo[J].ActiveProcessorMask;
  154. Groups.push_back(G);
  155. }
  156. };
  157. if (!IterateProcInfo(RelationGroup, HandleGroup))
  158. return std::vector<ProcessorGroup>();
  159. auto HandleProc = [&](SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX *ProcInfo) {
  160. PROCESSOR_RELATIONSHIP &El = ProcInfo->Processor;
  161. assert(El.GroupCount == 1);
  162. unsigned NumHyperThreads = 1;
  163. // If the flag is set, each core supports more than one hyper-thread.
  164. if (El.Flags & LTP_PC_SMT)
  165. NumHyperThreads = std::bitset<64>(El.GroupMask[0].Mask).count();
  166. unsigned I = El.GroupMask[0].Group;
  167. Groups[I].ThreadsPerCore = NumHyperThreads;
  168. };
  169. if (!IterateProcInfo(RelationProcessorCore, HandleProc))
  170. return std::vector<ProcessorGroup>();
  171. // If there's an affinity mask set, assume the user wants to constrain the
  172. // current process to only a single CPU group. On Windows, it is not
  173. // possible for affinity masks to cross CPU group boundaries.
  174. DWORD_PTR ProcessAffinityMask = 0, SystemAffinityMask = 0;
  175. if (::GetProcessAffinityMask(GetCurrentProcess(), &ProcessAffinityMask,
  176. &SystemAffinityMask) &&
  177. ProcessAffinityMask != SystemAffinityMask) {
  178. // We don't expect more that 4 CPU groups on Windows (256 processors).
  179. USHORT GroupCount = 4;
  180. USHORT GroupArray[4]{};
  181. if (::GetProcessGroupAffinity(GetCurrentProcess(), &GroupCount,
  182. GroupArray)) {
  183. assert(GroupCount == 1 &&
  184. "On startup, a program is expected to be assigned only to "
  185. "one processor group!");
  186. unsigned CurrentGroupID = GroupArray[0];
  187. ProcessorGroup NewG{Groups[CurrentGroupID]};
  188. NewG.Affinity = ProcessAffinityMask;
  189. NewG.UsableThreads = countPopulation(ProcessAffinityMask);
  190. Groups.clear();
  191. Groups.push_back(NewG);
  192. }
  193. }
  194. return std::vector<ProcessorGroup>(Groups.begin(), Groups.end());
  195. };
  196. static auto Groups = computeGroups();
  197. return ArrayRef<ProcessorGroup>(Groups);
  198. }
  /// Sums the values that \p P yields for every element of \p Range.
  ///
  /// \returns the accumulated total as an unsigned value; 0 for an empty
  /// range.
  template <typename R, typename UnaryPredicate>
  static unsigned aggregate(R &&Range, UnaryPredicate P) {
    unsigned Total = 0;
    for (const auto &Element : Range) {
      Total += P(Element);
    }
    return Total;
  }
  206. // for sys::getHostNumPhysicalCores
  207. int computeHostNumPhysicalCores() {
  208. static unsigned Cores =
  209. aggregate(getProcessorGroups(), [](const ProcessorGroup &G) {
  210. return G.UsableThreads / G.ThreadsPerCore;
  211. });
  212. return Cores;
  213. }
  214. int computeHostNumHardwareThreads() {
  215. static unsigned Threads =
  216. aggregate(getProcessorGroups(),
  217. [](const ProcessorGroup &G) { return G.UsableThreads; });
  218. return Threads;
  219. }
  220. // Finds the proper CPU socket where a thread number should go. Returns 'None'
  221. // if the thread shall remain on the actual CPU socket.
  222. Optional<unsigned>
  223. llvm::ThreadPoolStrategy::compute_cpu_socket(unsigned ThreadPoolNum) const {
  224. ArrayRef<ProcessorGroup> Groups = getProcessorGroups();
  225. // Only one CPU socket in the system or process affinity was set, no need to
  226. // move the thread(s) to another CPU socket.
  227. if (Groups.size() <= 1)
  228. return None;
  229. // We ask for less threads than there are hardware threads per CPU socket, no
  230. // need to dispatch threads to other CPU sockets.
  231. unsigned MaxThreadsPerSocket =
  232. UseHyperThreads ? Groups[0].UsableThreads : Groups[0].useableCores();
  233. if (compute_thread_count() <= MaxThreadsPerSocket)
  234. return None;
  235. assert(ThreadPoolNum < compute_thread_count() &&
  236. "The thread index is not within thread strategy's range!");
  237. // Assumes the same number of hardware threads per CPU socket.
  238. return (ThreadPoolNum * Groups.size()) / compute_thread_count();
  239. }
  240. // Assign the current thread to a more appropriate CPU socket or CPU group
  241. void llvm::ThreadPoolStrategy::apply_thread_strategy(
  242. unsigned ThreadPoolNum) const {
  243. Optional<unsigned> Socket = compute_cpu_socket(ThreadPoolNum);
  244. if (!Socket)
  245. return;
  246. ArrayRef<ProcessorGroup> Groups = getProcessorGroups();
  247. GROUP_AFFINITY Affinity{};
  248. Affinity.Group = Groups[*Socket].ID;
  249. Affinity.Mask = Groups[*Socket].Affinity;
  250. SetThreadGroupAffinity(GetCurrentThread(), &Affinity, nullptr);
  251. }
  252. llvm::BitVector llvm::get_thread_affinity_mask() {
  253. GROUP_AFFINITY Affinity{};
  254. GetThreadGroupAffinity(GetCurrentThread(), &Affinity);
  255. static unsigned All =
  256. aggregate(getProcessorGroups(),
  257. [](const ProcessorGroup &G) { return G.AllThreads; });
  258. unsigned StartOffset =
  259. aggregate(getProcessorGroups(), [&](const ProcessorGroup &G) {
  260. return G.ID < Affinity.Group ? G.AllThreads : 0;
  261. });
  262. llvm::BitVector V;
  263. V.resize(All);
  264. for (unsigned I = 0; I < sizeof(KAFFINITY) * 8; ++I) {
  265. if ((Affinity.Mask >> I) & 1)
  266. V.set(StartOffset + I);
  267. }
  268. return V;
  269. }
  270. unsigned llvm::get_cpus() { return getProcessorGroups().size(); }