123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443 |
- //===- Unix/Threading.inc - Unix Threading Implementation ----- -*- C++ -*-===//
- //
- // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
- // See https://llvm.org/LICENSE.txt for license information.
- // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
- //
- //===----------------------------------------------------------------------===//
- //
- // This file provides the Unix specific implementation of Threading functions.
- //
- //===----------------------------------------------------------------------===//
- #include "Unix.h"
- #include "llvm/ADT/ScopeExit.h"
- #include "llvm/ADT/SmallString.h"
- #include "llvm/ADT/SmallVector.h"
- #include "llvm/ADT/StringRef.h"
- #include "llvm/ADT/Twine.h"
- #include "llvm/Support/MemoryBuffer.h"
- #include "llvm/Support/raw_ostream.h"
- #if defined(__APPLE__)
- #include <mach/mach_init.h>
- #include <mach/mach_port.h>
- #include <pthread/qos.h>
- #include <sys/sysctl.h>
- #include <sys/types.h>
- #endif
- #include <pthread.h>
- #if defined(__FreeBSD__) || defined(__OpenBSD__)
- #include <pthread_np.h> // For pthread_getthreadid_np() / pthread_set_name_np()
- #endif
- #if defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
- #include <errno.h>
- #include <sys/cpuset.h>
- #include <sys/sysctl.h>
- #include <sys/user.h>
- #include <unistd.h>
- #endif
- #if defined(__NetBSD__)
#include <lwp.h> // For _lwp_self()
- #endif
- #if defined(__OpenBSD__)
- #include <unistd.h> // For getthrid()
- #endif
- #if defined(__linux__)
- #include <sched.h> // For sched_getaffinity
- #include <sys/syscall.h> // For syscall codes
- #include <unistd.h> // For syscall()
- #endif
- namespace llvm {
- pthread_t
- llvm_execute_on_thread_impl(void *(*ThreadFunc)(void *), void *Arg,
- std::optional<unsigned> StackSizeInBytes) {
- int errnum;
- // Construct the attributes object.
- pthread_attr_t Attr;
- if ((errnum = ::pthread_attr_init(&Attr)) != 0) {
- ReportErrnumFatal("pthread_attr_init failed", errnum);
- }
- auto AttrGuard = llvm::make_scope_exit([&] {
- if ((errnum = ::pthread_attr_destroy(&Attr)) != 0) {
- ReportErrnumFatal("pthread_attr_destroy failed", errnum);
- }
- });
- // Set the requested stack size, if given.
- if (StackSizeInBytes) {
- if ((errnum = ::pthread_attr_setstacksize(&Attr, *StackSizeInBytes)) != 0) {
- ReportErrnumFatal("pthread_attr_setstacksize failed", errnum);
- }
- }
- // Construct and execute the thread.
- pthread_t Thread;
- if ((errnum = ::pthread_create(&Thread, &Attr, ThreadFunc, Arg)) != 0)
- ReportErrnumFatal("pthread_create failed", errnum);
- return Thread;
- }
- void llvm_thread_detach_impl(pthread_t Thread) {
- int errnum;
- if ((errnum = ::pthread_detach(Thread)) != 0) {
- ReportErrnumFatal("pthread_detach failed", errnum);
- }
- }
- void llvm_thread_join_impl(pthread_t Thread) {
- int errnum;
- if ((errnum = ::pthread_join(Thread, nullptr)) != 0) {
- ReportErrnumFatal("pthread_join failed", errnum);
- }
- }
/// Returns the identifier of \p Thread. The pthread_t handle is itself used as
/// the identifier, so the argument is returned unchanged.
pthread_t llvm_thread_get_id_impl(pthread_t Thread) {
  return Thread;
}
/// Returns the identifier of the calling thread, as reported by
/// pthread_self().
pthread_t llvm_thread_get_current_id_impl() {
  const pthread_t Current = ::pthread_self();
  return Current;
}
- } // namespace llvm
- uint64_t llvm::get_threadid() {
- #if defined(__APPLE__)
- // Calling "mach_thread_self()" bumps the reference count on the thread
- // port, so we need to deallocate it. mach_task_self() doesn't bump the ref
- // count.
- thread_port_t Self = mach_thread_self();
- mach_port_deallocate(mach_task_self(), Self);
- return Self;
- #elif defined(__FreeBSD__)
- return uint64_t(pthread_getthreadid_np());
- #elif defined(__NetBSD__)
- return uint64_t(_lwp_self());
- #elif defined(__OpenBSD__)
- return uint64_t(getthrid());
- #elif defined(__ANDROID__)
- return uint64_t(gettid());
- #elif defined(__linux__)
- return uint64_t(syscall(SYS_gettid));
- #else
- return uint64_t(pthread_self());
- #endif
- }
/// Compile-time maximum thread name length for the current platform. Callers
/// in this file treat the value as including the null terminator. A value of 0
/// is returned when no limit is known for the platform, or on Linux when
/// HAVE_PTHREAD_SETNAME_NP is not set.
static constexpr uint32_t get_max_thread_name_length_impl() {
#if defined(__NetBSD__)
  constexpr uint32_t MaxLength = PTHREAD_MAX_NAMELEN_NP;
#elif defined(__APPLE__)
  constexpr uint32_t MaxLength = 64;
#elif defined(__linux__)
#if HAVE_PTHREAD_SETNAME_NP
  constexpr uint32_t MaxLength = 16;
#else
  constexpr uint32_t MaxLength = 0;
#endif
#elif defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
  constexpr uint32_t MaxLength = 16;
#elif defined(__OpenBSD__)
  constexpr uint32_t MaxLength = 32;
#else
  constexpr uint32_t MaxLength = 0;
#endif
  return MaxLength;
}
- uint32_t llvm::get_max_thread_name_length() {
- return get_max_thread_name_length_impl();
- }
- void llvm::set_thread_name(const Twine &Name) {
- // Make sure the input is null terminated.
- SmallString<64> Storage;
- StringRef NameStr = Name.toNullTerminatedStringRef(Storage);
- // Truncate from the beginning, not the end, if the specified name is too
- // long. For one, this ensures that the resulting string is still null
- // terminated, but additionally the end of a long thread name will usually
- // be more unique than the beginning, since a common pattern is for similar
- // threads to share a common prefix.
- // Note that the name length includes the null terminator.
- if (get_max_thread_name_length() > 0)
- NameStr = NameStr.take_back(get_max_thread_name_length() - 1);
- (void)NameStr;
- #if defined(__linux__)
- #if (defined(__GLIBC__) && defined(_GNU_SOURCE)) || defined(__ANDROID__)
- #if HAVE_PTHREAD_SETNAME_NP
- ::pthread_setname_np(::pthread_self(), NameStr.data());
- #endif
- #endif
- #elif defined(__FreeBSD__) || defined(__OpenBSD__)
- ::pthread_set_name_np(::pthread_self(), NameStr.data());
- #elif defined(__NetBSD__)
- ::pthread_setname_np(::pthread_self(), "%s",
- const_cast<char *>(NameStr.data()));
- #elif defined(__APPLE__)
- ::pthread_setname_np(NameStr.data());
- #endif
- }
- void llvm::get_thread_name(SmallVectorImpl<char> &Name) {
- Name.clear();
- #if defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
- int pid = ::getpid();
- uint64_t tid = get_threadid();
- struct kinfo_proc *kp = nullptr, *nkp;
- size_t len = 0;
- int error;
- int ctl[4] = {CTL_KERN, KERN_PROC, KERN_PROC_PID | KERN_PROC_INC_THREAD,
- (int)pid};
- while (1) {
- error = sysctl(ctl, 4, kp, &len, nullptr, 0);
- if (kp == nullptr || (error != 0 && errno == ENOMEM)) {
- // Add extra space in case threads are added before next call.
- len += sizeof(*kp) + len / 10;
- nkp = (struct kinfo_proc *)::realloc(kp, len);
- if (nkp == nullptr) {
- free(kp);
- return;
- }
- kp = nkp;
- continue;
- }
- if (error != 0)
- len = 0;
- break;
- }
- for (size_t i = 0; i < len / sizeof(*kp); i++) {
- if (kp[i].ki_tid == (lwpid_t)tid) {
- Name.append(kp[i].ki_tdname, kp[i].ki_tdname + strlen(kp[i].ki_tdname));
- break;
- }
- }
- free(kp);
- return;
- #elif defined(__NetBSD__)
- constexpr uint32_t len = get_max_thread_name_length_impl();
- char buf[len];
- ::pthread_getname_np(::pthread_self(), buf, len);
- Name.append(buf, buf + strlen(buf));
- #elif defined(__OpenBSD__)
- constexpr uint32_t len = get_max_thread_name_length_impl();
- char buf[len];
- ::pthread_get_name_np(::pthread_self(), buf, len);
- Name.append(buf, buf + strlen(buf));
- #elif defined(__linux__)
- #if HAVE_PTHREAD_GETNAME_NP
- constexpr uint32_t len = get_max_thread_name_length_impl();
- char Buffer[len] = {'\0'}; // FIXME: working around MSan false positive.
- if (0 == ::pthread_getname_np(::pthread_self(), Buffer, len))
- Name.append(Buffer, Buffer + strlen(Buffer));
- #endif
- #endif
- }
- SetThreadPriorityResult llvm::set_thread_priority(ThreadPriority Priority) {
- #if defined(__linux__) && defined(SCHED_IDLE)
- // Some *really* old glibcs are missing SCHED_IDLE.
- // http://man7.org/linux/man-pages/man3/pthread_setschedparam.3.html
- // http://man7.org/linux/man-pages/man2/sched_setscheduler.2.html
- sched_param priority;
- // For each of the above policies, param->sched_priority must be 0.
- priority.sched_priority = 0;
- // SCHED_IDLE for running very low priority background jobs.
- // SCHED_OTHER the standard round-robin time-sharing policy;
- return !pthread_setschedparam(
- pthread_self(),
- // FIXME: consider SCHED_BATCH for Low
- Priority == ThreadPriority::Default ? SCHED_OTHER : SCHED_IDLE,
- &priority)
- ? SetThreadPriorityResult::SUCCESS
- : SetThreadPriorityResult::FAILURE;
- #elif defined(__APPLE__)
- // https://developer.apple.com/documentation/apple-silicon/tuning-your-code-s-performance-for-apple-silicon
- //
- // Background - Applies to work that isn’t visible to the user and may take
- // significant time to complete. Examples include indexing, backing up, or
- // synchronizing data. This class emphasizes energy efficiency.
- //
- // Utility - Applies to work that takes anywhere from a few seconds to a few
- // minutes to complete. Examples include downloading a document or importing
- // data. This class offers a balance between responsiveness, performance, and
- // energy efficiency.
- const auto qosClass = [&]() {
- switch (Priority) {
- case ThreadPriority::Background:
- return QOS_CLASS_BACKGROUND;
- case ThreadPriority::Low:
- return QOS_CLASS_UTILITY;
- case ThreadPriority::Default:
- return QOS_CLASS_DEFAULT;
- }
- }();
- return !pthread_set_qos_class_self_np(qosClass, 0)
- ? SetThreadPriorityResult::SUCCESS
- : SetThreadPriorityResult::FAILURE;
- #endif
- return SetThreadPriorityResult::FAILURE;
- }
- #include <thread>
/// Returns the number of hardware threads usable by the caller.
///
/// On FreeBSD and Linux the size of the affinity mask is preferred. If that
/// query fails, or on other platforms, the value falls back to
/// std::thread::hardware_concurrency(), with 1 substituted when that reports 0.
static int computeHostNumHardwareThreads() {
#if defined(__FreeBSD__)
  cpuset_t AffinityMask;
  CPU_ZERO(&AffinityMask);
  const int Status =
      cpuset_getaffinity(CPU_LEVEL_WHICH, CPU_WHICH_TID, -1,
                         sizeof(AffinityMask), &AffinityMask);
  if (Status == 0)
    return CPU_COUNT(&AffinityMask);
#elif defined(__linux__)
  cpu_set_t AffinityMask;
  const int Status = sched_getaffinity(0, sizeof(AffinityMask), &AffinityMask);
  if (Status == 0)
    return CPU_COUNT(&AffinityMask);
#endif
  // hardware_concurrency() is allowed to return 0; never report fewer than 1.
  const unsigned Reported = std::thread::hardware_concurrency();
  if (Reported == 0)
    return 1;
  return Reported;
}
- void llvm::ThreadPoolStrategy::apply_thread_strategy(
- unsigned ThreadPoolNum) const {}
- llvm::BitVector llvm::get_thread_affinity_mask() {
- // FIXME: Implement
- llvm_unreachable("Not implemented!");
- }
- unsigned llvm::get_cpus() { return 1; }
- #if defined(__linux__) && (defined(__i386__) || defined(__x86_64__))
- // On Linux, the number of physical cores can be computed from /proc/cpuinfo,
- // using the number of unique physical/core id pairs. The following
- // implementation reads the /proc/cpuinfo format on an x86_64 system.
- static int computeHostNumPhysicalCores() {
- // Enabled represents the number of physical id/core id pairs with at least
- // one processor id enabled by the CPU affinity mask.
- cpu_set_t Affinity, Enabled;
- if (sched_getaffinity(0, sizeof(Affinity), &Affinity) != 0)
- return -1;
- CPU_ZERO(&Enabled);
- // Read /proc/cpuinfo as a stream (until EOF reached). It cannot be
- // mmapped because it appears to have 0 size.
- llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> Text =
- llvm::MemoryBuffer::getFileAsStream("/proc/cpuinfo");
- if (std::error_code EC = Text.getError()) {
- llvm::errs() << "Can't read "
- << "/proc/cpuinfo: " << EC.message() << "\n";
- return -1;
- }
- SmallVector<StringRef, 8> strs;
- (*Text)->getBuffer().split(strs, "\n", /*MaxSplit=*/-1,
- /*KeepEmpty=*/false);
- int CurProcessor = -1;
- int CurPhysicalId = -1;
- int CurSiblings = -1;
- int CurCoreId = -1;
- for (StringRef Line : strs) {
- std::pair<StringRef, StringRef> Data = Line.split(':');
- auto Name = Data.first.trim();
- auto Val = Data.second.trim();
- // These fields are available if the kernel is configured with CONFIG_SMP.
- if (Name == "processor")
- Val.getAsInteger(10, CurProcessor);
- else if (Name == "physical id")
- Val.getAsInteger(10, CurPhysicalId);
- else if (Name == "siblings")
- Val.getAsInteger(10, CurSiblings);
- else if (Name == "core id") {
- Val.getAsInteger(10, CurCoreId);
- // The processor id corresponds to an index into cpu_set_t.
- if (CPU_ISSET(CurProcessor, &Affinity))
- CPU_SET(CurPhysicalId * CurSiblings + CurCoreId, &Enabled);
- }
- }
- return CPU_COUNT(&Enabled);
- }
- #elif defined(__linux__) && defined(__s390x__)
// Linux on s390x: report the value of sysconf(_SC_NPROCESSORS_ONLN).
static int computeHostNumPhysicalCores() {
  const long OnlineProcessors = sysconf(_SC_NPROCESSORS_ONLN);
  return OnlineProcessors;
}
- #elif defined(__linux__) && !defined(__ANDROID__)
// Generic Linux: report the number of CPUs in the caller's affinity mask.
//
// Returns -1 when the affinity mask cannot be obtained, or when the larger
// fallback mask cannot be allocated.
static int computeHostNumPhysicalCores() {
  cpu_set_t Affinity;
  if (sched_getaffinity(0, sizeof(Affinity), &Affinity) == 0)
    return CPU_COUNT(&Affinity);

  // The call to sched_getaffinity() may have failed because the fixed-size
  // mask is too small for the number of CPUs on the system (i.e. the system
  // has more than 1024 CPUs). Retry with a dynamically allocated mask large
  // enough for twice as many CPUs.
  constexpr int MaxCPUs = 2048;
  cpu_set_t *DynAffinity = CPU_ALLOC(MaxCPUs);
  if (DynAffinity == nullptr)
    return -1;
  const size_t DynSize = CPU_ALLOC_SIZE(MaxCPUs);
  CPU_ZERO_S(DynSize, DynAffinity);

  int NumCPUs = -1;
  // A dynamically sized set must be counted with CPU_COUNT_S; plain CPU_COUNT
  // only looks at sizeof(cpu_set_t) bytes and would miss the upper CPUs.
  if (sched_getaffinity(0, DynSize, DynAffinity) == 0)
    NumCPUs = CPU_COUNT_S(DynSize, DynAffinity);

  // Release the mask on both the success and the failure path.
  CPU_FREE(DynAffinity);
  return NumCPUs;
}
- #elif defined(__APPLE__)
- // Gets the number of *physical cores* on the machine.
- static int computeHostNumPhysicalCores() {
- uint32_t count;
- size_t len = sizeof(count);
- sysctlbyname("hw.physicalcpu", &count, &len, NULL, 0);
- if (count < 1) {
- int nm[2];
- nm[0] = CTL_HW;
- nm[1] = HW_AVAILCPU;
- sysctl(nm, 2, &count, &len, NULL, 0);
- if (count < 1)
- return -1;
- }
- return count;
- }
- #elif defined(__MVS__)
// z/OS: read the number of online standard CPs by following two pointers
// stored at fixed byte offsets, starting from address 0 (the PSA).
static int computeHostNumPhysicalCores() {
  enum {
    // Byte offset, within the Prefixed Save Area (PSA), of the pointer to the
    // Communications Vector Table (CVT). The entry is a 31-bit pointer that is
    // zero-extended to uintptr_t.
    FLCCVT = 16,
    // Byte offset, within the CVT, of the pointer to the Common System Data
    // Area (CSD). The entry is a 31-bit pointer that is zero-extended to
    // uintptr_t.
    CVTCSD = 660,
    // Byte offset, within the CSD, of the number of live CPs in the LPAR,
    // stored as a signed 32-bit value.
    CSD_NUMBER_ONLINE_STANDARD_CPS = 264,
  };

  char *PSA = nullptr;

  // Load the CVT address from the PSA and widen it to a pointer.
  const unsigned int CVTAddress = reinterpret_cast<unsigned int &>(PSA[FLCCVT]);
  char *CVT = reinterpret_cast<char *>(static_cast<uintptr_t>(CVTAddress));

  // Load the CSD address from the CVT and widen it to a pointer.
  const unsigned int CSDAddress = reinterpret_cast<unsigned int &>(CVT[CVTCSD]);
  char *CSD = reinterpret_cast<char *>(static_cast<uintptr_t>(CSDAddress));

  return reinterpret_cast<int &>(CSD[CSD_NUMBER_ONLINE_STANDARD_CPS]);
}
- #else
// Fallback for every other system: -1 signals that the physical core count is
// unknown.
static int computeHostNumPhysicalCores() {
  constexpr int UnknownCoreCount = -1;
  return UnknownCoreCount;
}
- #endif
- int llvm::get_physical_cores() {
- static int NumCores = computeHostNumPhysicalCores();
- return NumCores;
- }
|