- /*
- * kmp_affinity.h -- header for affinity management
- */
- //===----------------------------------------------------------------------===//
- //
- // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
- // See https://llvm.org/LICENSE.txt for license information.
- // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
- //
- //===----------------------------------------------------------------------===//
- #ifndef KMP_AFFINITY_H
- #define KMP_AFFINITY_H
- #include "kmp.h"
- #include "kmp_os.h"
- #include <limits>
- #if KMP_AFFINITY_SUPPORTED
- #if KMP_USE_HWLOC
- class KMPHwlocAffinity : public KMPAffinity {
- public:
- class Mask : public KMPAffinity::Mask {
- hwloc_cpuset_t mask;
- public:
- Mask() {
- mask = hwloc_bitmap_alloc();
- this->zero();
- }
- ~Mask() { hwloc_bitmap_free(mask); }
- void set(int i) override { hwloc_bitmap_set(mask, i); }
- bool is_set(int i) const override { return hwloc_bitmap_isset(mask, i); }
- void clear(int i) override { hwloc_bitmap_clr(mask, i); }
- void zero() override { hwloc_bitmap_zero(mask); }
- void copy(const KMPAffinity::Mask *src) override {
- const Mask *convert = static_cast<const Mask *>(src);
- hwloc_bitmap_copy(mask, convert->mask);
- }
- void bitwise_and(const KMPAffinity::Mask *rhs) override {
- const Mask *convert = static_cast<const Mask *>(rhs);
- hwloc_bitmap_and(mask, mask, convert->mask);
- }
- void bitwise_or(const KMPAffinity::Mask *rhs) override {
- const Mask *convert = static_cast<const Mask *>(rhs);
- hwloc_bitmap_or(mask, mask, convert->mask);
- }
- void bitwise_not() override { hwloc_bitmap_not(mask, mask); }
- int begin() const override { return hwloc_bitmap_first(mask); }
- int end() const override { return -1; }
- int next(int previous) const override {
- return hwloc_bitmap_next(mask, previous);
- }
- int get_system_affinity(bool abort_on_error) override {
- KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
- "Illegal get affinity operation when not capable");
- long retval =
- hwloc_get_cpubind(__kmp_hwloc_topology, mask, HWLOC_CPUBIND_THREAD);
- if (retval >= 0) {
- return 0;
- }
- int error = errno;
- if (abort_on_error) {
- __kmp_fatal(KMP_MSG(FatalSysError), KMP_ERR(error), __kmp_msg_null);
- }
- return error;
- }
- int set_system_affinity(bool abort_on_error) const override {
- KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
- "Illegal set affinity operation when not capable");
- long retval =
- hwloc_set_cpubind(__kmp_hwloc_topology, mask, HWLOC_CPUBIND_THREAD);
- if (retval >= 0) {
- return 0;
- }
- int error = errno;
- if (abort_on_error) {
- __kmp_fatal(KMP_MSG(FatalSysError), KMP_ERR(error), __kmp_msg_null);
- }
- return error;
- }
- #if KMP_OS_WINDOWS
- int set_process_affinity(bool abort_on_error) const override {
- KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
- "Illegal set process affinity operation when not capable");
- int error = 0;
- const hwloc_topology_support *support =
- hwloc_topology_get_support(__kmp_hwloc_topology);
- if (support->cpubind->set_proc_cpubind) {
- int retval;
- retval = hwloc_set_cpubind(__kmp_hwloc_topology, mask,
- HWLOC_CPUBIND_PROCESS);
- if (retval >= 0)
- return 0;
- error = errno;
- if (abort_on_error)
- __kmp_fatal(KMP_MSG(FatalSysError), KMP_ERR(error), __kmp_msg_null);
- }
- return error;
- }
- #endif
- int get_proc_group() const override {
- int group = -1;
- #if KMP_OS_WINDOWS
- if (__kmp_num_proc_groups == 1) {
- return 1;
- }
- for (int i = 0; i < __kmp_num_proc_groups; i++) {
- // On Windows, the long type is always 32 bits, so each 64-bit
- // processor group spans two of the bitmap's ulongs
- unsigned long first_32_bits = hwloc_bitmap_to_ith_ulong(mask, i * 2);
- unsigned long second_32_bits =
- hwloc_bitmap_to_ith_ulong(mask, i * 2 + 1);
- if (first_32_bits == 0 && second_32_bits == 0) {
- continue;
- }
- if (group >= 0) {
- return -1;
- }
- group = i;
- }
- #endif /* KMP_OS_WINDOWS */
- return group;
- }
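- // Example (illustrative): with two 64-bit processor groups, a mask
- // covering only CPUs 0-63 yields group 0, one covering only CPUs
- // 64-127 yields group 1, and a mask spanning both yields -1, since
- // it cannot be represented by a single group.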
- };
- void determine_capable(const char *var) override {
- const hwloc_topology_support *topology_support;
- if (__kmp_hwloc_topology == NULL) {
- if (hwloc_topology_init(&__kmp_hwloc_topology) < 0) {
- __kmp_hwloc_error = TRUE;
- if (__kmp_affinity_verbose)
- KMP_WARNING(AffHwlocErrorOccurred, var, "hwloc_topology_init()");
- }
- if (hwloc_topology_load(__kmp_hwloc_topology) < 0) {
- __kmp_hwloc_error = TRUE;
- if (__kmp_affinity_verbose)
- KMP_WARNING(AffHwlocErrorOccurred, var, "hwloc_topology_load()");
- }
- }
- topology_support = hwloc_topology_get_support(__kmp_hwloc_topology);
- // Is the system capable of setting/getting this thread's affinity?
- // Also, is topology discovery possible? (pu indicates ability to discover
- // processing units). And finally, were there no errors when calling any
- // hwloc_* API functions?
- if (topology_support && topology_support->cpubind->set_thisthread_cpubind &&
- topology_support->cpubind->get_thisthread_cpubind &&
- topology_support->discovery->pu && !__kmp_hwloc_error) {
- // enables affinity according to KMP_AFFINITY_CAPABLE() macro
- KMP_AFFINITY_ENABLE(TRUE);
- } else {
- // indicate that hwloc didn't work and disable affinity
- __kmp_hwloc_error = TRUE;
- KMP_AFFINITY_DISABLE();
- }
- }
- void bind_thread(int which) override {
- KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
- "Illegal set affinity operation when not capable");
- KMPAffinity::Mask *mask;
- KMP_CPU_ALLOC_ON_STACK(mask);
- KMP_CPU_ZERO(mask);
- KMP_CPU_SET(which, mask);
- __kmp_set_system_affinity(mask, TRUE);
- KMP_CPU_FREE_FROM_STACK(mask);
- }
- KMPAffinity::Mask *allocate_mask() override { return new Mask(); }
- void deallocate_mask(KMPAffinity::Mask *m) override { delete m; }
- KMPAffinity::Mask *allocate_mask_array(int num) override {
- return new Mask[num];
- }
- void deallocate_mask_array(KMPAffinity::Mask *array) override {
- Mask *hwloc_array = static_cast<Mask *>(array);
- delete[] hwloc_array;
- }
- KMPAffinity::Mask *index_mask_array(KMPAffinity::Mask *array,
- int index) override {
- Mask *hwloc_array = static_cast<Mask *>(array);
- return &(hwloc_array[index]);
- }
- api_type get_api_type() const override { return HWLOC; }
- };
- #endif /* KMP_USE_HWLOC */
- #if KMP_OS_LINUX || KMP_OS_FREEBSD
- #if KMP_OS_LINUX
- /* On some of the older OSes that we build on, these constants aren't present
- in <asm/unistd.h> #included from <sys/syscall.h>. They must be the same on
- all systems of the same arch where they are defined, and they cannot change;
- they are set in stone forever. */
- #include <sys/syscall.h>
- #if KMP_ARCH_X86 || KMP_ARCH_ARM
- #ifndef __NR_sched_setaffinity
- #define __NR_sched_setaffinity 241
- #elif __NR_sched_setaffinity != 241
- #error Wrong code for setaffinity system call.
- #endif /* __NR_sched_setaffinity */
- #ifndef __NR_sched_getaffinity
- #define __NR_sched_getaffinity 242
- #elif __NR_sched_getaffinity != 242
- #error Wrong code for getaffinity system call.
- #endif /* __NR_sched_getaffinity */
- #elif KMP_ARCH_AARCH64
- #ifndef __NR_sched_setaffinity
- #define __NR_sched_setaffinity 122
- #elif __NR_sched_setaffinity != 122
- #error Wrong code for setaffinity system call.
- #endif /* __NR_sched_setaffinity */
- #ifndef __NR_sched_getaffinity
- #define __NR_sched_getaffinity 123
- #elif __NR_sched_getaffinity != 123
- #error Wrong code for getaffinity system call.
- #endif /* __NR_sched_getaffinity */
- #elif KMP_ARCH_X86_64
- #ifndef __NR_sched_setaffinity
- #define __NR_sched_setaffinity 203
- #elif __NR_sched_setaffinity != 203
- #error Wrong code for setaffinity system call.
- #endif /* __NR_sched_setaffinity */
- #ifndef __NR_sched_getaffinity
- #define __NR_sched_getaffinity 204
- #elif __NR_sched_getaffinity != 204
- #error Wrong code for getaffinity system call.
- #endif /* __NR_sched_getaffinity */
- #elif KMP_ARCH_PPC64
- #ifndef __NR_sched_setaffinity
- #define __NR_sched_setaffinity 222
- #elif __NR_sched_setaffinity != 222
- #error Wrong code for setaffinity system call.
- #endif /* __NR_sched_setaffinity */
- #ifndef __NR_sched_getaffinity
- #define __NR_sched_getaffinity 223
- #elif __NR_sched_getaffinity != 223
- #error Wrong code for getaffinity system call.
- #endif /* __NR_sched_getaffinity */
- #elif KMP_ARCH_MIPS
- #ifndef __NR_sched_setaffinity
- #define __NR_sched_setaffinity 4239
- #elif __NR_sched_setaffinity != 4239
- #error Wrong code for setaffinity system call.
- #endif /* __NR_sched_setaffinity */
- #ifndef __NR_sched_getaffinity
- #define __NR_sched_getaffinity 4240
- #elif __NR_sched_getaffinity != 4240
- #error Wrong code for getaffinity system call.
- #endif /* __NR_sched_getaffinity */
- #elif KMP_ARCH_MIPS64
- #ifndef __NR_sched_setaffinity
- #define __NR_sched_setaffinity 5195
- #elif __NR_sched_setaffinity != 5195
- #error Wrong code for setaffinity system call.
- #endif /* __NR_sched_setaffinity */
- #ifndef __NR_sched_getaffinity
- #define __NR_sched_getaffinity 5196
- #elif __NR_sched_getaffinity != 5196
- #error Wrong code for getaffinity system call.
- #endif /* __NR_sched_getaffinity */
- #else
- #error Unknown or unsupported architecture
- #endif /* KMP_ARCH_* */
- #elif KMP_OS_FREEBSD
- #include <pthread.h>
- #include <pthread_np.h>
- #endif
- class KMPNativeAffinity : public KMPAffinity {
- class Mask : public KMPAffinity::Mask {
- typedef unsigned long mask_t;
- typedef decltype(__kmp_affin_mask_size) mask_size_type;
- static const unsigned int BITS_PER_MASK_T = sizeof(mask_t) * CHAR_BIT;
- static const mask_t ONE = 1;
- mask_size_type get_num_mask_types() const {
- return __kmp_affin_mask_size / sizeof(mask_t);
- }
- public:
- mask_t *mask;
- Mask() { mask = (mask_t *)__kmp_allocate(__kmp_affin_mask_size); }
- ~Mask() {
- if (mask)
- __kmp_free(mask);
- }
- void set(int i) override {
- mask[i / BITS_PER_MASK_T] |= (ONE << (i % BITS_PER_MASK_T));
- }
- bool is_set(int i) const override {
- return (mask[i / BITS_PER_MASK_T] & (ONE << (i % BITS_PER_MASK_T)));
- }
- void clear(int i) override {
- mask[i / BITS_PER_MASK_T] &= ~(ONE << (i % BITS_PER_MASK_T));
- }
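- // Worked example (illustrative): with a 64-bit mask_t, set(70)
- // addresses mask[70 / 64] = mask[1] and sets bit 70 % 64 = 6, i.e.
- // mask[1] |= (ONE << 6); is_set() and clear() address the same bit.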
- void zero() override {
- mask_size_type e = get_num_mask_types();
- for (mask_size_type i = 0; i < e; ++i)
- mask[i] = (mask_t)0;
- }
- void copy(const KMPAffinity::Mask *src) override {
- const Mask *convert = static_cast<const Mask *>(src);
- mask_size_type e = get_num_mask_types();
- for (mask_size_type i = 0; i < e; ++i)
- mask[i] = convert->mask[i];
- }
- void bitwise_and(const KMPAffinity::Mask *rhs) override {
- const Mask *convert = static_cast<const Mask *>(rhs);
- mask_size_type e = get_num_mask_types();
- for (mask_size_type i = 0; i < e; ++i)
- mask[i] &= convert->mask[i];
- }
- void bitwise_or(const KMPAffinity::Mask *rhs) override {
- const Mask *convert = static_cast<const Mask *>(rhs);
- mask_size_type e = get_num_mask_types();
- for (mask_size_type i = 0; i < e; ++i)
- mask[i] |= convert->mask[i];
- }
- void bitwise_not() override {
- mask_size_type e = get_num_mask_types();
- for (mask_size_type i = 0; i < e; ++i)
- mask[i] = ~(mask[i]);
- }
- int begin() const override {
- int retval = 0;
- while (retval < end() && !is_set(retval))
- ++retval;
- return retval;
- }
- int end() const override {
- int e;
- __kmp_type_convert(get_num_mask_types() * BITS_PER_MASK_T, &e);
- return e;
- }
- int next(int previous) const override {
- int retval = previous + 1;
- while (retval < end() && !is_set(retval))
- ++retval;
- return retval;
- }
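- // Iteration idiom (illustrative sketch): begin()/end()/next() let both
- // this mask and the hwloc mask be walked generically, e.g.
- //   for (int i = mask->begin(); i != mask->end(); i = mask->next(i))
- // visits each set bit; here end() is the total bit count, while the
- // hwloc mask iterates until hwloc_bitmap_next() returns -1.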
- int get_system_affinity(bool abort_on_error) override {
- KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
- "Illegal get affinity operation when not capable");
- #if KMP_OS_LINUX
- long retval =
- syscall(__NR_sched_getaffinity, 0, __kmp_affin_mask_size, mask);
- #elif KMP_OS_FREEBSD
- int r = pthread_getaffinity_np(pthread_self(), __kmp_affin_mask_size,
- reinterpret_cast<cpuset_t *>(mask));
- int retval = (r == 0 ? 0 : -1);
- #endif
- if (retval >= 0) {
- return 0;
- }
- int error = errno;
- if (abort_on_error) {
- __kmp_fatal(KMP_MSG(FatalSysError), KMP_ERR(error), __kmp_msg_null);
- }
- return error;
- }
- int set_system_affinity(bool abort_on_error) const override {
- KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
- "Illegal set affinity operation when not capable");
- #if KMP_OS_LINUX
- long retval =
- syscall(__NR_sched_setaffinity, 0, __kmp_affin_mask_size, mask);
- #elif KMP_OS_FREEBSD
- int r = pthread_setaffinity_np(pthread_self(), __kmp_affin_mask_size,
- reinterpret_cast<cpuset_t *>(mask));
- int retval = (r == 0 ? 0 : -1);
- #endif
- if (retval >= 0) {
- return 0;
- }
- int error = errno;
- if (abort_on_error) {
- __kmp_fatal(KMP_MSG(FatalSysError), KMP_ERR(error), __kmp_msg_null);
- }
- return error;
- }
- };
- void determine_capable(const char *env_var) override {
- __kmp_affinity_determine_capable(env_var);
- }
- void bind_thread(int which) override { __kmp_affinity_bind_thread(which); }
- KMPAffinity::Mask *allocate_mask() override {
- KMPNativeAffinity::Mask *retval = new Mask();
- return retval;
- }
- void deallocate_mask(KMPAffinity::Mask *m) override {
- KMPNativeAffinity::Mask *native_mask =
- static_cast<KMPNativeAffinity::Mask *>(m);
- delete native_mask;
- }
- KMPAffinity::Mask *allocate_mask_array(int num) override {
- return new Mask[num];
- }
- void deallocate_mask_array(KMPAffinity::Mask *array) override {
- Mask *linux_array = static_cast<Mask *>(array);
- delete[] linux_array;
- }
- KMPAffinity::Mask *index_mask_array(KMPAffinity::Mask *array,
- int index) override {
- Mask *linux_array = static_cast<Mask *>(array);
- return &(linux_array[index]);
- }
- api_type get_api_type() const override { return NATIVE_OS; }
- };
- #endif /* KMP_OS_LINUX || KMP_OS_FREEBSD */
- #if KMP_OS_WINDOWS
- class KMPNativeAffinity : public KMPAffinity {
- class Mask : public KMPAffinity::Mask {
- typedef ULONG_PTR mask_t;
- static const int BITS_PER_MASK_T = sizeof(mask_t) * CHAR_BIT;
- mask_t *mask;
- public:
- Mask() {
- mask = (mask_t *)__kmp_allocate(sizeof(mask_t) * __kmp_num_proc_groups);
- }
- ~Mask() {
- if (mask)
- __kmp_free(mask);
- }
- void set(int i) override {
- mask[i / BITS_PER_MASK_T] |= ((mask_t)1 << (i % BITS_PER_MASK_T));
- }
- bool is_set(int i) const override {
- return (mask[i / BITS_PER_MASK_T] & ((mask_t)1 << (i % BITS_PER_MASK_T)));
- }
- void clear(int i) override {
- mask[i / BITS_PER_MASK_T] &= ~((mask_t)1 << (i % BITS_PER_MASK_T));
- }
- void zero() override {
- for (int i = 0; i < __kmp_num_proc_groups; ++i)
- mask[i] = 0;
- }
- void copy(const KMPAffinity::Mask *src) override {
- const Mask *convert = static_cast<const Mask *>(src);
- for (int i = 0; i < __kmp_num_proc_groups; ++i)
- mask[i] = convert->mask[i];
- }
- void bitwise_and(const KMPAffinity::Mask *rhs) override {
- const Mask *convert = static_cast<const Mask *>(rhs);
- for (int i = 0; i < __kmp_num_proc_groups; ++i)
- mask[i] &= convert->mask[i];
- }
- void bitwise_or(const KMPAffinity::Mask *rhs) override {
- const Mask *convert = static_cast<const Mask *>(rhs);
- for (int i = 0; i < __kmp_num_proc_groups; ++i)
- mask[i] |= convert->mask[i];
- }
- void bitwise_not() override {
- for (int i = 0; i < __kmp_num_proc_groups; ++i)
- mask[i] = ~(mask[i]);
- }
- int begin() const override {
- int retval = 0;
- while (retval < end() && !is_set(retval))
- ++retval;
- return retval;
- }
- int end() const override { return __kmp_num_proc_groups * BITS_PER_MASK_T; }
- int next(int previous) const override {
- int retval = previous + 1;
- while (retval < end() && !is_set(retval))
- ++retval;
- return retval;
- }
- int set_process_affinity(bool abort_on_error) const override {
- if (__kmp_num_proc_groups <= 1) {
- if (!SetProcessAffinityMask(GetCurrentProcess(), *mask)) {
- DWORD error = GetLastError();
- if (abort_on_error) {
- __kmp_fatal(KMP_MSG(CantSetThreadAffMask), KMP_ERR(error),
- __kmp_msg_null);
- }
- return error;
- }
- }
- return 0;
- }
- int set_system_affinity(bool abort_on_error) const override {
- if (__kmp_num_proc_groups > 1) {
- // Check for a valid mask.
- GROUP_AFFINITY ga;
- int group = get_proc_group();
- if (group < 0) {
- if (abort_on_error) {
- KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity");
- }
- return -1;
- }
- // Transform the bit vector into a GROUP_AFFINITY struct
- // and make the system call to set affinity.
- ga.Group = group;
- ga.Mask = mask[group];
- ga.Reserved[0] = ga.Reserved[1] = ga.Reserved[2] = 0;
- KMP_DEBUG_ASSERT(__kmp_SetThreadGroupAffinity != NULL);
- if (__kmp_SetThreadGroupAffinity(GetCurrentThread(), &ga, NULL) == 0) {
- DWORD error = GetLastError();
- if (abort_on_error) {
- __kmp_fatal(KMP_MSG(CantSetThreadAffMask), KMP_ERR(error),
- __kmp_msg_null);
- }
- return error;
- }
- } else {
- if (!SetThreadAffinityMask(GetCurrentThread(), *mask)) {
- DWORD error = GetLastError();
- if (abort_on_error) {
- __kmp_fatal(KMP_MSG(CantSetThreadAffMask), KMP_ERR(error),
- __kmp_msg_null);
- }
- return error;
- }
- }
- return 0;
- }
- int get_system_affinity(bool abort_on_error) override {
- if (__kmp_num_proc_groups > 1) {
- this->zero();
- GROUP_AFFINITY ga;
- KMP_DEBUG_ASSERT(__kmp_GetThreadGroupAffinity != NULL);
- if (__kmp_GetThreadGroupAffinity(GetCurrentThread(), &ga) == 0) {
- DWORD error = GetLastError();
- if (abort_on_error) {
- __kmp_fatal(KMP_MSG(FunctionError, "GetThreadGroupAffinity()"),
- KMP_ERR(error), __kmp_msg_null);
- }
- return error;
- }
- if ((ga.Group < 0) || (ga.Group > __kmp_num_proc_groups) ||
- (ga.Mask == 0)) {
- return -1;
- }
- mask[ga.Group] = ga.Mask;
- } else {
- mask_t newMask, sysMask, retval;
- if (!GetProcessAffinityMask(GetCurrentProcess(), &newMask, &sysMask)) {
- DWORD error = GetLastError();
- if (abort_on_error) {
- __kmp_fatal(KMP_MSG(FunctionError, "GetProcessAffinityMask()"),
- KMP_ERR(error), __kmp_msg_null);
- }
- return error;
- }
- retval = SetThreadAffinityMask(GetCurrentThread(), newMask);
- if (!retval) {
- DWORD error = GetLastError();
- if (abort_on_error) {
- __kmp_fatal(KMP_MSG(FunctionError, "SetThreadAffinityMask()"),
- KMP_ERR(error), __kmp_msg_null);
- }
- return error;
- }
- newMask = SetThreadAffinityMask(GetCurrentThread(), retval);
- if (!newMask) {
- DWORD error = GetLastError();
- if (abort_on_error) {
- __kmp_fatal(KMP_MSG(FunctionError, "SetThreadAffinityMask()"),
- KMP_ERR(error), __kmp_msg_null);
- }
- }
- *mask = retval;
- }
- return 0;
- }
- int get_proc_group() const override {
- int group = -1;
- if (__kmp_num_proc_groups == 1) {
- return 1;
- }
- for (int i = 0; i < __kmp_num_proc_groups; i++) {
- if (mask[i] == 0)
- continue;
- if (group >= 0)
- return -1;
- group = i;
- }
- return group;
- }
- };
- void determine_capable(const char *env_var) override {
- __kmp_affinity_determine_capable(env_var);
- }
- void bind_thread(int which) override { __kmp_affinity_bind_thread(which); }
- KMPAffinity::Mask *allocate_mask() override { return new Mask(); }
- void deallocate_mask(KMPAffinity::Mask *m) override { delete m; }
- KMPAffinity::Mask *allocate_mask_array(int num) override {
- return new Mask[num];
- }
- void deallocate_mask_array(KMPAffinity::Mask *array) override {
- Mask *windows_array = static_cast<Mask *>(array);
- delete[] windows_array;
- }
- KMPAffinity::Mask *index_mask_array(KMPAffinity::Mask *array,
- int index) override {
- Mask *windows_array = static_cast<Mask *>(array);
- return &(windows_array[index]);
- }
- api_type get_api_type() const override { return NATIVE_OS; }
- };
- #endif /* KMP_OS_WINDOWS */
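- // Usage sketch (illustrative only, never compiled): how a single thread
- // can be pinned through this interface. The KMP_CPU_* macros and
- // __kmp_set_system_affinity() are assumed to dispatch to whichever
- // KMPAffinity implementation was selected at startup.
- #if 0
- void pin_to_processor(int proc) {
-   kmp_affin_mask_t *mask;
-   KMP_CPU_ALLOC(mask); // allocate a mask from the active implementation
-   KMP_CPU_ZERO(mask); // clear every bit
-   KMP_CPU_SET(proc, mask); // select exactly one processor
-   __kmp_set_system_affinity(mask, TRUE); // bind, aborting on failure
-   KMP_CPU_FREE(mask);
- }
- #endif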
- #endif /* KMP_AFFINITY_SUPPORTED */
- // Describe an attribute for a level in the machine topology
- struct kmp_hw_attr_t {
- int core_type : 8;
- int core_eff : 8;
- unsigned valid : 1;
- unsigned reserved : 15;
- static const int UNKNOWN_CORE_EFF = -1;
- kmp_hw_attr_t()
- : core_type(KMP_HW_CORE_TYPE_UNKNOWN), core_eff(UNKNOWN_CORE_EFF),
- valid(0), reserved(0) {}
- void set_core_type(kmp_hw_core_type_t type) {
- valid = 1;
- core_type = type;
- }
- void set_core_eff(int eff) {
- valid = 1;
- core_eff = eff;
- }
- kmp_hw_core_type_t get_core_type() const {
- return (kmp_hw_core_type_t)core_type;
- }
- int get_core_eff() const { return core_eff; }
- bool is_core_type_valid() const {
- return core_type != KMP_HW_CORE_TYPE_UNKNOWN;
- }
- bool is_core_eff_valid() const { return core_eff != UNKNOWN_CORE_EFF; }
- operator bool() const { return valid; }
- void clear() {
- core_type = KMP_HW_CORE_TYPE_UNKNOWN;
- core_eff = UNKNOWN_CORE_EFF;
- valid = 0;
- }
- bool contains(const kmp_hw_attr_t &other) const {
- if (!valid && !other.valid)
- return true;
- if (valid && other.valid) {
- if (other.is_core_type_valid()) {
- if (!is_core_type_valid() || (get_core_type() != other.get_core_type()))
- return false;
- }
- if (other.is_core_eff_valid()) {
- if (!is_core_eff_valid() || (get_core_eff() != other.get_core_eff()))
- return false;
- }
- return true;
- }
- return false;
- }
- bool operator==(const kmp_hw_attr_t &rhs) const {
- return (rhs.valid == valid && rhs.core_eff == core_eff &&
- rhs.core_type == core_type);
- }
- bool operator!=(const kmp_hw_attr_t &rhs) const { return !operator==(rhs); }
- };
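- // Example (illustrative, assuming an x86 build where
- // KMP_HW_CORE_TYPE_CORE is defined): an attribute that only fixes the
- // core type is contained by one that fixes the same type plus an
- // efficiency:
- //   kmp_hw_attr_t a, b;
- //   a.set_core_type(KMP_HW_CORE_TYPE_CORE);
- //   b.set_core_type(KMP_HW_CORE_TYPE_CORE);
- //   b.set_core_eff(1);
- //   b.contains(a); // true: b satisfies everything a specifies
- //   a.contains(b); // false: a has no core_eff to match b's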
- class kmp_hw_thread_t {
- public:
- static const int UNKNOWN_ID = -1;
- static int compare_ids(const void *a, const void *b);
- static int compare_compact(const void *a, const void *b);
- int ids[KMP_HW_LAST];
- int sub_ids[KMP_HW_LAST];
- bool leader;
- int os_id;
- kmp_hw_attr_t attrs;
- void print() const;
- void clear() {
- for (int i = 0; i < (int)KMP_HW_LAST; ++i)
- ids[i] = UNKNOWN_ID;
- leader = false;
- attrs.clear();
- }
- };
- class kmp_topology_t {
- struct flags_t {
- int uniform : 1;
- int reserved : 31;
- };
- int depth;
- // The following arrays are all 'depth' long. They are allocated to hold
- // up to KMP_HW_LAST objects so that layers can be added without
- // reallocating any array.
- // Ordered array of the types in the topology
- kmp_hw_t *types;
- // Quick topology ratios; for non-uniform topologies, each entry holds
- // the max number of itemAs per itemB,
- // e.g., [ 4 packages | 6 cores / package | 2 threads / core ]
- int *ratio;
- // Storage containing the absolute number of each topology layer
- int *count;
- // The number of core efficiencies. This is only useful for hybrid
- // topologies. Core efficiencies will range from 0 to num efficiencies - 1
- int num_core_efficiencies;
- int num_core_types;
- kmp_hw_core_type_t core_types[KMP_HW_MAX_NUM_CORE_TYPES];
- // The hardware threads array
- // hw_threads is num_hw_threads long
- // Each hw_thread's ids and sub_ids are depth deep
- int num_hw_threads;
- kmp_hw_thread_t *hw_threads;
- // Equivalence hash where the key is the hardware topology item
- // and the value is the equivalent hardware topology type in the
- // types[] array; if the value is KMP_HW_UNKNOWN, then there is no
- // known equivalence for the topology type
- kmp_hw_t equivalent[KMP_HW_LAST];
- // Flags describing the topology
- flags_t flags;
- // Insert a new topology layer after allocation
- void _insert_layer(kmp_hw_t type, const int *ids);
- #if KMP_GROUP_AFFINITY
- // Insert topology information about Windows Processor groups
- void _insert_windows_proc_groups();
- #endif
- // Count each item & get the num x's per y
- // e.g., get the number of cores and the number of threads per core
- // for each (x, y) in (KMP_HW_* , KMP_HW_*)
- void _gather_enumeration_information();
- // Remove layers that don't add information to the topology.
- // This is done by having the layer take on the id = UNKNOWN_ID (-1)
- void _remove_radix1_layers();
- // Find out if the topology is uniform
- void _discover_uniformity();
- // Set all the sub_ids for each hardware thread
- void _set_sub_ids();
- // Set global affinity variables describing the number of threads per
- // core, the number of packages, the number of cores per package, and
- // the number of cores.
- void _set_globals();
- // Set the last level cache equivalent type
- void _set_last_level_cache();
- // Return the number of cores with a particular attribute, 'attr'.
- // If 'find_all' is true, then find all cores on the machine, otherwise find
- // all cores per the layer 'above'
- int _get_ncores_with_attr(const kmp_hw_attr_t &attr, int above,
- bool find_all = false) const;
- public:
- // Force use of allocate()/deallocate()
- kmp_topology_t() = delete;
- kmp_topology_t(const kmp_topology_t &t) = delete;
- kmp_topology_t(kmp_topology_t &&t) = delete;
- kmp_topology_t &operator=(const kmp_topology_t &t) = delete;
- kmp_topology_t &operator=(kmp_topology_t &&t) = delete;
- static kmp_topology_t *allocate(int nproc, int ndepth, const kmp_hw_t *types);
- static void deallocate(kmp_topology_t *);
- // Functions used in create_map() routines
- kmp_hw_thread_t &at(int index) {
- KMP_DEBUG_ASSERT(index >= 0 && index < num_hw_threads);
- return hw_threads[index];
- }
- const kmp_hw_thread_t &at(int index) const {
- KMP_DEBUG_ASSERT(index >= 0 && index < num_hw_threads);
- return hw_threads[index];
- }
- int get_num_hw_threads() const { return num_hw_threads; }
- void sort_ids() {
- qsort(hw_threads, num_hw_threads, sizeof(kmp_hw_thread_t),
- kmp_hw_thread_t::compare_ids);
- }
- // Check if the hardware ids are unique. Return true if they are,
- // false otherwise
- bool check_ids() const;
- // Function to call after the create_map() routine
- void canonicalize();
- void canonicalize(int pkgs, int cores_per_pkg, int thr_per_core, int cores);
- // Functions used after canonicalize() called
- bool filter_hw_subset();
- bool is_close(int hwt1, int hwt2, int level) const;
- bool is_uniform() const { return flags.uniform; }
- // Tell whether a type is a valid type in the topology
- // returns KMP_HW_UNKNOWN when there is no equivalent type
- kmp_hw_t get_equivalent_type(kmp_hw_t type) const { return equivalent[type]; }
- // Set type1 = type2
- void set_equivalent_type(kmp_hw_t type1, kmp_hw_t type2) {
- KMP_DEBUG_ASSERT_VALID_HW_TYPE(type1);
- KMP_DEBUG_ASSERT_VALID_HW_TYPE(type2);
- kmp_hw_t real_type2 = equivalent[type2];
- if (real_type2 == KMP_HW_UNKNOWN)
- real_type2 = type2;
- equivalent[type1] = real_type2;
- // This loop is required since any of the types may have been set to
- // be equivalent to type1. They all must be checked and reset to type2.
- KMP_FOREACH_HW_TYPE(type) {
- if (equivalent[type] == type1) {
- equivalent[type] = real_type2;
- }
- }
- }
- // Calculate number of types corresponding to level1
- // per types corresponding to level2 (e.g., number of threads per core)
- int calculate_ratio(int level1, int level2) const {
- KMP_DEBUG_ASSERT(level1 >= 0 && level1 < depth);
- KMP_DEBUG_ASSERT(level2 >= 0 && level2 < depth);
- int r = 1;
- for (int level = level1; level > level2; --level)
- r *= ratio[level];
- return r;
- }
- int get_ratio(int level) const {
- KMP_DEBUG_ASSERT(level >= 0 && level < depth);
- return ratio[level];
- }
- int get_depth() const { return depth; }
- kmp_hw_t get_type(int level) const {
- KMP_DEBUG_ASSERT(level >= 0 && level < depth);
- return types[level];
- }
- int get_level(kmp_hw_t type) const {
- KMP_DEBUG_ASSERT_VALID_HW_TYPE(type);
- int eq_type = equivalent[type];
- if (eq_type == KMP_HW_UNKNOWN)
- return -1;
- for (int i = 0; i < depth; ++i)
- if (types[i] == eq_type)
- return i;
- return -1;
- }
- int get_count(int level) const {
- KMP_DEBUG_ASSERT(level >= 0 && level < depth);
- return count[level];
- }
- // Return the total number of cores with attribute 'attr'
- int get_ncores_with_attr(const kmp_hw_attr_t &attr) const {
- return _get_ncores_with_attr(attr, -1, true);
- }
- // Return the number of cores with attribute
- // 'attr' per topology level 'above'
- int get_ncores_with_attr_per(const kmp_hw_attr_t &attr, int above) const {
- return _get_ncores_with_attr(attr, above, false);
- }
- #if KMP_AFFINITY_SUPPORTED
- void sort_compact() {
- qsort(hw_threads, num_hw_threads, sizeof(kmp_hw_thread_t),
- kmp_hw_thread_t::compare_compact);
- }
- #endif
- void print(const char *env_var = "KMP_AFFINITY") const;
- void dump() const;
- };
- extern kmp_topology_t *__kmp_topology;
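- // Usage sketch (illustrative): once a create_map() routine has filled in
- // and canonicalized __kmp_topology, layers are queried by type, e.g.:
- //   int core_level = __kmp_topology->get_level(KMP_HW_CORE);
- //   int ncores = __kmp_topology->get_count(core_level);
- //   int thr_per_core = __kmp_topology->calculate_ratio(
- //       __kmp_topology->get_level(KMP_HW_THREAD), core_level);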
- class kmp_hw_subset_t {
- const static size_t MAX_ATTRS = KMP_HW_MAX_NUM_CORE_EFFS;
- public:
- // Describe a machine topology item in KMP_HW_SUBSET
- struct item_t {
- kmp_hw_t type;
- int num_attrs;
- int num[MAX_ATTRS];
- int offset[MAX_ATTRS];
- kmp_hw_attr_t attr[MAX_ATTRS];
- };
- // Put parentheses around max to avoid accidental use of the Windows max macro.
- const static int USE_ALL = (std::numeric_limits<int>::max)();
- private:
- int depth;
- int capacity;
- item_t *items;
- kmp_uint64 set;
- bool absolute;
- // The set must be able to handle up to KMP_HW_LAST number of layers
- KMP_BUILD_ASSERT(sizeof(set) * 8 >= KMP_HW_LAST);
- // Comparator used to sort the KMP_HW_SUBSET items into topology order.
- // All unknown topology types end up at the beginning of the subset
- static int hw_subset_compare(const void *i1, const void *i2) {
- kmp_hw_t type1 = ((const item_t *)i1)->type;
- kmp_hw_t type2 = ((const item_t *)i2)->type;
- int level1 = __kmp_topology->get_level(type1);
- int level2 = __kmp_topology->get_level(type2);
- return level1 - level2;
- }
- public:
- // Force use of allocate()/deallocate()
- kmp_hw_subset_t() = delete;
- kmp_hw_subset_t(const kmp_hw_subset_t &t) = delete;
- kmp_hw_subset_t(kmp_hw_subset_t &&t) = delete;
- kmp_hw_subset_t &operator=(const kmp_hw_subset_t &t) = delete;
- kmp_hw_subset_t &operator=(kmp_hw_subset_t &&t) = delete;
- static kmp_hw_subset_t *allocate() {
- int initial_capacity = 5;
- kmp_hw_subset_t *retval =
- (kmp_hw_subset_t *)__kmp_allocate(sizeof(kmp_hw_subset_t));
- retval->depth = 0;
- retval->capacity = initial_capacity;
- retval->set = 0ull;
- retval->absolute = false;
- retval->items = (item_t *)__kmp_allocate(sizeof(item_t) * initial_capacity);
- return retval;
- }
- static void deallocate(kmp_hw_subset_t *subset) {
- __kmp_free(subset->items);
- __kmp_free(subset);
- }
- void set_absolute() { absolute = true; }
- bool is_absolute() const { return absolute; }
- void push_back(int num, kmp_hw_t type, int offset, kmp_hw_attr_t attr) {
- for (int i = 0; i < depth; ++i) {
- // Found an existing item for this layer type
- // Add the num, offset, and attr to this item
- if (items[i].type == type) {
- int idx = items[i].num_attrs++;
- if ((size_t)idx >= MAX_ATTRS)
- return;
- items[i].num[idx] = num;
- items[i].offset[idx] = offset;
- items[i].attr[idx] = attr;
- return;
- }
- }
- if (depth == capacity - 1) {
- capacity *= 2;
- item_t *new_items = (item_t *)__kmp_allocate(sizeof(item_t) * capacity);
- for (int i = 0; i < depth; ++i)
- new_items[i] = items[i];
- __kmp_free(items);
- items = new_items;
- }
- items[depth].num_attrs = 1;
- items[depth].type = type;
- items[depth].num[0] = num;
- items[depth].offset[0] = offset;
- items[depth].attr[0] = attr;
- depth++;
- set |= (1ull << type);
- }
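- // Example (illustrative): KMP_HW_SUBSET=2s,4c,8t would arrive here as
- //   push_back(2, KMP_HW_SOCKET, 0, attr);
- //   push_back(4, KMP_HW_CORE, 0, attr);
- //   push_back(8, KMP_HW_THREAD, 0, attr);
- // (attr being a default-constructed kmp_hw_attr_t), giving depth == 3
- // with one item per layer; pushing an already-present type appends to
- // that item's num/offset/attr arrays instead.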
- int get_depth() const { return depth; }
- const item_t &at(int index) const {
- KMP_DEBUG_ASSERT(index >= 0 && index < depth);
- return items[index];
- }
- item_t &at(int index) {
- KMP_DEBUG_ASSERT(index >= 0 && index < depth);
- return items[index];
- }
- void remove(int index) {
- KMP_DEBUG_ASSERT(index >= 0 && index < depth);
- set &= ~(1ull << items[index].type);
- for (int j = index + 1; j < depth; ++j) {
- items[j - 1] = items[j];
- }
- depth--;
- }
- void sort() {
- KMP_DEBUG_ASSERT(__kmp_topology);
- qsort(items, depth, sizeof(item_t), hw_subset_compare);
- }
- bool specified(kmp_hw_t type) const { return ((set & (1ull << type)) > 0); }
- void dump() const {
- printf("**********************\n");
- printf("*** kmp_hw_subset: ***\n");
- printf("* depth: %d\n", depth);
- printf("* items:\n");
- for (int i = 0; i < depth; ++i) {
- printf(" type: %s\n", __kmp_hw_get_keyword(items[i].type));
- for (int j = 0; j < items[i].num_attrs; ++j) {
- printf(" num: %d, offset: %d, attr: ", items[i].num[j],
- items[i].offset[j]);
- if (!items[i].attr[j]) {
- printf(" (none)\n");
- } else {
- printf(
- " core_type = %s, core_eff = %d\n",
- __kmp_hw_get_core_type_string(items[i].attr[j].get_core_type()),
- items[i].attr[j].get_core_eff());
- }
- }
- }
- printf("* set: 0x%llx\n", set);
- printf("* absolute: %d\n", absolute);
- printf("**********************\n");
- }
- };
- extern kmp_hw_subset_t *__kmp_hw_subset;
- /* A structure for holding machine-specific hierarchy info to be computed once
- at init. This structure represents a mapping of threads to the actual machine
- hierarchy, or to our best guess at what the hierarchy might be, for the
- purpose of performing an efficient barrier. In the worst case, when there is
- no machine hierarchy information, it produces a tree suitable for a barrier,
- similar to the tree used in the hyper barrier. */
- class hierarchy_info {
- public:
- /* Good default values for number of leaves and branching factor, given no
- affinity information. Behaves a bit like hyper barrier. */
- static const kmp_uint32 maxLeaves = 4;
- static const kmp_uint32 minBranch = 4;
- /** Number of levels in the hierarchy. Typical levels are threads/core,
- cores/package or socket, packages/node, nodes/machine, etc. We don't want
- to get specific with nomenclature. When the machine is oversubscribed we
- add levels to duplicate the hierarchy, doubling the thread capacity of the
- hierarchy each time we add a level. */
- kmp_uint32 maxLevels;
- /** This is specifically the depth of the machine configuration hierarchy, in
- terms of the number of levels along the longest path from root to any
- leaf. It corresponds to the number of entries in numPerLevel if we exclude
- all but one trailing 1. */
- kmp_uint32 depth;
- kmp_uint32 base_num_threads;
- enum init_status { initialized = 0, not_initialized = 1, initializing = 2 };
- volatile kmp_int8 uninitialized; // 0=initialized, 1=not initialized,
- // 2=initialization in progress
- volatile kmp_int8 resizing; // 0=not resizing, 1=resizing
- /** Level 0 corresponds to leaves. numPerLevel[i] is the number of children
- the parent of a node at level i has. For example, if we have a machine
- with 4 packages, 4 cores/package and 2 HT per core, then numPerLevel =
- {2, 4, 4, 1, 1}. All empty levels are set to 1. */
- kmp_uint32 *numPerLevel;
- kmp_uint32 *skipPerLevel;
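- // Worked example (illustrative): for the 4 pkg x 4 core x 2 HT machine
- // above, numPerLevel = {2, 4, 4, 1, ...} yields skipPerLevel =
- // {1, 2, 8, 32, ...}: skipPerLevel[0] = 1 and skipPerLevel[i] =
- // numPerLevel[i-1] * skipPerLevel[i-1], the number of leaves under a
- // node at level i.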
- void deriveLevels() {
- int hier_depth = __kmp_topology->get_depth();
- for (int i = hier_depth - 1, level = 0; i >= 0; --i, ++level) {
- numPerLevel[level] = __kmp_topology->get_ratio(i);
- }
- }
- hierarchy_info()
- : maxLevels(7), depth(1), uninitialized(not_initialized), resizing(0) {}
- void fini() {
- if (!uninitialized && numPerLevel) {
- __kmp_free(numPerLevel);
- numPerLevel = NULL;
- uninitialized = not_initialized;
- }
- }
- void init(int num_addrs) {
- kmp_int8 bool_result = KMP_COMPARE_AND_STORE_ACQ8(
- &uninitialized, not_initialized, initializing);
- if (bool_result == 0) { // Wait for initialization
- while (TCR_1(uninitialized) != initialized)
- KMP_CPU_PAUSE();
- return;
- }
- KMP_DEBUG_ASSERT(bool_result == 1);
- /* Explicitly initialize the data fields here to prevent use of a dirty
- value observed when the static library is re-initialized multiple times
- (e.g. when a non-OpenMP thread repeatedly launches/joins a thread that
- uses OpenMP). */
- depth = 1;
- resizing = 0;
- maxLevels = 7;
- numPerLevel =
- (kmp_uint32 *)__kmp_allocate(maxLevels * 2 * sizeof(kmp_uint32));
- skipPerLevel = &(numPerLevel[maxLevels]);
- for (kmp_uint32 i = 0; i < maxLevels;
- ++i) { // init numPerLevel[*] to 1 item per level
- numPerLevel[i] = 1;
- skipPerLevel[i] = 1;
- }
- // Sort table by physical ID
- if (__kmp_topology && __kmp_topology->get_depth() > 0) {
- deriveLevels();
- } else {
- numPerLevel[0] = maxLeaves;
- numPerLevel[1] = num_addrs / maxLeaves;
- if (num_addrs % maxLeaves)
- numPerLevel[1]++;
- }
- base_num_threads = num_addrs;
- for (int i = maxLevels - 1; i >= 0;
- --i) // count non-empty levels to get depth
- if (numPerLevel[i] != 1 || depth > 1) // only count one top-level '1'
- depth++;
- kmp_uint32 branch = minBranch;
- if (numPerLevel[0] == 1)
- branch = num_addrs / maxLeaves;
- if (branch < minBranch)
- branch = minBranch;
- for (kmp_uint32 d = 0; d < depth - 1; ++d) { // optimize hierarchy width
- while (numPerLevel[d] > branch ||
- (d == 0 && numPerLevel[d] > maxLeaves)) { // max 4 on level 0!
- if (numPerLevel[d] & 1)
- numPerLevel[d]++;
- numPerLevel[d] = numPerLevel[d] >> 1;
- if (numPerLevel[d + 1] == 1)
- depth++;
- numPerLevel[d + 1] = numPerLevel[d + 1] << 1;
- }
- if (numPerLevel[0] == 1) {
- branch = branch >> 1;
- if (branch < 4)
- branch = minBranch;
- }
- }
- for (kmp_uint32 i = 1; i < depth; ++i)
- skipPerLevel[i] = numPerLevel[i - 1] * skipPerLevel[i - 1];
- // Fill in hierarchy in the case of oversubscription
- for (kmp_uint32 i = depth; i < maxLevels; ++i)
- skipPerLevel[i] = 2 * skipPerLevel[i - 1];
- uninitialized = initialized; // One writer
- }
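- // Worked example (illustrative): init(8) with no topology information
- // gives numPerLevel = {4, 2, 1, ...} (maxLeaves leaves per node, then
- // 8 / maxLeaves = 2 nodes), depth = 3, and skipPerLevel =
- // {1, 4, 8, 16, ...}, the tail doubling to absorb oversubscription.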
- // Resize the hierarchy if nproc changes to something larger than before
- void resize(kmp_uint32 nproc) {
- kmp_int8 bool_result = KMP_COMPARE_AND_STORE_ACQ8(&resizing, 0, 1);
- while (bool_result == 0) { // someone else is trying to resize
- KMP_CPU_PAUSE();
- if (nproc <= base_num_threads) // happy with other thread's resize
- return;
- else // try to resize
- bool_result = KMP_COMPARE_AND_STORE_ACQ8(&resizing, 0, 1);
- }
- KMP_DEBUG_ASSERT(bool_result != 0);
- if (nproc <= base_num_threads)
- return; // happy with other thread's resize
- // Calculate new maxLevels
- kmp_uint32 old_sz = skipPerLevel[depth - 1];
- kmp_uint32 incs = 0, old_maxLevels = maxLevels;
- // First see if old maxLevels is enough to contain new size
- for (kmp_uint32 i = depth; i < maxLevels && nproc > old_sz; ++i) {
- skipPerLevel[i] = 2 * skipPerLevel[i - 1];
- numPerLevel[i - 1] *= 2;
- old_sz *= 2;
- depth++;
- }
- if (nproc > old_sz) { // Not enough space, need to expand hierarchy
- while (nproc > old_sz) {
- old_sz *= 2;
- incs++;
- depth++;
- }
- maxLevels += incs;
- // Resize arrays
- kmp_uint32 *old_numPerLevel = numPerLevel;
- kmp_uint32 *old_skipPerLevel = skipPerLevel;
- numPerLevel = skipPerLevel = NULL;
- numPerLevel =
- (kmp_uint32 *)__kmp_allocate(maxLevels * 2 * sizeof(kmp_uint32));
- skipPerLevel = &(numPerLevel[maxLevels]);
- // Copy old elements from old arrays
- for (kmp_uint32 i = 0; i < old_maxLevels; ++i) {
- // init numPerLevel[*] to 1 item per level
- numPerLevel[i] = old_numPerLevel[i];
- skipPerLevel[i] = old_skipPerLevel[i];
- }
- // Init new elements in arrays to 1
- for (kmp_uint32 i = old_maxLevels; i < maxLevels; ++i) {
- // init numPerLevel[*] to 1 item per level
- numPerLevel[i] = 1;
- skipPerLevel[i] = 1;
- }
- // Free old arrays
- __kmp_free(old_numPerLevel);
- }
- // Fill in oversubscription levels of hierarchy
- for (kmp_uint32 i = old_maxLevels; i < maxLevels; ++i)
- skipPerLevel[i] = 2 * skipPerLevel[i - 1];
- base_num_threads = nproc;
- resizing = 0; // One writer
- }
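- // Worked example (illustrative): after init(8) (depth 3, skipPerLevel =
- // {1, 4, 8, 16, 32, ...}), resize(20) doubles the top level twice
- // (8 -> 16 -> 32 >= 20), reaching depth 5 with no reallocation since
- // maxLevels = 7 still has headroom.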
- };
- #endif // KMP_AFFINITY_H