123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141 |
- /*
- * kmp_barrier.h
- */
- //===----------------------------------------------------------------------===//
- //
- // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
- // See https://llvm.org/LICENSE.txt for license information.
- // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
- //
- //===----------------------------------------------------------------------===//
- #ifndef KMP_BARRIER_H
- #define KMP_BARRIER_H
- #include "kmp.h"
- #include "kmp_i18n.h"
- #if KMP_HAVE_XMMINTRIN_H && KMP_HAVE__MM_MALLOC
- #include <xmmintrin.h>
- #define KMP_ALIGNED_ALLOCATE(size, alignment) _mm_malloc(size, alignment)
- #define KMP_ALIGNED_FREE(ptr) _mm_free(ptr)
- #elif KMP_HAVE_ALIGNED_ALLOC
- #define KMP_ALIGNED_ALLOCATE(size, alignment) aligned_alloc(alignment, size)
- #define KMP_ALIGNED_FREE(ptr) free(ptr)
- #elif KMP_HAVE_POSIX_MEMALIGN
// Allocate `size` bytes aligned to `alignment` by calling posix_memalign().
// Returns the allocation on success and a null pointer on failure. Memory
// obtained here is released with KMP_ALIGNED_FREE.
static inline void *KMP_ALIGNED_ALLOCATE(size_t size, size_t alignment) {
  void *ptr = NULL;
  // posix_memalign() signals failure through a nonzero return value. In that
  // case nothing was allocated and the output argument is not guaranteed to
  // have been written, so it must be neither inspected nor freed.
  if (posix_memalign(&ptr, alignment, size) != 0)
    return NULL;
  return ptr;
}
- #define KMP_ALIGNED_FREE(ptr) free(ptr)
- #elif KMP_HAVE__ALIGNED_MALLOC
- #include <malloc.h>
- #define KMP_ALIGNED_ALLOCATE(size, alignment) _aligned_malloc(size, alignment)
- #define KMP_ALIGNED_FREE(ptr) _aligned_free(ptr)
- #else
- #define KMP_ALIGNED_ALLOCATE(size, alignment) KMP_INTERNAL_MALLOC(size)
- #define KMP_ALIGNED_FREE(ptr) KMP_INTERNAL_FREE(ptr)
- #endif
- // Use four cache lines: MLC tends to prefetch the next or previous cache line
- // creating a possible fake conflict between cores, so this is the only way to
- // guarantee that no such prefetch can happen.
- #ifndef KMP_FOURLINE_ALIGN_CACHE
- #define KMP_FOURLINE_ALIGN_CACHE KMP_ALIGN(4 * CACHE_LINE)
- #endif
- #define KMP_OPTIMIZE_FOR_REDUCTIONS 0
- class distributedBarrier {
- struct flags_s {
- kmp_uint32 volatile KMP_FOURLINE_ALIGN_CACHE stillNeed;
- };
- struct go_s {
- std::atomic<kmp_uint64> KMP_FOURLINE_ALIGN_CACHE go;
- };
- struct iter_s {
- kmp_uint64 volatile KMP_FOURLINE_ALIGN_CACHE iter;
- };
- struct sleep_s {
- std::atomic<bool> KMP_FOURLINE_ALIGN_CACHE sleep;
- };
- void init(size_t nthr);
- void resize(size_t nthr);
- void computeGo(size_t n);
- void computeVarsForN(size_t n);
- public:
- enum {
- MAX_ITERS = 3,
- MAX_GOS = 8,
- IDEAL_GOS = 4,
- IDEAL_CONTENTION = 16,
- };
- flags_s *flags[MAX_ITERS];
- go_s *go;
- iter_s *iter;
- sleep_s *sleep;
- size_t KMP_ALIGN_CACHE num_threads; // number of threads in barrier
- size_t KMP_ALIGN_CACHE max_threads; // size of arrays in data structure
- // number of go signals each requiring one write per iteration
- size_t KMP_ALIGN_CACHE num_gos;
- // number of groups of gos
- size_t KMP_ALIGN_CACHE num_groups;
- // threads per go signal
- size_t KMP_ALIGN_CACHE threads_per_go;
- bool KMP_ALIGN_CACHE fix_threads_per_go;
- // threads per group
- size_t KMP_ALIGN_CACHE threads_per_group;
- // number of go signals in a group
- size_t KMP_ALIGN_CACHE gos_per_group;
- void *team_icvs;
- distributedBarrier() = delete;
- ~distributedBarrier() = delete;
- // Used instead of constructor to create aligned data
- static distributedBarrier *allocate(int nThreads) {
- distributedBarrier *d = (distributedBarrier *)KMP_ALIGNED_ALLOCATE(
- sizeof(distributedBarrier), 4 * CACHE_LINE);
- if (!d) {
- KMP_FATAL(MemoryAllocFailed);
- }
- d->num_threads = 0;
- d->max_threads = 0;
- for (int i = 0; i < MAX_ITERS; ++i)
- d->flags[i] = NULL;
- d->go = NULL;
- d->iter = NULL;
- d->sleep = NULL;
- d->team_icvs = NULL;
- d->fix_threads_per_go = false;
- // calculate gos and groups ONCE on base size
- d->computeGo(nThreads);
- d->init(nThreads);
- return d;
- }
- static void deallocate(distributedBarrier *db) { KMP_ALIGNED_FREE(db); }
- void update_num_threads(size_t nthr) { init(nthr); }
- bool need_resize(size_t new_nthr) { return (new_nthr > max_threads); }
- size_t get_num_threads() { return num_threads; }
- kmp_uint64 go_release();
- void go_reset();
- };
- #endif // KMP_BARRIER_H
|