123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348 |
- #if USE_ITT_BUILD
- /*
- * kmp_itt.h -- ITT Notify interface.
- */
- //===----------------------------------------------------------------------===//
- //
- // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
- // See https://llvm.org/LICENSE.txt for license information.
- // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
- //
- //===----------------------------------------------------------------------===//
- #ifndef KMP_ITT_H
- #define KMP_ITT_H
- #include "kmp_lock.h"
- #define INTEL_ITTNOTIFY_API_PRIVATE
- #error #include "ittnotify.h"
- #error #include "legacy/ittnotify.h"
- #if KMP_DEBUG
- #define __kmp_inline // Turn off inlining in debug mode.
- #else
- #define __kmp_inline static inline
- #endif
- #if USE_ITT_NOTIFY
- extern kmp_int32 __kmp_itt_prepare_delay;
- #ifdef __cplusplus
- extern "C" void __kmp_itt_fini_ittlib(void);
- #else
- extern void __kmp_itt_fini_ittlib(void);
- #endif
- #endif
- // Simplify the handling of an argument that is only required when USE_ITT_BUILD
- // is enabled.
- #define USE_ITT_BUILD_ARG(x) , x
- void __kmp_itt_initialize();
- void __kmp_itt_destroy();
- void __kmp_itt_reset();
- // -----------------------------------------------------------------------------
- // New stuff for reporting high-level constructs.
- // Note the naming convention:
- // __kmp_itt_xxxing() function should be called before action, while
- // __kmp_itt_xxxed() function should be called after action.
- // --- Parallel region reporting ---
- __kmp_inline void
- __kmp_itt_region_forking(int gtid, int team_size,
- int barriers); // Primary only, before forking threads.
- __kmp_inline void
- __kmp_itt_region_joined(int gtid); // Primary only, after joining threads.
- // (*) Note: A thread may execute tasks after this point, though.
- // --- Frame reporting ---
- // region=0: no regions, region=1: parallel, region=2: serialized parallel
- __kmp_inline void __kmp_itt_frame_submit(int gtid, __itt_timestamp begin,
- __itt_timestamp end, int imbalance,
- ident_t *loc, int team_size,
- int region = 0);
- // --- Metadata reporting ---
- // begin/end - begin/end timestamps of a barrier frame, imbalance - aggregated
- // wait time value, reduction - whether this is a reduction barrier
- __kmp_inline void __kmp_itt_metadata_imbalance(int gtid, kmp_uint64 begin,
- kmp_uint64 end,
- kmp_uint64 imbalance,
- kmp_uint64 reduction);
- // sched_type: 0 - static, 1 - dynamic, 2 - guided, 3 - custom (all others);
- // iterations - loop trip count, chunk - chunk size
- __kmp_inline void __kmp_itt_metadata_loop(ident_t *loc, kmp_uint64 sched_type,
- kmp_uint64 iterations,
- kmp_uint64 chunk);
- __kmp_inline void __kmp_itt_metadata_single(ident_t *loc);
- // --- Barrier reporting ---
- __kmp_inline void *__kmp_itt_barrier_object(int gtid, int bt, int set_name = 0,
- int delta = 0);
- __kmp_inline void __kmp_itt_barrier_starting(int gtid, void *object);
- __kmp_inline void __kmp_itt_barrier_middle(int gtid, void *object);
- __kmp_inline void __kmp_itt_barrier_finished(int gtid, void *object);
- // --- Taskwait reporting ---
- __kmp_inline void *__kmp_itt_taskwait_object(int gtid);
- __kmp_inline void __kmp_itt_taskwait_starting(int gtid, void *object);
- __kmp_inline void __kmp_itt_taskwait_finished(int gtid, void *object);
// Taskwait reporting helpers.
//
// Both macros read `gtid` from the enclosing scope instead of taking it as an
// argument. Each one expands to exactly one statement wrapped in
// do { } while (0), so it must be terminated with `;` at the call site and can
// safely be used as the body of an unbraced if/else.
//
// KMP_ITT_TASKWAIT_STARTING(obj): when __itt_sync_create_ptr is non-null,
// stores the result of __kmp_itt_taskwait_object(gtid) in `obj` (a modifiable
// void * lvalue) and, if that result is not NULL, calls
// __kmp_itt_taskwait_starting(gtid, obj). Otherwise `obj` is left untouched.
#define KMP_ITT_TASKWAIT_STARTING(obj)                                         \
  do {                                                                         \
    if (UNLIKELY(__itt_sync_create_ptr)) {                                     \
      (obj) = __kmp_itt_taskwait_object(gtid);                                 \
      if ((obj) != NULL) {                                                     \
        __kmp_itt_taskwait_starting(gtid, (obj));                              \
      }                                                                        \
    }                                                                          \
  } while (0)
// KMP_ITT_TASKWAIT_FINISHED(obj): calls __kmp_itt_taskwait_finished(gtid, obj)
// when `obj` is not NULL; does nothing otherwise.
#define KMP_ITT_TASKWAIT_FINISHED(obj)                                         \
  do {                                                                         \
    if (UNLIKELY((obj) != NULL)) {                                             \
      __kmp_itt_taskwait_finished(gtid, (obj));                                \
    }                                                                          \
  } while (0)
- // --- Task reporting ---
- __kmp_inline void __kmp_itt_task_starting(void *object);
- __kmp_inline void __kmp_itt_task_finished(void *object);
- // --- Lock reporting ---
- #if KMP_USE_DYNAMIC_LOCK
- __kmp_inline void __kmp_itt_lock_creating(kmp_user_lock_p lock,
- const ident_t *);
- #else
- __kmp_inline void __kmp_itt_lock_creating(kmp_user_lock_p lock);
- #endif
- __kmp_inline void __kmp_itt_lock_acquiring(kmp_user_lock_p lock);
- __kmp_inline void __kmp_itt_lock_acquired(kmp_user_lock_p lock);
- __kmp_inline void __kmp_itt_lock_releasing(kmp_user_lock_p lock);
- __kmp_inline void __kmp_itt_lock_cancelled(kmp_user_lock_p lock);
- __kmp_inline void __kmp_itt_lock_destroyed(kmp_user_lock_p lock);
- // --- Critical reporting ---
- #if KMP_USE_DYNAMIC_LOCK
- __kmp_inline void __kmp_itt_critical_creating(kmp_user_lock_p lock,
- const ident_t *);
- #else
- __kmp_inline void __kmp_itt_critical_creating(kmp_user_lock_p lock);
- #endif
- __kmp_inline void __kmp_itt_critical_acquiring(kmp_user_lock_p lock);
- __kmp_inline void __kmp_itt_critical_acquired(kmp_user_lock_p lock);
- __kmp_inline void __kmp_itt_critical_releasing(kmp_user_lock_p lock);
- __kmp_inline void __kmp_itt_critical_destroyed(kmp_user_lock_p lock);
- // --- Single reporting ---
- __kmp_inline void __kmp_itt_single_start(int gtid);
- __kmp_inline void __kmp_itt_single_end(int gtid);
- // --- Ordered reporting ---
- __kmp_inline void __kmp_itt_ordered_init(int gtid);
- __kmp_inline void __kmp_itt_ordered_prep(int gtid);
- __kmp_inline void __kmp_itt_ordered_start(int gtid);
- __kmp_inline void __kmp_itt_ordered_end(int gtid);
- // --- Threads reporting ---
- __kmp_inline void __kmp_itt_thread_ignore();
- __kmp_inline void __kmp_itt_thread_name(int gtid);
- // --- System objects ---
- __kmp_inline void __kmp_itt_system_object_created(void *object,
- char const *name);
- // --- Stack stitching ---
- __kmp_inline __itt_caller __kmp_itt_stack_caller_create(void);
- __kmp_inline void __kmp_itt_stack_caller_destroy(__itt_caller);
- __kmp_inline void __kmp_itt_stack_callee_enter(__itt_caller);
- __kmp_inline void __kmp_itt_stack_callee_leave(__itt_caller);
- // -----------------------------------------------------------------------------
- // Old stuff for reporting low-level internal synchronization.
- #if USE_ITT_NOTIFY
- /* Support for SSC marks, which are used by SDE
- http://software.intel.com/en-us/articles/intel-software-development-emulator
- to mark points in instruction traces that represent spin-loops and are
- therefore uninteresting when collecting traces for architecture simulation.
- */
- #ifndef INCLUDE_SSC_MARKS
- #define INCLUDE_SSC_MARKS (KMP_OS_LINUX && KMP_ARCH_X86_64)
- #endif
- /* Linux 64 only for now */
- #if (INCLUDE_SSC_MARKS && KMP_OS_LINUX && KMP_ARCH_X86_64)
- // Portable (at least for gcc and icc) code to insert the necessary instructions
- // to set %ebx and execute the unlikely no-op.
- #if defined(__INTEL_COMPILER)
- #define INSERT_SSC_MARK(tag) __SSC_MARK(tag)
- #else
- #define INSERT_SSC_MARK(tag) \
- __asm__ __volatile__("movl %0, %%ebx; .byte 0x64, 0x67, 0x90 " ::"i"(tag) \
- : "%ebx")
- #endif
- #else
- #define INSERT_SSC_MARK(tag) ((void)0)
- #endif
- /* Markers for the start and end of regions that represent polling and are
- therefore uninteresting to architectural simulations. 0x4376 and 0x4377 are
- arbitrary numbers that should be unique in the space of SSC tags, but there
- is no central issuing authority; rather, randomness is expected to work. */
- #define SSC_MARK_SPIN_START() INSERT_SSC_MARK(0x4376)
- #define SSC_MARK_SPIN_END() INSERT_SSC_MARK(0x4377)
- // Markers for architecture simulation.
- // FORKING : Before the primary thread forks.
- // JOINING : At the start of the join.
- // INVOKING : Before the threads invoke microtasks.
- // DISPATCH_INIT: At the start of dynamically scheduled loop.
- // DISPATCH_NEXT: After claiming next iteration of dynamically scheduled loop.
- #define SSC_MARK_FORKING() INSERT_SSC_MARK(0xd693)
- #define SSC_MARK_JOINING() INSERT_SSC_MARK(0xd694)
- #define SSC_MARK_INVOKING() INSERT_SSC_MARK(0xd695)
- #define SSC_MARK_DISPATCH_INIT() INSERT_SSC_MARK(0xd696)
- #define SSC_MARK_DISPATCH_NEXT() INSERT_SSC_MARK(0xd697)
- // The object is an address that associates a specific set of the prepare,
- // acquire, release, and cancel operations.
- /* Sync prepare indicates a thread is going to start waiting for another thread
- to send a release event. This operation should be done just before the
- thread begins checking for the existence of the release event */
- /* Sync cancel indicates a thread is cancelling a wait on another thread and
- continuing execution without waiting for the other thread to release it */
- /* Sync acquired indicates a thread has received a release event from another
- thread and has stopped waiting. This operation must occur only after the
- release event is received. */
- /* Sync release indicates a thread is going to send a release event to another
- thread so it will stop waiting and continue execution. This operation must
- happen just before the release event. */
- #define KMP_FSYNC_PREPARE(obj) __itt_fsync_prepare((void *)(obj))
- #define KMP_FSYNC_CANCEL(obj) __itt_fsync_cancel((void *)(obj))
- #define KMP_FSYNC_ACQUIRED(obj) __itt_fsync_acquired((void *)(obj))
- #define KMP_FSYNC_RELEASING(obj) __itt_fsync_releasing((void *)(obj))
- /* In case of waiting in a spin loop, ITT wants KMP_FSYNC_PREPARE() to be called
- with a delay (and not called at all if waiting time is small). So, in spin
- loops, do not use KMP_FSYNC_PREPARE(), but use KMP_FSYNC_SPIN_INIT() (before
- spin loop), KMP_FSYNC_SPIN_PREPARE() (within the spin loop), and
- KMP_FSYNC_SPIN_ACQUIRED(). See KMP_WAIT() for example. */
// Spin-wait reporting.
//
// The three macros below cooperate through a local variable, `sync_iters`,
// which KMP_FSYNC_SPIN_INIT declares in the caller's scope. They therefore
// have to be used together inside the same block. Every macro argument is
// parenthesized at each use so that expression arguments (e.g. `p + 1`) are
// compared, assigned and cast as a whole.
//
// KMP_FSYNC_SPIN_INIT(obj, spin): declares `int sync_iters = 0`; if
// __itt_fsync_prepare_ptr is non-null and `obj` is NULL, assigns `spin` to
// `obj` (so `obj` must be a modifiable pointer lvalue); finally emits
// SSC_MARK_SPIN_START(). Because the expansion starts with a declaration it
// cannot be wrapped in do { } while (0); terminate it with `;`.
#undef KMP_FSYNC_SPIN_INIT
#define KMP_FSYNC_SPIN_INIT(obj, spin)                                         \
  int sync_iters = 0;                                                          \
  if (__itt_fsync_prepare_ptr) {                                               \
    if ((obj) == NULL) {                                                       \
      (obj) = (spin);                                                          \
    } /* if */                                                                 \
  } /* if */                                                                   \
  SSC_MARK_SPIN_START()
// KMP_FSYNC_SPIN_PREPARE(obj): intended for use inside the spin loop. While
// __itt_fsync_prepare_ptr is non-null and sync_iters is still below
// __kmp_itt_prepare_delay, increments sync_iters; on the call where the count
// reaches the delay it issues KMP_FSYNC_PREPARE(obj). Later calls do nothing.
#undef KMP_FSYNC_SPIN_PREPARE
#define KMP_FSYNC_SPIN_PREPARE(obj)                                            \
  do {                                                                         \
    if (__itt_fsync_prepare_ptr && sync_iters < __kmp_itt_prepare_delay) {     \
      ++sync_iters;                                                            \
      if (sync_iters >= __kmp_itt_prepare_delay) {                             \
        KMP_FSYNC_PREPARE((void *)(obj));                                      \
      } /* if */                                                               \
    } /* if */                                                                 \
  } while (0)
// KMP_FSYNC_SPIN_ACQUIRED(obj): emits SSC_MARK_SPIN_END() and, if sync_iters
// has reached __kmp_itt_prepare_delay, issues KMP_FSYNC_ACQUIRED(obj).
#undef KMP_FSYNC_SPIN_ACQUIRED
#define KMP_FSYNC_SPIN_ACQUIRED(obj)                                           \
  do {                                                                         \
    SSC_MARK_SPIN_END();                                                       \
    if (sync_iters >= __kmp_itt_prepare_delay) {                               \
      KMP_FSYNC_ACQUIRED((void *)(obj));                                       \
    } /* if */                                                                 \
  } while (0)
- /* ITT will not report objects created within KMP_ITT_IGNORE(), e. g.:
- KMP_ITT_IGNORE(
- ptr = malloc( size );
- );
- */
- #define KMP_ITT_IGNORE(statement) \
- do { \
- __itt_state_t __itt_state_; \
- if (__itt_state_get_ptr) { \
- __itt_state_ = __itt_state_get(); \
- __itt_obj_mode_set(__itt_obj_prop_ignore, __itt_obj_state_set); \
- } /* if */ \
- { statement } \
- if (__itt_state_get_ptr) { \
- __itt_state_set(__itt_state_); \
- } /* if */ \
- } while (0)
- // Maximum number of frame domains to use (maps to
- // different OpenMP regions in the user source code).
- const int KMP_MAX_FRAME_DOMAINS = 997;
- typedef struct kmp_itthash_entry {
- ident_t *loc;
- int team_size;
- __itt_domain *d;
- struct kmp_itthash_entry *next_in_bucket;
- } kmp_itthash_entry_t;
- typedef struct kmp_itthash {
- kmp_itthash_entry_t *buckets[KMP_MAX_FRAME_DOMAINS];
- int count; // just a heuristic to limit number of entries
- } kmp_itthash_t;
- extern kmp_itthash_t __kmp_itt_region_domains;
- extern kmp_itthash_t __kmp_itt_barrier_domains;
- extern __itt_domain *metadata_domain;
- extern __itt_string_handle *string_handle_imbl;
- extern __itt_string_handle *string_handle_loop;
- extern __itt_string_handle *string_handle_sngl;
- #else
- // Null definitions of the synchronization tracing functions.
- #define KMP_FSYNC_PREPARE(obj) ((void)0)
- #define KMP_FSYNC_CANCEL(obj) ((void)0)
- #define KMP_FSYNC_ACQUIRED(obj) ((void)0)
- #define KMP_FSYNC_RELEASING(obj) ((void)0)
- #define KMP_FSYNC_SPIN_INIT(obj, spin) ((void)0)
- #define KMP_FSYNC_SPIN_PREPARE(obj) ((void)0)
- #define KMP_FSYNC_SPIN_ACQUIRED(obj) ((void)0)
- #define KMP_ITT_IGNORE(stmt) \
- do { \
- stmt \
- } while (0)
- #endif // USE_ITT_NOTIFY
- #if !KMP_DEBUG
- // In release mode include definitions of inline functions.
- #error #include "kmp_itt.inl"
- #endif
- #endif // KMP_ITT_H
- #else /* USE_ITT_BUILD */
- // Null definitions of the synchronization tracing functions.
- // If USE_ITT_BUILD is not enabled, USE_ITT_NOTIFY cannot be either.
- // By defining these we avoid unpleasant ifdef tests in many places.
- #define KMP_FSYNC_PREPARE(obj) ((void)0)
- #define KMP_FSYNC_CANCEL(obj) ((void)0)
- #define KMP_FSYNC_ACQUIRED(obj) ((void)0)
- #define KMP_FSYNC_RELEASING(obj) ((void)0)
- #define KMP_FSYNC_SPIN_INIT(obj, spin) ((void)0)
- #define KMP_FSYNC_SPIN_PREPARE(obj) ((void)0)
- #define KMP_FSYNC_SPIN_ACQUIRED(obj) ((void)0)
- #define KMP_ITT_IGNORE(stmt) \
- do { \
- stmt \
- } while (0)
- #define USE_ITT_BUILD_ARG(x)
- #endif /* USE_ITT_BUILD */
|