#if USE_ITT_BUILD
/*
 * kmp_itt.h -- ITT Notify interface.
 */

//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef KMP_ITT_H
#define KMP_ITT_H

#include "kmp_lock.h"

#define INTEL_ITTNOTIFY_API_PRIVATE
#include "ittnotify.h"
#include "legacy/ittnotify.h"

#if KMP_DEBUG
#define __kmp_inline // Turn off inlining in debug mode.
#else
#define __kmp_inline static inline
#endif

#if USE_ITT_NOTIFY
extern kmp_int32 __kmp_itt_prepare_delay;
#ifdef __cplusplus
extern "C" void __kmp_itt_fini_ittlib(void);
#else
extern void __kmp_itt_fini_ittlib(void);
#endif
#endif

// Simplify the handling of an argument that is only required when
// USE_ITT_BUILD is enabled.
#define USE_ITT_BUILD_ARG(x) , x
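// For example, a runtime entry point can carry an ITT-only trailing parameter:
//   void __kmp_foo(int gtid USE_ITT_BUILD_ARG(void *itt_sync_obj));
// With USE_ITT_BUILD off, the macro expands to nothing and the parameter
// disappears from the signature. (__kmp_foo is a hypothetical name used only
// for illustration.)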
void __kmp_itt_initialize();
void __kmp_itt_destroy();
void __kmp_itt_reset();

// -----------------------------------------------------------------------------
// New stuff for reporting high-level constructs.

// Note the naming convention:
//   __kmp_itt_xxxing() function should be called before action, while
//   __kmp_itt_xxxed() function should be called after action.

// --- Parallel region reporting ---
__kmp_inline void
__kmp_itt_region_forking(int gtid, int team_size,
                         int barriers); // Primary only, before forking threads.
__kmp_inline void
__kmp_itt_region_joined(int gtid); // Primary only, after joining threads.
// (*) Note: A thread may execute tasks after this point, though.
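// A minimal usage sketch following the xxxing/xxxed convention above (the
// actual call sites live in the fork/join path of the runtime):
//   __kmp_itt_region_forking(gtid, team_size, barriers); // before forking
//   /* ... fork workers, run the parallel region, join ... */
//   __kmp_itt_region_joined(gtid);                       // after joining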
// --- Frame reporting ---
// region=0: no regions, region=1: parallel, region=2: serialized parallel
__kmp_inline void __kmp_itt_frame_submit(int gtid, __itt_timestamp begin,
                                         __itt_timestamp end, int imbalance,
                                         ident_t *loc, int team_size,
                                         int region = 0);

// --- Metadata reporting ---
// begin/end - begin/end timestamps of a barrier frame, imbalance - aggregated
// wait time value, reduction - if this is a reduction barrier
__kmp_inline void __kmp_itt_metadata_imbalance(int gtid, kmp_uint64 begin,
                                               kmp_uint64 end,
                                               kmp_uint64 imbalance,
                                               kmp_uint64 reduction);
// sched_type: 0 - static, 1 - dynamic, 2 - guided, 3 - custom (all others);
// iterations - loop trip count, chunk - chunk size
__kmp_inline void __kmp_itt_metadata_loop(ident_t *loc, kmp_uint64 sched_type,
                                          kmp_uint64 iterations,
                                          kmp_uint64 chunk);
__kmp_inline void __kmp_itt_metadata_single(ident_t *loc);
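// For example, per the sched_type table above, a statically scheduled loop of
// 1024 iterations with chunk size 16 would be reported as (hypothetical call
// site):
//   __kmp_itt_metadata_loop(loc, 0, 1024, 16);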
// --- Barrier reporting ---
__kmp_inline void *__kmp_itt_barrier_object(int gtid, int bt, int set_name = 0,
                                            int delta = 0);
__kmp_inline void __kmp_itt_barrier_starting(int gtid, void *object);
__kmp_inline void __kmp_itt_barrier_middle(int gtid, void *object);
__kmp_inline void __kmp_itt_barrier_finished(int gtid, void *object);
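// A sketch of the expected call order around one barrier (assumed ordering;
// bt is a barrier type such as bs_forkjoin_barrier):
//   void *obj = __kmp_itt_barrier_object(gtid, bt);
//   __kmp_itt_barrier_starting(gtid, obj); // before arriving at the barrier
//   __kmp_itt_barrier_middle(gtid, obj);   // between arrival and release
//   __kmp_itt_barrier_finished(gtid, obj); // after leaving the barrier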
// --- Taskwait reporting ---
__kmp_inline void *__kmp_itt_taskwait_object(int gtid);
__kmp_inline void __kmp_itt_taskwait_starting(int gtid, void *object);
__kmp_inline void __kmp_itt_taskwait_finished(int gtid, void *object);
#define KMP_ITT_TASKWAIT_STARTING(obj)                                        \
  if (UNLIKELY(__itt_sync_create_ptr)) {                                      \
    obj = __kmp_itt_taskwait_object(gtid);                                    \
    if (obj != NULL) {                                                        \
      __kmp_itt_taskwait_starting(gtid, obj);                                 \
    }                                                                         \
  }
#define KMP_ITT_TASKWAIT_FINISHED(obj)                                        \
  if (UNLIKELY(obj != NULL))                                                  \
    __kmp_itt_taskwait_finished(gtid, obj);
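// Both macros assume a local `gtid` is in scope. A hedged sketch of how a
// taskwait implementation might bracket the wait:
//   void *itt_sync_obj = NULL;
//   KMP_ITT_TASKWAIT_STARTING(itt_sync_obj);
//   /* ... wait for children of the current task to complete ... */
//   KMP_ITT_TASKWAIT_FINISHED(itt_sync_obj);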
// --- Task reporting ---
__kmp_inline void __kmp_itt_task_starting(void *object);
__kmp_inline void __kmp_itt_task_finished(void *object);

// --- Lock reporting ---
#if KMP_USE_DYNAMIC_LOCK
__kmp_inline void __kmp_itt_lock_creating(kmp_user_lock_p lock,
                                          const ident_t *);
#else
__kmp_inline void __kmp_itt_lock_creating(kmp_user_lock_p lock);
#endif
__kmp_inline void __kmp_itt_lock_acquiring(kmp_user_lock_p lock);
__kmp_inline void __kmp_itt_lock_acquired(kmp_user_lock_p lock);
__kmp_inline void __kmp_itt_lock_releasing(kmp_user_lock_p lock);
__kmp_inline void __kmp_itt_lock_cancelled(kmp_user_lock_p lock);
__kmp_inline void __kmp_itt_lock_destroyed(kmp_user_lock_p lock);
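// Sketch of the lock reporting lifecycle, following the xxxing/xxxed naming
// convention (assumed ordering):
//   __kmp_itt_lock_creating(lck);  // once, when the lock is initialized
//   __kmp_itt_lock_acquiring(lck); // before each attempt to take the lock
//   __kmp_itt_lock_acquired(lck);  // after the lock is taken, or
//   __kmp_itt_lock_cancelled(lck); // if the attempt is abandoned
//   __kmp_itt_lock_releasing(lck); // before the lock is released
//   __kmp_itt_lock_destroyed(lck); // once, when the lock is destroyed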
// --- Critical reporting ---
#if KMP_USE_DYNAMIC_LOCK
__kmp_inline void __kmp_itt_critical_creating(kmp_user_lock_p lock,
                                              const ident_t *);
#else
__kmp_inline void __kmp_itt_critical_creating(kmp_user_lock_p lock);
#endif
__kmp_inline void __kmp_itt_critical_acquiring(kmp_user_lock_p lock);
__kmp_inline void __kmp_itt_critical_acquired(kmp_user_lock_p lock);
__kmp_inline void __kmp_itt_critical_releasing(kmp_user_lock_p lock);
__kmp_inline void __kmp_itt_critical_destroyed(kmp_user_lock_p lock);

// --- Single reporting ---
__kmp_inline void __kmp_itt_single_start(int gtid);
__kmp_inline void __kmp_itt_single_end(int gtid);

// --- Ordered reporting ---
__kmp_inline void __kmp_itt_ordered_init(int gtid);
__kmp_inline void __kmp_itt_ordered_prep(int gtid);
__kmp_inline void __kmp_itt_ordered_start(int gtid);
__kmp_inline void __kmp_itt_ordered_end(int gtid);

// --- Threads reporting ---
__kmp_inline void __kmp_itt_thread_ignore();
__kmp_inline void __kmp_itt_thread_name(int gtid);

// --- System objects ---
__kmp_inline void __kmp_itt_system_object_created(void *object,
                                                  char const *name);

// --- Stack stitching ---
__kmp_inline __itt_caller __kmp_itt_stack_caller_create(void);
__kmp_inline void __kmp_itt_stack_caller_destroy(__itt_caller);
__kmp_inline void __kmp_itt_stack_callee_enter(__itt_caller);
__kmp_inline void __kmp_itt_stack_callee_leave(__itt_caller);
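// Stack stitching lets an analysis tool splice a worker's call stack onto the
// primary thread's stack. A hedged sketch of the pairing (actual call sites
// are in the fork/join and invoke paths of the runtime):
//   __itt_caller id = __kmp_itt_stack_caller_create(); // primary, at fork
//   __kmp_itt_stack_callee_enter(id);   // worker, before running its task
//   __kmp_itt_stack_callee_leave(id);   // worker, after the task returns
//   __kmp_itt_stack_caller_destroy(id); // primary, at join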
// -----------------------------------------------------------------------------
// Old stuff for reporting low-level internal synchronization.

#if USE_ITT_NOTIFY

/* Support for SSC marks, which are used by SDE
   http://software.intel.com/en-us/articles/intel-software-development-emulator
   to mark points in instruction traces that represent spin-loops and are
   therefore uninteresting when collecting traces for architecture simulation.
*/
#ifndef INCLUDE_SSC_MARKS
#define INCLUDE_SSC_MARKS (KMP_OS_LINUX && KMP_ARCH_X86_64)
#endif

/* Linux 64 only for now */
#if (INCLUDE_SSC_MARKS && KMP_OS_LINUX && KMP_ARCH_X86_64)
// Portable (at least for gcc and icc) code to insert the necessary
// instructions to set %ebx and execute the unlikely no-op.
#if defined(__INTEL_COMPILER)
#define INSERT_SSC_MARK(tag) __SSC_MARK(tag)
#else
#define INSERT_SSC_MARK(tag)                                                  \
  __asm__ __volatile__("movl %0, %%ebx; .byte 0x64, 0x67, 0x90 " ::"i"(tag)   \
                       : "%ebx")
#endif
#else
#define INSERT_SSC_MARK(tag) ((void)0)
#endif

/* Markers for the start and end of regions that represent polling and are
   therefore uninteresting to architectural simulations. 0x4376 and 0x4377 are
   arbitrary numbers that should be unique in the space of SSC tags, but there
   is no central issuing authority; rather, randomness is expected to work. */
#define SSC_MARK_SPIN_START() INSERT_SSC_MARK(0x4376)
#define SSC_MARK_SPIN_END() INSERT_SSC_MARK(0x4377)

// Markers for architecture simulation.
// FORKING      : Before the primary thread forks.
// JOINING      : At the start of the join.
// INVOKING     : Before the threads invoke microtasks.
// DISPATCH_INIT: At the start of a dynamically scheduled loop.
// DISPATCH_NEXT: After claiming the next iteration of a dynamically scheduled
//                loop.
#define SSC_MARK_FORKING() INSERT_SSC_MARK(0xd693)
#define SSC_MARK_JOINING() INSERT_SSC_MARK(0xd694)
#define SSC_MARK_INVOKING() INSERT_SSC_MARK(0xd695)
#define SSC_MARK_DISPATCH_INIT() INSERT_SSC_MARK(0xd696)
#define SSC_MARK_DISPATCH_NEXT() INSERT_SSC_MARK(0xd697)

// The object is an address that associates a specific set of prepare,
// acquire, release, and cancel operations.

/* Sync prepare indicates a thread is going to start waiting for another thread
   to send a release event. This operation should be done just before the
   thread begins checking for the existence of the release event. */

/* Sync cancel indicates a thread is cancelling a wait on another thread and
   continuing execution without waiting for the other thread to release it. */

/* Sync acquired indicates a thread has received a release event from another
   thread and has stopped waiting. This operation must occur only after the
   release event is received. */

/* Sync release indicates a thread is going to send a release event to another
   thread so it will stop waiting and continue execution. This operation must
   happen just before the release event. */

#define KMP_FSYNC_PREPARE(obj) __itt_fsync_prepare((void *)(obj))
#define KMP_FSYNC_CANCEL(obj) __itt_fsync_cancel((void *)(obj))
#define KMP_FSYNC_ACQUIRED(obj) __itt_fsync_acquired((void *)(obj))
#define KMP_FSYNC_RELEASING(obj) __itt_fsync_releasing((void *)(obj))

/* In case of waiting in a spin loop, ITT wants KMP_FSYNC_PREPARE() to be called
   with a delay (and not called at all if the waiting time is small). So, in
   spin loops, do not use KMP_FSYNC_PREPARE(), but use KMP_FSYNC_SPIN_INIT()
   (before the spin loop), KMP_FSYNC_SPIN_PREPARE() (within the spin loop), and
   KMP_FSYNC_SPIN_ACQUIRED(). See KMP_WAIT() for an example. */

#undef KMP_FSYNC_SPIN_INIT
#define KMP_FSYNC_SPIN_INIT(obj, spin)                                        \
  int sync_iters = 0;                                                         \
  if (__itt_fsync_prepare_ptr) {                                              \
    if (obj == NULL) {                                                        \
      obj = spin;                                                             \
    } /* if */                                                                \
  } /* if */                                                                  \
  SSC_MARK_SPIN_START()

#undef KMP_FSYNC_SPIN_PREPARE
#define KMP_FSYNC_SPIN_PREPARE(obj)                                           \
  do {                                                                        \
    if (__itt_fsync_prepare_ptr && sync_iters < __kmp_itt_prepare_delay) {    \
      ++sync_iters;                                                           \
      if (sync_iters >= __kmp_itt_prepare_delay) {                            \
        KMP_FSYNC_PREPARE((void *)obj);                                       \
      } /* if */                                                              \
    } /* if */                                                                \
  } while (0)

#undef KMP_FSYNC_SPIN_ACQUIRED
#define KMP_FSYNC_SPIN_ACQUIRED(obj)                                          \
  do {                                                                        \
    SSC_MARK_SPIN_END();                                                      \
    if (sync_iters >= __kmp_itt_prepare_delay) {                              \
      KMP_FSYNC_ACQUIRED((void *)obj);                                        \
    } /* if */                                                                \
  } while (0)
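// A hedged sketch of the intended spin-loop shape (modeled on the KMP_WAIT()
// pattern referenced above; `flag` and `done()` are illustrative):
//   KMP_FSYNC_SPIN_INIT(obj, (void *)flag);
//   while (!done(flag)) {
//     KMP_FSYNC_SPIN_PREPARE(obj); // fires PREPARE once, after the delay
//     /* ... pause / yield ... */
//   }
//   KMP_FSYNC_SPIN_ACQUIRED(obj);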
/* ITT will not report objects created within KMP_ITT_IGNORE(), e.g.:
     KMP_ITT_IGNORE(
       ptr = malloc( size );
     );
*/
#define KMP_ITT_IGNORE(statement)                                             \
  do {                                                                        \
    __itt_state_t __itt_state_;                                               \
    if (__itt_state_get_ptr) {                                                \
      __itt_state_ = __itt_state_get();                                       \
      __itt_obj_mode_set(__itt_obj_prop_ignore, __itt_obj_state_set);         \
    } /* if */                                                                \
    { statement }                                                             \
    if (__itt_state_get_ptr) {                                                \
      __itt_state_set(__itt_state_);                                          \
    } /* if */                                                                \
  } while (0)

// Maximum number of frame domains to use (maps to
// different OpenMP regions in the user source code).
const int KMP_MAX_FRAME_DOMAINS = 997;

typedef struct kmp_itthash_entry {
  ident_t *loc;
  int team_size;
  __itt_domain *d;
  struct kmp_itthash_entry *next_in_bucket;
} kmp_itthash_entry_t;

typedef struct kmp_itthash {
  kmp_itthash_entry_t *buckets[KMP_MAX_FRAME_DOMAINS];
  int count; // just a heuristic to limit the number of entries
} kmp_itthash_t;
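// The table is a fixed-size chained hash keyed by source location and team
// size. A hedged lookup sketch (the real probe lives elsewhere in the runtime;
// the bucket index computation here is illustrative):
//   kmp_itthash_entry_t *e =
//       hash->buckets[(kmp_uintptr_t)loc % KMP_MAX_FRAME_DOMAINS];
//   while (e != NULL && (e->loc != loc || e->team_size != team_size))
//     e = e->next_in_bucket;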
extern kmp_itthash_t __kmp_itt_region_domains;
extern kmp_itthash_t __kmp_itt_barrier_domains;
extern __itt_domain *metadata_domain;
extern __itt_string_handle *string_handle_imbl;
extern __itt_string_handle *string_handle_loop;
extern __itt_string_handle *string_handle_sngl;

#else

// Null definitions of the synchronization tracing functions.
#define KMP_FSYNC_PREPARE(obj) ((void)0)
#define KMP_FSYNC_CANCEL(obj) ((void)0)
#define KMP_FSYNC_ACQUIRED(obj) ((void)0)
#define KMP_FSYNC_RELEASING(obj) ((void)0)
#define KMP_FSYNC_SPIN_INIT(obj, spin) ((void)0)
#define KMP_FSYNC_SPIN_PREPARE(obj) ((void)0)
#define KMP_FSYNC_SPIN_ACQUIRED(obj) ((void)0)
#define KMP_ITT_IGNORE(stmt)                                                  \
  do {                                                                        \
    stmt                                                                      \
  } while (0)

#endif // USE_ITT_NOTIFY

#if !KMP_DEBUG
// In release mode include definitions of inline functions.
#include "kmp_itt.inl"
#endif

#endif // KMP_ITT_H

#else /* USE_ITT_BUILD */

// Null definitions of the synchronization tracing functions.
// If USE_ITT_BUILD is not enabled, USE_ITT_NOTIFY cannot be either.
// By defining these we avoid unpleasant ifdef tests in many places.
#define KMP_FSYNC_PREPARE(obj) ((void)0)
#define KMP_FSYNC_CANCEL(obj) ((void)0)
#define KMP_FSYNC_ACQUIRED(obj) ((void)0)
#define KMP_FSYNC_RELEASING(obj) ((void)0)
#define KMP_FSYNC_SPIN_INIT(obj, spin) ((void)0)
#define KMP_FSYNC_SPIN_PREPARE(obj) ((void)0)
#define KMP_FSYNC_SPIN_ACQUIRED(obj) ((void)0)
#define KMP_ITT_IGNORE(stmt)                                                  \
  do {                                                                        \
    stmt                                                                      \
  } while (0)

#define USE_ITT_BUILD_ARG(x)

#endif /* USE_ITT_BUILD */