/*
 * kmp_threadprivate.cpp -- OpenMP threadprivate support library
 */

//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "kmp.h"
#include "kmp_i18n.h"
#include "kmp_itt.h"

#define USE_CHECKS_COMMON

#define KMP_INLINE_SUBR 1

void kmp_threadprivate_insert_private_data(int gtid, void *pc_addr,
                                           void *data_addr, size_t pc_size);
struct private_common *kmp_threadprivate_insert(int gtid, void *pc_addr,
                                                void *data_addr,
                                                size_t pc_size);
struct shared_table __kmp_threadprivate_d_table;

static
#ifdef KMP_INLINE_SUBR
    __forceinline
#endif
    struct private_common *
    __kmp_threadprivate_find_task_common(struct common_table *tbl, int gtid,
                                         void *pc_addr)

{
  struct private_common *tn;

#ifdef KMP_TASK_COMMON_DEBUG
  KC_TRACE(10, ("__kmp_threadprivate_find_task_common: thread#%d, called with "
                "address %p\n",
                gtid, pc_addr));
  dump_list();
#endif

  for (tn = tbl->data[KMP_HASH(pc_addr)]; tn; tn = tn->next) {
    if (tn->gbl_addr == pc_addr) {
#ifdef KMP_TASK_COMMON_DEBUG
      KC_TRACE(10, ("__kmp_threadprivate_find_task_common: thread#%d, found "
                    "node %p on list\n",
                    gtid, pc_addr));
#endif
      return tn;
    }
  }
  return 0;
}

static
#ifdef KMP_INLINE_SUBR
    __forceinline
#endif
    struct shared_common *
    __kmp_find_shared_task_common(struct shared_table *tbl, int gtid,
                                  void *pc_addr) {
  struct shared_common *tn;

  for (tn = tbl->data[KMP_HASH(pc_addr)]; tn; tn = tn->next) {
    if (tn->gbl_addr == pc_addr) {
#ifdef KMP_TASK_COMMON_DEBUG
      KC_TRACE(
          10,
          ("__kmp_find_shared_task_common: thread#%d, found node %p on list\n",
           gtid, pc_addr));
#endif
      return tn;
    }
  }
  return 0;
}
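/* Editor's note on the two lookup helpers above: __kmp_threadprivate_d_table
   is the single global table, keyed by the address of the original (serial)
   variable, holding per-variable metadata (size, constructors/destructors,
   init template).  Each thread additionally keeps its own th.th_pri_common
   hash table mapping that same global address to the thread's private copy.
   Both use KMP_HASH on the global address to pick a bucket and then walk a
   singly linked chain. */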
// Create a template for the data initialized storage. Either the template is
// NULL indicating zero fill, or the template is a copy of the original data.
static struct private_data *__kmp_init_common_data(void *pc_addr,
                                                   size_t pc_size) {
  struct private_data *d;
  size_t i;
  char *p;

  d = (struct private_data *)__kmp_allocate(sizeof(struct private_data));
  /*
      d->data = 0;  // AC: commented out because __kmp_allocate zeroes the
     memory
      d->next = 0;
  */
  d->size = pc_size;
  d->more = 1;

  p = (char *)pc_addr;

  for (i = pc_size; i > 0; --i) {
    if (*p++ != '\0') {
      d->data = __kmp_allocate(pc_size);
      KMP_MEMCPY(d->data, pc_addr, pc_size);
      break;
    }
  }

  return d;
}

// Initialize the data area from the template.
static void __kmp_copy_common_data(void *pc_addr, struct private_data *d) {
  char *addr = (char *)pc_addr;

  for (size_t offset = 0; d != 0; d = d->next) {
    for (int i = d->more; i > 0; --i) {
      if (d->data == 0)
        memset(&addr[offset], '\0', d->size);
      else
        KMP_MEMCPY(&addr[offset], d->data, d->size);
      offset += d->size;
    }
  }
}
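/* Illustrative sketch (editor's note, not part of the runtime): for a
   threadprivate variable whose original image is all zero bytes, the scan in
   __kmp_init_common_data finds no non-zero byte, so d->data stays NULL and
   each private copy is later created with memset in __kmp_copy_common_data.
   For an image containing any non-zero byte, the original data is copied into
   d->data once, and each private copy is created with KMP_MEMCPY from that
   template. */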
/* we are called from __kmp_serial_initialize() with __kmp_initz_lock held. */
void __kmp_common_initialize(void) {
  if (!TCR_4(__kmp_init_common)) {
    int q;
#ifdef KMP_DEBUG
    int gtid;
#endif

    __kmp_threadpriv_cache_list = NULL;

#ifdef KMP_DEBUG
    /* verify the uber masters were initialized */
    for (gtid = 0; gtid < __kmp_threads_capacity; gtid++)
      if (__kmp_root[gtid]) {
        KMP_DEBUG_ASSERT(__kmp_root[gtid]->r.r_uber_thread);
        for (q = 0; q < KMP_HASH_TABLE_SIZE; ++q)
          KMP_DEBUG_ASSERT(
              !__kmp_root[gtid]->r.r_uber_thread->th.th_pri_common->data[q]);
        /* __kmp_root[ gtid ]-> r.r_uber_thread ->
         * th.th_pri_common -> data[ q ] = 0;*/
      }
#endif /* KMP_DEBUG */

    for (q = 0; q < KMP_HASH_TABLE_SIZE; ++q)
      __kmp_threadprivate_d_table.data[q] = 0;

    TCW_4(__kmp_init_common, TRUE);
  }
}
/* Call all destructors for threadprivate data belonging to all threads.
   Currently unused! */
void __kmp_common_destroy(void) {
  if (TCR_4(__kmp_init_common)) {
    int q;

    TCW_4(__kmp_init_common, FALSE);

    for (q = 0; q < KMP_HASH_TABLE_SIZE; ++q) {
      int gtid;
      struct private_common *tn;
      struct shared_common *d_tn;

      /* C++ destructors need to be called once per thread before exiting.
         Don't call destructors for primary thread though unless we used copy
         constructor */

      for (d_tn = __kmp_threadprivate_d_table.data[q]; d_tn;
           d_tn = d_tn->next) {
        if (d_tn->is_vec) {
          if (d_tn->dt.dtorv != 0) {
            for (gtid = 0; gtid < __kmp_all_nth; ++gtid) {
              if (__kmp_threads[gtid]) {
                if ((__kmp_foreign_tp) ? (!KMP_INITIAL_GTID(gtid))
                                       : (!KMP_UBER_GTID(gtid))) {
                  tn = __kmp_threadprivate_find_task_common(
                      __kmp_threads[gtid]->th.th_pri_common, gtid,
                      d_tn->gbl_addr);
                  if (tn) {
                    (*d_tn->dt.dtorv)(tn->par_addr, d_tn->vec_len);
                  }
                }
              }
            }
            if (d_tn->obj_init != 0) {
              (*d_tn->dt.dtorv)(d_tn->obj_init, d_tn->vec_len);
            }
          }
        } else {
          if (d_tn->dt.dtor != 0) {
            for (gtid = 0; gtid < __kmp_all_nth; ++gtid) {
              if (__kmp_threads[gtid]) {
                if ((__kmp_foreign_tp) ? (!KMP_INITIAL_GTID(gtid))
                                       : (!KMP_UBER_GTID(gtid))) {
                  tn = __kmp_threadprivate_find_task_common(
                      __kmp_threads[gtid]->th.th_pri_common, gtid,
                      d_tn->gbl_addr);
                  if (tn) {
                    (*d_tn->dt.dtor)(tn->par_addr);
                  }
                }
              }
            }
            if (d_tn->obj_init != 0) {
              (*d_tn->dt.dtor)(d_tn->obj_init);
            }
          }
        }
      }
      __kmp_threadprivate_d_table.data[q] = 0;
    }
  }
}
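/* Editor's note: obj_init above is the prototype object produced when a copy
   constructor was registered (see kmp_threadprivate_insert); when present it
   is destroyed with the same (vector or scalar) destructor as the per-thread
   copies. */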
/* Call all destructors for threadprivate data belonging to this thread */
void __kmp_common_destroy_gtid(int gtid) {
  struct private_common *tn;
  struct shared_common *d_tn;

  if (!TCR_4(__kmp_init_gtid)) {
    // This is possible when one of multiple roots initiates early library
    // termination in a sequential region while other teams are active, and its
    // child threads are about to end.
    return;
  }

  KC_TRACE(10, ("__kmp_common_destroy_gtid: T#%d called\n", gtid));
  if ((__kmp_foreign_tp) ? (!KMP_INITIAL_GTID(gtid)) : (!KMP_UBER_GTID(gtid))) {

    if (TCR_4(__kmp_init_common)) {

      /* Cannot do this here since not all threads have destroyed their data */
      /* TCW_4(__kmp_init_common, FALSE); */

      for (tn = __kmp_threads[gtid]->th.th_pri_head; tn; tn = tn->link) {

        d_tn = __kmp_find_shared_task_common(&__kmp_threadprivate_d_table, gtid,
                                             tn->gbl_addr);
        if (d_tn == NULL)
          continue;
        if (d_tn->is_vec) {
          if (d_tn->dt.dtorv != 0) {
            (void)(*d_tn->dt.dtorv)(tn->par_addr, d_tn->vec_len);
          }
          if (d_tn->obj_init != 0) {
            (void)(*d_tn->dt.dtorv)(d_tn->obj_init, d_tn->vec_len);
          }
        } else {
          if (d_tn->dt.dtor != 0) {
            (void)(*d_tn->dt.dtor)(tn->par_addr);
          }
          if (d_tn->obj_init != 0) {
            (void)(*d_tn->dt.dtor)(d_tn->obj_init);
          }
        }
      }
      KC_TRACE(30, ("__kmp_common_destroy_gtid: T#%d threadprivate destructors "
                    "complete\n",
                    gtid));
    }
  }
}
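/* Editor's note: th.th_pri_head is the per-thread list onto which
   kmp_threadprivate_insert links every private copy (via tn->link), so the
   teardown above can walk just this thread's copies instead of scanning the
   whole th_pri_common hash table. */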
#ifdef KMP_TASK_COMMON_DEBUG
static void dump_list(void) {
  int p, q;

  for (p = 0; p < __kmp_all_nth; ++p) {
    if (!__kmp_threads[p])
      continue;
    for (q = 0; q < KMP_HASH_TABLE_SIZE; ++q) {
      if (__kmp_threads[p]->th.th_pri_common->data[q]) {
        struct private_common *tn;

        KC_TRACE(10, ("\tdump_list: gtid:%d addresses\n", p));

        for (tn = __kmp_threads[p]->th.th_pri_common->data[q]; tn;
             tn = tn->next) {
          KC_TRACE(10,
                   ("\tdump_list: THREADPRIVATE: Serial %p -> Parallel %p\n",
                    tn->gbl_addr, tn->par_addr));
        }
      }
    }
  }
}
#endif /* KMP_TASK_COMMON_DEBUG */
// NOTE: this routine is to be called only from the serial part of the program.
void kmp_threadprivate_insert_private_data(int gtid, void *pc_addr,
                                           void *data_addr, size_t pc_size) {
  struct shared_common **lnk_tn, *d_tn;
  KMP_DEBUG_ASSERT(__kmp_threads[gtid] &&
                   __kmp_threads[gtid]->th.th_root->r.r_active == 0);

  d_tn = __kmp_find_shared_task_common(&__kmp_threadprivate_d_table, gtid,
                                       pc_addr);

  if (d_tn == 0) {
    d_tn = (struct shared_common *)__kmp_allocate(sizeof(struct shared_common));

    d_tn->gbl_addr = pc_addr;
    d_tn->pod_init = __kmp_init_common_data(data_addr, pc_size);
    /*
            d_tn->obj_init = 0;  // AC: commented out because __kmp_allocate
       zeroes the memory
            d_tn->ct.ctor = 0;
            d_tn->cct.cctor = 0;
            d_tn->dt.dtor = 0;
            d_tn->is_vec = FALSE;
            d_tn->vec_len = 0L;
    */
    d_tn->cmn_size = pc_size;

    __kmp_acquire_lock(&__kmp_global_lock, gtid);

    lnk_tn = &(__kmp_threadprivate_d_table.data[KMP_HASH(pc_addr)]);

    d_tn->next = *lnk_tn;
    *lnk_tn = d_tn;

    __kmp_release_lock(&__kmp_global_lock, gtid);
  }
}
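/* Editor's note: on this serial-only path the caller (__kmpc_threadprivate)
   keeps using the original storage at pc_addr directly; all that is recorded
   here is the global metadata entry (size plus the pod_init template) so that
   worker threads created later can initialize their own copies from it. */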
struct private_common *kmp_threadprivate_insert(int gtid, void *pc_addr,
                                                void *data_addr,
                                                size_t pc_size) {
  struct private_common *tn, **tt;
  struct shared_common *d_tn;

  /* +++++++++ START OF CRITICAL SECTION +++++++++ */
  __kmp_acquire_lock(&__kmp_global_lock, gtid);

  tn = (struct private_common *)__kmp_allocate(sizeof(struct private_common));

  tn->gbl_addr = pc_addr;

  d_tn = __kmp_find_shared_task_common(
      &__kmp_threadprivate_d_table, gtid,
      pc_addr); /* Only the MASTER data table exists. */

  if (d_tn != 0) {
    /* This threadprivate variable has already been seen. */

    if (d_tn->pod_init == 0 && d_tn->obj_init == 0) {
      d_tn->cmn_size = pc_size;

      if (d_tn->is_vec) {
        if (d_tn->ct.ctorv != 0) {
          /* Construct from scratch so no prototype exists */
          d_tn->obj_init = 0;
        } else if (d_tn->cct.cctorv != 0) {
          /* Now data initialize the prototype since it was previously
           * registered */
          d_tn->obj_init = (void *)__kmp_allocate(d_tn->cmn_size);
          (void)(*d_tn->cct.cctorv)(d_tn->obj_init, pc_addr, d_tn->vec_len);
        } else {
          d_tn->pod_init = __kmp_init_common_data(data_addr, d_tn->cmn_size);
        }
      } else {
        if (d_tn->ct.ctor != 0) {
          /* Construct from scratch so no prototype exists */
          d_tn->obj_init = 0;
        } else if (d_tn->cct.cctor != 0) {
          /* Now data initialize the prototype since it was previously
             registered */
          d_tn->obj_init = (void *)__kmp_allocate(d_tn->cmn_size);
          (void)(*d_tn->cct.cctor)(d_tn->obj_init, pc_addr);
        } else {
          d_tn->pod_init = __kmp_init_common_data(data_addr, d_tn->cmn_size);
        }
      }
    }
  } else {
    struct shared_common **lnk_tn;

    d_tn = (struct shared_common *)__kmp_allocate(sizeof(struct shared_common));
    d_tn->gbl_addr = pc_addr;
    d_tn->cmn_size = pc_size;
    d_tn->pod_init = __kmp_init_common_data(data_addr, pc_size);
    /*
            d_tn->obj_init = 0;  // AC: commented out because __kmp_allocate
       zeroes the memory
            d_tn->ct.ctor = 0;
            d_tn->cct.cctor = 0;
            d_tn->dt.dtor = 0;
            d_tn->is_vec = FALSE;
            d_tn->vec_len = 0L;
    */
    lnk_tn = &(__kmp_threadprivate_d_table.data[KMP_HASH(pc_addr)]);

    d_tn->next = *lnk_tn;
    *lnk_tn = d_tn;
  }

  tn->cmn_size = d_tn->cmn_size;

  if ((__kmp_foreign_tp) ? (KMP_INITIAL_GTID(gtid)) : (KMP_UBER_GTID(gtid))) {
    tn->par_addr = (void *)pc_addr;
  } else {
    tn->par_addr = (void *)__kmp_allocate(tn->cmn_size);
  }

  __kmp_release_lock(&__kmp_global_lock, gtid);
  /* +++++++++ END OF CRITICAL SECTION +++++++++ */

#ifdef USE_CHECKS_COMMON
  if (pc_size > d_tn->cmn_size) {
    KC_TRACE(
        10, ("__kmp_threadprivate_insert: THREADPRIVATE: %p (%" KMP_UINTPTR_SPEC
             " ,%" KMP_UINTPTR_SPEC ")\n",
             pc_addr, pc_size, d_tn->cmn_size));
    KMP_FATAL(TPCommonBlocksInconsist);
  }
#endif /* USE_CHECKS_COMMON */

  tt = &(__kmp_threads[gtid]->th.th_pri_common->data[KMP_HASH(pc_addr)]);

#ifdef KMP_TASK_COMMON_DEBUG
  if (*tt != 0) {
    KC_TRACE(
        10,
        ("__kmp_threadprivate_insert: WARNING! thread#%d: collision on %p\n",
         gtid, pc_addr));
  }
#endif
  tn->next = *tt;
  *tt = tn;

#ifdef KMP_TASK_COMMON_DEBUG
  KC_TRACE(10,
           ("__kmp_threadprivate_insert: thread#%d, inserted node %p on list\n",
            gtid, pc_addr));
  dump_list();
#endif

  /* Link the node into a simple list */

  tn->link = __kmp_threads[gtid]->th.th_pri_head;
  __kmp_threads[gtid]->th.th_pri_head = tn;

  if ((__kmp_foreign_tp) ? (KMP_INITIAL_GTID(gtid)) : (KMP_UBER_GTID(gtid)))
    return tn;

  /* if C++ object with copy constructor, use it;
   * else if C++ object with constructor, use it for the non-primary thread
     copies only;
   * else use pod_init and memcpy
   *
   * C++ constructors need to be called once for each non-primary thread on
   * allocate
   * C++ copy constructors need to be called once for each thread on allocate */

  /* C++ object with constructors/destructors; don't call constructors for
     primary thread though */
  if (d_tn->is_vec) {
    if (d_tn->ct.ctorv != 0) {
      (void)(*d_tn->ct.ctorv)(tn->par_addr, d_tn->vec_len);
    } else if (d_tn->cct.cctorv != 0) {
      (void)(*d_tn->cct.cctorv)(tn->par_addr, d_tn->obj_init, d_tn->vec_len);
    } else if (tn->par_addr != tn->gbl_addr) {
      __kmp_copy_common_data(tn->par_addr, d_tn->pod_init);
    }
  } else {
    if (d_tn->ct.ctor != 0) {
      (void)(*d_tn->ct.ctor)(tn->par_addr);
    } else if (d_tn->cct.cctor != 0) {
      (void)(*d_tn->cct.cctor)(tn->par_addr, d_tn->obj_init);
    } else if (tn->par_addr != tn->gbl_addr) {
      __kmp_copy_common_data(tn->par_addr, d_tn->pod_init);
    }
  }
  /* !BUILD_OPENMP_C
      if (tn->par_addr != tn->gbl_addr)
          __kmp_copy_common_data( tn->par_addr, d_tn->pod_init ); */

  return tn;
}
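/* Editor's summary of the initialization choice made above for a non-primary
   thread's copy: a registered constructor wins; otherwise a registered copy
   constructor is applied against the obj_init prototype; otherwise the POD
   template built by __kmp_init_common_data is replayed with
   __kmp_copy_common_data. The primary (or initial) thread keeps the original
   storage and is never constructed here. */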
/* ------------------------------------------------------------------------ */
/* We are currently parallel, and we know the thread id.                    */
/* ------------------------------------------------------------------------ */

/*!
 @ingroup THREADPRIVATE
 @param loc source location information
 @param data pointer to data being privatized
 @param ctor pointer to constructor function for data
 @param cctor pointer to copy constructor function for data
 @param dtor pointer to destructor function for data
 Register constructors and destructors for thread private data.
 This function is called when executing in parallel, when we know the thread id.
*/
void __kmpc_threadprivate_register(ident_t *loc, void *data, kmpc_ctor ctor,
                                   kmpc_cctor cctor, kmpc_dtor dtor) {
  struct shared_common *d_tn, **lnk_tn;

  KC_TRACE(10, ("__kmpc_threadprivate_register: called\n"));

#ifdef USE_CHECKS_COMMON
  /* copy constructor must be zero for current code gen (Nov 2002 - jph) */
  KMP_ASSERT(cctor == 0);
#endif /* USE_CHECKS_COMMON */

  /* Only the global data table exists. */
  d_tn = __kmp_find_shared_task_common(&__kmp_threadprivate_d_table, -1, data);

  if (d_tn == 0) {
    d_tn = (struct shared_common *)__kmp_allocate(sizeof(struct shared_common));
    d_tn->gbl_addr = data;

    d_tn->ct.ctor = ctor;
    d_tn->cct.cctor = cctor;
    d_tn->dt.dtor = dtor;
    /*
            d_tn->is_vec = FALSE;  // AC: commented out because __kmp_allocate
       zeroes the memory
            d_tn->vec_len = 0L;
            d_tn->obj_init = 0;
            d_tn->pod_init = 0;
    */
    lnk_tn = &(__kmp_threadprivate_d_table.data[KMP_HASH(data)]);

    d_tn->next = *lnk_tn;
    *lnk_tn = d_tn;
  }
}
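/* Illustrative sketch (editor's note, not generated by this file): for a user
   declaration along the lines of

       static MyClass obj;               // hypothetical user type
       #pragma omp threadprivate(obj)

   the compiler is expected to emit, roughly, a startup-time registration such
   as

       __kmpc_threadprivate_register(&loc, &obj, obj_ctor_stub, NULL,
                                     obj_dtor_stub);

   where obj_ctor_stub/obj_dtor_stub are hypothetical generated stubs of type
   kmpc_ctor/kmpc_dtor (the copy constructor slot is NULL per the assertion
   above), and then to fetch the per-thread copy at each use through
   __kmpc_threadprivate_cached below. */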
void *__kmpc_threadprivate(ident_t *loc, kmp_int32 global_tid, void *data,
                           size_t size) {
  void *ret;
  struct private_common *tn;

  KC_TRACE(10, ("__kmpc_threadprivate: T#%d called\n", global_tid));

#ifdef USE_CHECKS_COMMON
  if (!__kmp_init_serial)
    KMP_FATAL(RTLNotInitialized);
#endif /* USE_CHECKS_COMMON */

  if (!__kmp_threads[global_tid]->th.th_root->r.r_active && !__kmp_foreign_tp) {
    /* The parallel address will NEVER overlap with the data_address */
    /* dkp: 3rd arg to kmp_threadprivate_insert_private_data() is the
     * data_address; use data_address = data */

    KC_TRACE(20, ("__kmpc_threadprivate: T#%d inserting private data\n",
                  global_tid));
    kmp_threadprivate_insert_private_data(global_tid, data, data, size);

    ret = data;
  } else {
    KC_TRACE(
        50,
        ("__kmpc_threadprivate: T#%d try to find private data at address %p\n",
         global_tid, data));
    tn = __kmp_threadprivate_find_task_common(
        __kmp_threads[global_tid]->th.th_pri_common, global_tid, data);

    if (tn) {
      KC_TRACE(20, ("__kmpc_threadprivate: T#%d found data\n", global_tid));
#ifdef USE_CHECKS_COMMON
      if ((size_t)size > tn->cmn_size) {
        KC_TRACE(10, ("THREADPRIVATE: %p (%" KMP_UINTPTR_SPEC
                      " ,%" KMP_UINTPTR_SPEC ")\n",
                      data, size, tn->cmn_size));
        KMP_FATAL(TPCommonBlocksInconsist);
      }
#endif /* USE_CHECKS_COMMON */
    } else {
      /* The parallel address will NEVER overlap with the data_address */
      /* dkp: 3rd arg to kmp_threadprivate_insert() is the data_address; use
       * data_address = data */

      KC_TRACE(20, ("__kmpc_threadprivate: T#%d inserting data\n", global_tid));
      tn = kmp_threadprivate_insert(global_tid, data, data, size);
    }

    ret = tn->par_addr;
  }
  KC_TRACE(10, ("__kmpc_threadprivate: T#%d exiting; return value = %p\n",
                global_tid, ret));

  return ret;
}
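/* Editor's note: on the serial, non-foreign path above the original variable
   itself is handed back (ret = data), so a program that never goes parallel
   pays for no copy; once the root is active, or the calling thread is a
   foreign thread, each thread gets (and thereafter reuses) its own par_addr
   copy from its th_pri_common table. */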
static kmp_cached_addr_t *__kmp_find_cache(void *data) {
  kmp_cached_addr_t *ptr = __kmp_threadpriv_cache_list;
  while (ptr && ptr->data != data)
    ptr = ptr->next;
  return ptr;
}

/*!
 @ingroup THREADPRIVATE
 @param loc source location information
 @param global_tid global thread number
 @param data pointer to data to privatize
 @param size size of data to privatize
 @param cache pointer to cache
 @return pointer to private storage
 Allocate private storage for threadprivate data.
*/
void *
__kmpc_threadprivate_cached(ident_t *loc,
                            kmp_int32 global_tid, // gtid.
                            void *data, // Pointer to original global variable.
                            size_t size, // Size of original global variable.
                            void ***cache) {
  KC_TRACE(10, ("__kmpc_threadprivate_cached: T#%d called with cache: %p, "
                "address: %p, size: %" KMP_SIZE_T_SPEC "\n",
                global_tid, *cache, data, size));

  if (TCR_PTR(*cache) == 0) {
    __kmp_acquire_lock(&__kmp_global_lock, global_tid);

    if (TCR_PTR(*cache) == 0) {
      __kmp_acquire_bootstrap_lock(&__kmp_tp_cached_lock);
      // Compiler often passes in NULL cache, even if it's already been created
      void **my_cache;
      kmp_cached_addr_t *tp_cache_addr;
      // Look for an existing cache
      tp_cache_addr = __kmp_find_cache(data);
      if (!tp_cache_addr) { // Cache was never created; do it now
        __kmp_tp_cached = 1;
        KMP_ITT_IGNORE(my_cache = (void **)__kmp_allocate(
                           sizeof(void *) * __kmp_tp_capacity +
                           sizeof(kmp_cached_addr_t)););
        // No need to zero the allocated memory; __kmp_allocate does that.
        KC_TRACE(50, ("__kmpc_threadprivate_cached: T#%d allocated cache at "
                      "address %p\n",
                      global_tid, my_cache));
        /* TODO: free all this memory in __kmp_common_destroy using
         * __kmp_threadpriv_cache_list */

        /* Add address of mycache to linked list for cleanup later */
        tp_cache_addr = (kmp_cached_addr_t *)&my_cache[__kmp_tp_capacity];
        tp_cache_addr->addr = my_cache;
        tp_cache_addr->data = data;
        tp_cache_addr->compiler_cache = cache;
        tp_cache_addr->next = __kmp_threadpriv_cache_list;
        __kmp_threadpriv_cache_list = tp_cache_addr;
      } else { // A cache was already created; use it
        my_cache = tp_cache_addr->addr;
        tp_cache_addr->compiler_cache = cache;
      }
      KMP_MB();

      TCW_PTR(*cache, my_cache);
      __kmp_release_bootstrap_lock(&__kmp_tp_cached_lock);

      KMP_MB();
    }
    __kmp_release_lock(&__kmp_global_lock, global_tid);
  }

  void *ret;
  if ((ret = TCR_PTR((*cache)[global_tid])) == 0) {
    ret = __kmpc_threadprivate(loc, global_tid, data, (size_t)size);

    TCW_PTR((*cache)[global_tid], ret);
  }
  KC_TRACE(10,
           ("__kmpc_threadprivate_cached: T#%d exiting; return value = %p\n",
            global_tid, ret));
  return ret;
}
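/* Editor's note on the cache layout used above: a single __kmp_allocate call
   provides __kmp_tp_capacity pointer slots (indexed by gtid) immediately
   followed by the kmp_cached_addr_t bookkeeping record at
   &my_cache[__kmp_tp_capacity], so freeing the slot array in
   __kmp_cleanup_threadprivate_caches releases the record as well. */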
// This function should only be called when both __kmp_tp_cached_lock and
// __kmp_forkjoin_lock are held.
void __kmp_threadprivate_resize_cache(int newCapacity) {
  KC_TRACE(10, ("__kmp_threadprivate_resize_cache: called with size: %d\n",
                newCapacity));

  kmp_cached_addr_t *ptr = __kmp_threadpriv_cache_list;

  while (ptr) {
    if (ptr->data) { // this location has an active cache; resize it
      void **my_cache;
      KMP_ITT_IGNORE(my_cache =
                         (void **)__kmp_allocate(sizeof(void *) * newCapacity +
                                                 sizeof(kmp_cached_addr_t)););
      // No need to zero the allocated memory; __kmp_allocate does that.
      KC_TRACE(50, ("__kmp_threadprivate_resize_cache: allocated cache at %p\n",
                    my_cache));
      // Now copy old cache into new cache
      void **old_cache = ptr->addr;
      for (int i = 0; i < __kmp_tp_capacity; ++i) {
        my_cache[i] = old_cache[i];
      }

      // Add address of new my_cache to linked list for cleanup later
      kmp_cached_addr_t *tp_cache_addr;
      tp_cache_addr = (kmp_cached_addr_t *)&my_cache[newCapacity];
      tp_cache_addr->addr = my_cache;
      tp_cache_addr->data = ptr->data;
      tp_cache_addr->compiler_cache = ptr->compiler_cache;
      tp_cache_addr->next = __kmp_threadpriv_cache_list;
      __kmp_threadpriv_cache_list = tp_cache_addr;

      // Copy new cache to compiler's location: We can copy directly
      // to (*compiler_cache) if compiler guarantees it will keep
      // using the same location for the cache. This is not yet true
      // for some compilers, in which case we have to check if
      // compiler_cache is still pointing at old cache, and if so, we
      // can point it at the new cache with an atomic compare&swap
      // operation. (Old method will always work, but we should shift
      // to new method (commented line below) when Intel and Clang
      // compilers use new method.)
      (void)KMP_COMPARE_AND_STORE_PTR(tp_cache_addr->compiler_cache, old_cache,
                                      my_cache);
      // TCW_PTR(*(tp_cache_addr->compiler_cache), my_cache);

      // If the store doesn't happen here, the compiler's old behavior will
      // inevitably call __kmpc_threadprivate_cached with a new location for
      // the cache, and that function will store the resized cache there at
      // that point.

      // Nullify old cache's data pointer so we skip it next time
      ptr->data = NULL;
    }
    ptr = ptr->next;
  }
  // After all caches are resized, update __kmp_tp_capacity to the new size
  *(volatile int *)&__kmp_tp_capacity = newCapacity;
}
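/* Editor's note: the old allocation is deliberately not freed here; its list
   entry merely has data set to NULL while addr still points at the old slot
   array, which is finally released in __kmp_cleanup_threadprivate_caches.
   Presumably this is because other threads may still be reading through the
   old cache pointer while the resize is in progress. */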
/*!
 @ingroup THREADPRIVATE
 @param loc source location information
 @param data pointer to data being privatized
 @param ctor pointer to constructor function for data
 @param cctor pointer to copy constructor function for data
 @param dtor pointer to destructor function for data
 @param vector_length length of the vector (bytes or elements?)
 Register vector constructors and destructors for thread private data.
*/
void __kmpc_threadprivate_register_vec(ident_t *loc, void *data,
                                       kmpc_ctor_vec ctor, kmpc_cctor_vec cctor,
                                       kmpc_dtor_vec dtor,
                                       size_t vector_length) {
  struct shared_common *d_tn, **lnk_tn;

  KC_TRACE(10, ("__kmpc_threadprivate_register_vec: called\n"));

#ifdef USE_CHECKS_COMMON
  /* copy constructor must be zero for current code gen (Nov 2002 - jph) */
  KMP_ASSERT(cctor == 0);
#endif /* USE_CHECKS_COMMON */

  d_tn = __kmp_find_shared_task_common(
      &__kmp_threadprivate_d_table, -1,
      data); /* Only the global data table exists. */

  if (d_tn == 0) {
    d_tn = (struct shared_common *)__kmp_allocate(sizeof(struct shared_common));
    d_tn->gbl_addr = data;

    d_tn->ct.ctorv = ctor;
    d_tn->cct.cctorv = cctor;
    d_tn->dt.dtorv = dtor;
    d_tn->is_vec = TRUE;
    d_tn->vec_len = (size_t)vector_length;
    // d_tn->obj_init = 0;  // AC: __kmp_allocate zeroes the memory
    // d_tn->pod_init = 0;
    lnk_tn = &(__kmp_threadprivate_d_table.data[KMP_HASH(data)]);

    d_tn->next = *lnk_tn;
    *lnk_tn = d_tn;
  }
}

void __kmp_cleanup_threadprivate_caches() {
  kmp_cached_addr_t *ptr = __kmp_threadpriv_cache_list;

  while (ptr) {
    void **cache = ptr->addr;
    __kmp_threadpriv_cache_list = ptr->next;
    if (*ptr->compiler_cache)
      *ptr->compiler_cache = NULL;
    ptr->compiler_cache = NULL;
    ptr->data = NULL;
    ptr->addr = NULL;
    ptr->next = NULL;
    // Threadprivate data pointed at by cache entries are destroyed at end of
    // __kmp_launch_thread with __kmp_common_destroy_gtid.
    __kmp_free(cache); // implicitly frees ptr too
    ptr = __kmp_threadpriv_cache_list;
  }
}