  1. /*
  2. * kmp_csupport.cpp -- kfront linkage support for OpenMP.
  3. */
  4. //===----------------------------------------------------------------------===//
  5. //
  6. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  7. // See https://llvm.org/LICENSE.txt for license information.
  8. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  9. //
  10. //===----------------------------------------------------------------------===//
  11. #define __KMP_IMP
  12. #include "omp.h" /* extern "C" declarations of user-visible routines */
  13. #include "kmp.h"
  14. #include "kmp_error.h"
  15. #include "kmp_i18n.h"
  16. #include "kmp_itt.h"
  17. #include "kmp_lock.h"
  18. #include "kmp_stats.h"
  19. #include "ompt-specific.h"
  20. #define MAX_MESSAGE 512
  21. // flags will be used in future, e.g. to implement openmp_strict library
  22. // restrictions
  23. /*!
  24. * @ingroup STARTUP_SHUTDOWN
  25. * @param loc in source location information
  26. * @param flags in for future use (currently ignored)
  27. *
  28. * Initialize the runtime library. This call is optional; if it is not made then
  29. * it will be implicitly called by attempts to use other library functions.
  30. */
  31. void __kmpc_begin(ident_t *loc, kmp_int32 flags) {
  32. // By default __kmpc_begin() is no-op.
  33. char *env;
  34. if ((env = getenv("KMP_INITIAL_THREAD_BIND")) != NULL &&
  35. __kmp_str_match_true(env)) {
  36. __kmp_middle_initialize();
  37. __kmp_assign_root_init_mask();
  38. KC_TRACE(10, ("__kmpc_begin: middle initialization called\n"));
  39. } else if (__kmp_ignore_mppbeg() == FALSE) {
  40. // By default __kmp_ignore_mppbeg() returns TRUE.
  41. __kmp_internal_begin();
  42. KC_TRACE(10, ("__kmpc_begin: called\n"));
  43. }
  44. }
  45. /*!
  46. * @ingroup STARTUP_SHUTDOWN
  47. * @param loc source location information
  48. *
  49. * Shut down the runtime library. This is also optional, and even if called it will
  50. * not do anything unless the `KMP_IGNORE_MPPEND` environment variable is set to
  51. * zero.
  52. */
  53. void __kmpc_end(ident_t *loc) {
  54. // By default, __kmp_ignore_mppend() returns TRUE which makes __kmpc_end()
  55. // call no-op. However, this can be overridden with KMP_IGNORE_MPPEND
  56. // environment variable. If KMP_IGNORE_MPPEND is 0, __kmp_ignore_mppend()
  57. // returns FALSE and __kmpc_end() will unregister this root (it can cause
  58. // library shut down).
  59. if (__kmp_ignore_mppend() == FALSE) {
  60. KC_TRACE(10, ("__kmpc_end: called\n"));
  61. KA_TRACE(30, ("__kmpc_end\n"));
  62. __kmp_internal_end_thread(-1);
  63. }
  64. #if KMP_OS_WINDOWS && OMPT_SUPPORT
  65. // Normal exit process on Windows does not allow worker threads of the final
  66. // parallel region to finish reporting their events, so shutting down the
  67. // library here fixes the issue at least for the cases where __kmpc_end() is
  68. // placed properly.
  69. if (ompt_enabled.enabled)
  70. __kmp_internal_end_library(__kmp_gtid_get_specific());
  71. #endif
  72. }
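/*
Usage sketch (an assumption about how a compiler may emit these optional calls,
not a requirement of the interface): a program can be bracketed explicitly, but
the runtime also initializes itself lazily on first use. `loc_example` is a
placeholder source-location descriptor.
@code
extern ident_t loc_example;
int main(void) {
  __kmpc_begin(&loc_example, 0); // optional; flags currently ignored
  // ... code that may contain OpenMP constructs ...
  __kmpc_end(&loc_example); // a no-op unless KMP_IGNORE_MPPEND=0
  return 0;
}
@endcode
*/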
  73. /*!
  74. @ingroup THREAD_STATES
  75. @param loc Source location information.
  76. @return The global thread index of the active thread.
  77. This function can be called in any context.
  78. If the runtime has only been entered at the outermost level from a
  79. single (necessarily non-OpenMP<sup>*</sup>) thread, then the thread number is
  80. that which would be returned by omp_get_thread_num() in the outermost
  81. active parallel construct. (Or zero if there is no active parallel
  82. construct, since the primary thread is necessarily thread zero).
  83. If multiple non-OpenMP threads all enter an OpenMP construct then this
  84. will be a unique thread identifier among all the threads created by
  85. the OpenMP runtime (but the value cannot be defined in terms of
  86. OpenMP thread ids returned by omp_get_thread_num()).
  87. */
  88. kmp_int32 __kmpc_global_thread_num(ident_t *loc) {
  89. kmp_int32 gtid = __kmp_entry_gtid();
  90. KC_TRACE(10, ("__kmpc_global_thread_num: T#%d\n", gtid));
  91. return gtid;
  92. }
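/*
Caller sketch (assumed pattern, for illustration): compiler-generated code
usually fetches the global thread number once per function and forwards it as
the `global_tid` argument of subsequent __kmpc_* entry points. `loc_example`
is a placeholder descriptor declared elsewhere.
@code
void example_caller(void) {
  kmp_int32 gtid = __kmpc_global_thread_num(&loc_example); // cached once
  __kmpc_barrier(&loc_example, gtid); // then reused for later calls
}
@endcode
*/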
  93. /*!
  94. @ingroup THREAD_STATES
  95. @param loc Source location information.
  96. @return The number of threads under control of the OpenMP<sup>*</sup> runtime
  97. This function can be called in any context.
  98. It returns the total number of threads under the control of the OpenMP runtime.
  99. That is not a number that can be determined by any OpenMP standard calls, since
  100. the library may be called from more than one non-OpenMP thread, and this
  101. reflects the total over all such calls. Similarly, the runtime maintains
  102. underlying threads even when they are not active (since the cost of creating
  103. and destroying OS threads is high); this call counts all such threads even if
  104. they are not waiting for work.
  105. */
  106. kmp_int32 __kmpc_global_num_threads(ident_t *loc) {
  107. KC_TRACE(10,
  108. ("__kmpc_global_num_threads: num_threads = %d\n", __kmp_all_nth));
  109. return TCR_4(__kmp_all_nth);
  110. }
  111. /*!
  112. @ingroup THREAD_STATES
  113. @param loc Source location information.
  114. @return The thread number of the calling thread in the innermost active parallel
  115. construct.
  116. */
  117. kmp_int32 __kmpc_bound_thread_num(ident_t *loc) {
  118. KC_TRACE(10, ("__kmpc_bound_thread_num: called\n"));
  119. return __kmp_tid_from_gtid(__kmp_entry_gtid());
  120. }
  121. /*!
  122. @ingroup THREAD_STATES
  123. @param loc Source location information.
  124. @return The number of threads in the innermost active parallel construct.
  125. */
  126. kmp_int32 __kmpc_bound_num_threads(ident_t *loc) {
  127. KC_TRACE(10, ("__kmpc_bound_num_threads: called\n"));
  128. return __kmp_entry_thread()->th.th_team->t.t_nproc;
  129. }
  130. /*!
  131. * @ingroup DEPRECATED
  132. * @param loc location description
  133. *
  134. * This function need not be called. It always returns TRUE.
  135. */
  136. kmp_int32 __kmpc_ok_to_fork(ident_t *loc) {
  137. #ifndef KMP_DEBUG
  138. return TRUE;
  139. #else
  140. const char *semi2;
  141. const char *semi3;
  142. int line_no;
  143. if (__kmp_par_range == 0) {
  144. return TRUE;
  145. }
  146. semi2 = loc->psource;
  147. if (semi2 == NULL) {
  148. return TRUE;
  149. }
  150. semi2 = strchr(semi2, ';');
  151. if (semi2 == NULL) {
  152. return TRUE;
  153. }
  154. semi2 = strchr(semi2 + 1, ';');
  155. if (semi2 == NULL) {
  156. return TRUE;
  157. }
  158. if (__kmp_par_range_filename[0]) {
  159. const char *name = semi2 - 1;
  160. while ((name > loc->psource) && (*name != '/') && (*name != ';')) {
  161. name--;
  162. }
  163. if ((*name == '/') || (*name == ';')) {
  164. name++;
  165. }
  166. if (strncmp(__kmp_par_range_filename, name, semi2 - name)) {
  167. return __kmp_par_range < 0;
  168. }
  169. }
  170. semi3 = strchr(semi2 + 1, ';');
  171. if (__kmp_par_range_routine[0]) {
  172. if ((semi3 != NULL) && (semi3 > semi2) &&
  173. (strncmp(__kmp_par_range_routine, semi2 + 1, semi3 - semi2 - 1))) {
  174. return __kmp_par_range < 0;
  175. }
  176. }
  177. if (KMP_SSCANF(semi3 + 1, "%d", &line_no) == 1) {
  178. if ((line_no >= __kmp_par_range_lb) && (line_no <= __kmp_par_range_ub)) {
  179. return __kmp_par_range > 0;
  180. }
  181. return __kmp_par_range < 0;
  182. }
  183. return TRUE;
  184. #endif /* KMP_DEBUG */
  185. }
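/*
The parsing above walks the semicolon-separated `psource` string; a sketch with
assumed field values (the exact string is emitted by the compiler and its
layout is documented with ident_t in kmp.h):
@code
// psource: ";example.c;compute;42;57;;"
//                      ^semi2   ^semi3
// filename field : text between the first and second ';' ("example.c")
// routine field  : text between the second and third ';' ("compute")
// line number    : parsed after the third ';' (42)
// These are matched against __kmp_par_range_filename, __kmp_par_range_routine
// and [__kmp_par_range_lb, __kmp_par_range_ub], which are configured through
// the KMP_PAR_RANGE debug setting.
static ident_t loc_dbg = {0, KMP_IDENT_KMPC, 0, 0, ";example.c;compute;42;57;;"};
@endcode
*/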
  186. /*!
  187. @ingroup THREAD_STATES
  188. @param loc Source location information.
  189. @return 1 if this thread is executing inside an active parallel region, zero if
  190. not.
  191. */
  192. kmp_int32 __kmpc_in_parallel(ident_t *loc) {
  193. return __kmp_entry_thread()->th.th_root->r.r_active;
  194. }
  195. /*!
  196. @ingroup PARALLEL
  197. @param loc source location information
  198. @param global_tid global thread number
  199. @param num_threads number of threads requested for this parallel construct
  200. Set the number of threads to be used by the next fork spawned by this thread.
  201. This call is only required if the parallel construct has a `num_threads` clause.
  202. */
  203. void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid,
  204. kmp_int32 num_threads) {
  205. KA_TRACE(20, ("__kmpc_push_num_threads: enter T#%d num_threads=%d\n",
  206. global_tid, num_threads));
  207. __kmp_assert_valid_gtid(global_tid);
  208. __kmp_push_num_threads(loc, global_tid, num_threads);
  209. }
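/*
Lowering sketch (assumed, not taken from any specific compiler) showing why this
entry point only appears when a `num_threads` clause is present: the request is
pushed immediately before the matching fork, which consumes it. `loc_example`
and `outlined_fn` are placeholders declared elsewhere.
@code
  // #pragma omp parallel num_threads(4)
  kmp_int32 gtid = __kmpc_global_thread_num(&loc_example);
  __kmpc_push_num_threads(&loc_example, gtid, 4);
  __kmpc_fork_call(&loc_example, 0, (kmpc_micro)outlined_fn);
@endcode
*/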
  210. void __kmpc_pop_num_threads(ident_t *loc, kmp_int32 global_tid) {
  211. KA_TRACE(20, ("__kmpc_pop_num_threads: enter\n"));
  212. /* the num_threads are automatically popped */
  213. }
  214. void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid,
  215. kmp_int32 proc_bind) {
  216. KA_TRACE(20, ("__kmpc_push_proc_bind: enter T#%d proc_bind=%d\n", global_tid,
  217. proc_bind));
  218. __kmp_assert_valid_gtid(global_tid);
  219. __kmp_push_proc_bind(loc, global_tid, (kmp_proc_bind_t)proc_bind);
  220. }
  221. /*!
  222. @ingroup PARALLEL
  223. @param loc source location information
  224. @param argc total number of arguments in the ellipsis
  225. @param microtask pointer to callback routine consisting of outlined parallel
  226. construct
  227. @param ... pointers to shared variables that aren't global
  228. Do the actual fork and call the microtask in the relevant number of threads.
  229. */
  230. void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro microtask, ...) {
  231. int gtid = __kmp_entry_gtid();
  232. #if (KMP_STATS_ENABLED)
  233. // If we were in a serial region, then stop the serial timer, record
  234. // the event, and start parallel region timer
  235. stats_state_e previous_state = KMP_GET_THREAD_STATE();
  236. if (previous_state == stats_state_e::SERIAL_REGION) {
  237. KMP_EXCHANGE_PARTITIONED_TIMER(OMP_parallel_overhead);
  238. } else {
  239. KMP_PUSH_PARTITIONED_TIMER(OMP_parallel_overhead);
  240. }
  241. int inParallel = __kmpc_in_parallel(loc);
  242. if (inParallel) {
  243. KMP_COUNT_BLOCK(OMP_NESTED_PARALLEL);
  244. } else {
  245. KMP_COUNT_BLOCK(OMP_PARALLEL);
  246. }
  247. #endif
  248. // maybe saving thr_state would be enough here
  249. {
  250. va_list ap;
  251. va_start(ap, microtask);
  252. #if OMPT_SUPPORT
  253. ompt_frame_t *ompt_frame;
  254. if (ompt_enabled.enabled) {
  255. kmp_info_t *master_th = __kmp_threads[gtid];
  256. ompt_frame = &master_th->th.th_current_task->ompt_task_info.frame;
  257. ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
  258. }
  259. OMPT_STORE_RETURN_ADDRESS(gtid);
  260. #endif
  261. #if INCLUDE_SSC_MARKS
  262. SSC_MARK_FORKING();
  263. #endif
  264. __kmp_fork_call(loc, gtid, fork_context_intel, argc,
  265. VOLATILE_CAST(microtask_t) microtask, // "wrapped" task
  266. VOLATILE_CAST(launch_t) __kmp_invoke_task_func,
  267. kmp_va_addr_of(ap));
  268. #if INCLUDE_SSC_MARKS
  269. SSC_MARK_JOINING();
  270. #endif
  271. __kmp_join_call(loc, gtid
  272. #if OMPT_SUPPORT
  273. ,
  274. fork_context_intel
  275. #endif
  276. );
  277. va_end(ap);
  278. #if OMPT_SUPPORT
  279. if (ompt_enabled.enabled) {
  280. ompt_frame->enter_frame = ompt_data_none;
  281. }
  282. #endif
  283. }
  284. #if KMP_STATS_ENABLED
  285. if (previous_state == stats_state_e::SERIAL_REGION) {
  286. KMP_EXCHANGE_PARTITIONED_TIMER(OMP_serial);
  287. KMP_SET_THREAD_STATE(previous_state);
  288. } else {
  289. KMP_POP_PARTITIONED_TIMER();
  290. }
  291. #endif // KMP_STATS_ENABLED
  292. }
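/*
Lowering sketch (illustrative; the outlined routine, the descriptor and its
string are placeholders) of a simple `#pragma omp parallel` region sharing one
variable. The first two parameters of the outlined routine match kmpc_micro;
argc counts the trailing shared-variable pointers.
@code
static ident_t loc_example = {0, KMP_IDENT_KMPC, 0, 0,
                              ";example.c;example_parallel;3;5;;"};

static void outlined_parallel_fn(kmp_int32 *global_tid, kmp_int32 *bound_tid,
                                 int *shared_x) {
  // body of the parallel region, executed by every thread of the team,
  // would go here and may use *shared_x (with proper synchronization)
}

void example_parallel(void) {
  int x = 0;
  __kmpc_fork_call(&loc_example, 1, (kmpc_micro)outlined_parallel_fn, &x);
}
@endcode
*/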
  293. /*!
  294. @ingroup PARALLEL
  295. @param loc source location information
  296. @param global_tid global thread number
  297. @param num_teams number of teams requested for the teams construct
  298. @param num_threads number of threads per team requested for the teams construct
  299. Set the number of teams to be used by the teams construct.
  300. This call is only required if the teams construct has a `num_teams` clause
  301. or a `thread_limit` clause (or both).
  302. */
  303. void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid,
  304. kmp_int32 num_teams, kmp_int32 num_threads) {
  305. KA_TRACE(20,
  306. ("__kmpc_push_num_teams: enter T#%d num_teams=%d num_threads=%d\n",
  307. global_tid, num_teams, num_threads));
  308. __kmp_assert_valid_gtid(global_tid);
  309. __kmp_push_num_teams(loc, global_tid, num_teams, num_threads);
  310. }
  311. /*!
  312. @ingroup PARALLEL
  313. @param loc source location information
  314. @param global_tid global thread number
  315. @param num_teams_lb lower bound on number of teams requested for the teams
  316. construct
  317. @param num_teams_ub upper bound on number of teams requested for the teams
  318. construct
  319. @param num_threads number of threads per team requested for the teams construct
  320. Set the number of teams to be used by the teams construct. The number of initial
  321. teams created will be greater than or equal to the lower bound and less than or
  322. equal to the upper bound.
  323. This call is only required if the teams construct has a `num_teams` clause
  324. or a `thread_limit` clause (or both).
  325. */
  326. void __kmpc_push_num_teams_51(ident_t *loc, kmp_int32 global_tid,
  327. kmp_int32 num_teams_lb, kmp_int32 num_teams_ub,
  328. kmp_int32 num_threads) {
  329. KA_TRACE(20, ("__kmpc_push_num_teams_51: enter T#%d num_teams_lb=%d"
  330. " num_teams_ub=%d num_threads=%d\n",
  331. global_tid, num_teams_lb, num_teams_ub, num_threads));
  332. __kmp_assert_valid_gtid(global_tid);
  333. __kmp_push_num_teams_51(loc, global_tid, num_teams_lb, num_teams_ub,
  334. num_threads);
  335. }
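/*
Lowering sketch (assumed) for the OpenMP 5.1 `num_teams(lower : upper)` form;
the runtime then creates a number of teams within the requested range.
`loc_example` and `teams_outlined_fn` are placeholders declared elsewhere.
@code
  // #pragma omp teams num_teams(2 : 8) thread_limit(16)
  kmp_int32 gtid = __kmpc_global_thread_num(&loc_example);
  __kmpc_push_num_teams_51(&loc_example, gtid, 2, 8, 16);
  __kmpc_fork_teams(&loc_example, 0, (kmpc_micro)teams_outlined_fn);
@endcode
*/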
  336. /*!
  337. @ingroup PARALLEL
  338. @param loc source location information
  339. @param argc total number of arguments in the ellipsis
  340. @param microtask pointer to callback routine consisting of outlined teams
  341. construct
  342. @param ... pointers to shared variables that aren't global
  343. Do the actual fork and call the microtask in the relevant number of threads.
  344. */
  345. void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro microtask,
  346. ...) {
  347. int gtid = __kmp_entry_gtid();
  348. kmp_info_t *this_thr = __kmp_threads[gtid];
  349. va_list ap;
  350. va_start(ap, microtask);
  351. #if KMP_STATS_ENABLED
  352. KMP_COUNT_BLOCK(OMP_TEAMS);
  353. stats_state_e previous_state = KMP_GET_THREAD_STATE();
  354. if (previous_state == stats_state_e::SERIAL_REGION) {
  355. KMP_EXCHANGE_PARTITIONED_TIMER(OMP_teams_overhead);
  356. } else {
  357. KMP_PUSH_PARTITIONED_TIMER(OMP_teams_overhead);
  358. }
  359. #endif
  360. // remember teams entry point and nesting level
  361. this_thr->th.th_teams_microtask = microtask;
  362. this_thr->th.th_teams_level =
  363. this_thr->th.th_team->t.t_level; // AC: can be >0 on host
  364. #if OMPT_SUPPORT
  365. kmp_team_t *parent_team = this_thr->th.th_team;
  366. int tid = __kmp_tid_from_gtid(gtid);
  367. if (ompt_enabled.enabled) {
  368. parent_team->t.t_implicit_task_taskdata[tid]
  369. .ompt_task_info.frame.enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
  370. }
  371. OMPT_STORE_RETURN_ADDRESS(gtid);
  372. #endif
  373. // set the default number of teams if __kmpc_push_num_teams was not
  374. // called earlier
  375. if (this_thr->th.th_teams_size.nteams == 0) {
  376. __kmp_push_num_teams(loc, gtid, 0, 0);
  377. }
  378. KMP_DEBUG_ASSERT(this_thr->th.th_set_nproc >= 1);
  379. KMP_DEBUG_ASSERT(this_thr->th.th_teams_size.nteams >= 1);
  380. KMP_DEBUG_ASSERT(this_thr->th.th_teams_size.nth >= 1);
  381. __kmp_fork_call(
  382. loc, gtid, fork_context_intel, argc,
  383. VOLATILE_CAST(microtask_t) __kmp_teams_master, // "wrapped" task
  384. VOLATILE_CAST(launch_t) __kmp_invoke_teams_master, kmp_va_addr_of(ap));
  385. __kmp_join_call(loc, gtid
  386. #if OMPT_SUPPORT
  387. ,
  388. fork_context_intel
  389. #endif
  390. );
  391. // Pop current CG root off list
  392. KMP_DEBUG_ASSERT(this_thr->th.th_cg_roots);
  393. kmp_cg_root_t *tmp = this_thr->th.th_cg_roots;
  394. this_thr->th.th_cg_roots = tmp->up;
  395. KA_TRACE(100, ("__kmpc_fork_teams: Thread %p popping node %p and moving up"
  396. " to node %p. cg_nthreads was %d\n",
  397. this_thr, tmp, this_thr->th.th_cg_roots, tmp->cg_nthreads));
  398. KMP_DEBUG_ASSERT(tmp->cg_nthreads);
  399. int i = tmp->cg_nthreads--;
  400. if (i == 1) { // check if we are the last thread in CG (not always the case)
  401. __kmp_free(tmp);
  402. }
  403. // Restore current task's thread_limit from CG root
  404. KMP_DEBUG_ASSERT(this_thr->th.th_cg_roots);
  405. this_thr->th.th_current_task->td_icvs.thread_limit =
  406. this_thr->th.th_cg_roots->cg_thread_limit;
  407. this_thr->th.th_teams_microtask = NULL;
  408. this_thr->th.th_teams_level = 0;
  409. *(kmp_int64 *)(&this_thr->th.th_teams_size) = 0L;
  410. va_end(ap);
  411. #if KMP_STATS_ENABLED
  412. if (previous_state == stats_state_e::SERIAL_REGION) {
  413. KMP_EXCHANGE_PARTITIONED_TIMER(OMP_serial);
  414. KMP_SET_THREAD_STATE(previous_state);
  415. } else {
  416. KMP_POP_PARTITIONED_TIMER();
  417. }
  418. #endif // KMP_STATS_ENABLED
  419. }
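/*
Lowering sketch (illustrative, analogous to __kmpc_fork_call) of a plain
`#pragma omp teams` construct; the outlined routine and descriptor are
placeholders.
@code
extern ident_t loc_example;

static void teams_outlined_fn(kmp_int32 *global_tid, kmp_int32 *bound_tid,
                              int *shared_result) {
  // executed by the initial thread of each team
}

void example_teams(void) {
  int result = 0;
  kmp_int32 gtid = __kmpc_global_thread_num(&loc_example);
  __kmpc_push_num_teams(&loc_example, gtid, 4, 0); // num_teams(4), default nthreads
  __kmpc_fork_teams(&loc_example, 1, (kmpc_micro)teams_outlined_fn, &result);
}
@endcode
*/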
  420. // I don't think this function should ever have been exported.
  421. // The __kmpc_ prefix was misapplied. I'm fairly certain that no generated
  422. // openmp code ever called it, but it's been exported from the RTL for so
  423. // long that I'm afraid to remove the definition.
  424. int __kmpc_invoke_task_func(int gtid) { return __kmp_invoke_task_func(gtid); }
  425. /*!
  426. @ingroup PARALLEL
  427. @param loc source location information
  428. @param global_tid global thread number
  429. Enter a serialized parallel construct. This interface is used to handle a
  430. conditional parallel region, like this,
  431. @code
  432. #pragma omp parallel if (condition)
  433. @endcode
  434. when the condition is false.
  435. */
  436. void __kmpc_serialized_parallel(ident_t *loc, kmp_int32 global_tid) {
  437. // The implementation is now in kmp_runtime.cpp so that it can share static
  438. // functions with kmp_fork_call since the tasks to be done are similar in
  439. // each case.
  440. __kmp_assert_valid_gtid(global_tid);
  441. #if OMPT_SUPPORT
  442. OMPT_STORE_RETURN_ADDRESS(global_tid);
  443. #endif
  444. __kmp_serialized_parallel(loc, global_tid);
  445. }
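/*
Lowering sketch (assumed) of the conditional case shown above: when the `if`
clause is false the region runs serially on the calling thread between the
serialized begin/end calls instead of going through __kmpc_fork_call.
`loc_example`, `outlined_fn`, `cond` and `x` are placeholders.
@code
  kmp_int32 gtid = __kmpc_global_thread_num(&loc_example);
  if (cond) {
    __kmpc_fork_call(&loc_example, 1, (kmpc_micro)outlined_fn, &x);
  } else {
    __kmpc_serialized_parallel(&loc_example, gtid);
    kmp_int32 bound_tid = 0;
    outlined_fn(&gtid, &bound_tid, &x); // body runs on the calling thread
    __kmpc_end_serialized_parallel(&loc_example, gtid);
  }
@endcode
*/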
  446. /*!
  447. @ingroup PARALLEL
  448. @param loc source location information
  449. @param global_tid global thread number
  450. Leave a serialized parallel construct.
  451. */
  452. void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32 global_tid) {
  453. kmp_internal_control_t *top;
  454. kmp_info_t *this_thr;
  455. kmp_team_t *serial_team;
  456. KC_TRACE(10,
  457. ("__kmpc_end_serialized_parallel: called by T#%d\n", global_tid));
  458. /* skip all this code for autopar serialized loops since it results in
  459. unacceptable overhead */
  460. if (loc != NULL && (loc->flags & KMP_IDENT_AUTOPAR))
  461. return;
  462. // Not autopar code
  463. __kmp_assert_valid_gtid(global_tid);
  464. if (!TCR_4(__kmp_init_parallel))
  465. __kmp_parallel_initialize();
  466. __kmp_resume_if_soft_paused();
  467. this_thr = __kmp_threads[global_tid];
  468. serial_team = this_thr->th.th_serial_team;
  469. kmp_task_team_t *task_team = this_thr->th.th_task_team;
  470. // we need to wait for the proxy tasks before finishing the thread
  471. if (task_team != NULL && (task_team->tt.tt_found_proxy_tasks ||
  472. task_team->tt.tt_hidden_helper_task_encountered))
  473. __kmp_task_team_wait(this_thr, serial_team USE_ITT_BUILD_ARG(NULL));
  474. KMP_MB();
  475. KMP_DEBUG_ASSERT(serial_team);
  476. KMP_ASSERT(serial_team->t.t_serialized);
  477. KMP_DEBUG_ASSERT(this_thr->th.th_team == serial_team);
  478. KMP_DEBUG_ASSERT(serial_team != this_thr->th.th_root->r.r_root_team);
  479. KMP_DEBUG_ASSERT(serial_team->t.t_threads);
  480. KMP_DEBUG_ASSERT(serial_team->t.t_threads[0] == this_thr);
  481. #if OMPT_SUPPORT
  482. if (ompt_enabled.enabled &&
  483. this_thr->th.ompt_thread_info.state != ompt_state_overhead) {
  484. OMPT_CUR_TASK_INFO(this_thr)->frame.exit_frame = ompt_data_none;
  485. if (ompt_enabled.ompt_callback_implicit_task) {
  486. ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
  487. ompt_scope_end, NULL, OMPT_CUR_TASK_DATA(this_thr), 1,
  488. OMPT_CUR_TASK_INFO(this_thr)->thread_num, ompt_task_implicit);
  489. }
  490. // clear the task id only after unlinking the task
  491. ompt_data_t *parent_task_data;
  492. __ompt_get_task_info_internal(1, NULL, &parent_task_data, NULL, NULL, NULL);
  493. if (ompt_enabled.ompt_callback_parallel_end) {
  494. ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
  495. &(serial_team->t.ompt_team_info.parallel_data), parent_task_data,
  496. ompt_parallel_invoker_program | ompt_parallel_team,
  497. OMPT_LOAD_RETURN_ADDRESS(global_tid));
  498. }
  499. __ompt_lw_taskteam_unlink(this_thr);
  500. this_thr->th.ompt_thread_info.state = ompt_state_overhead;
  501. }
  502. #endif
  503. /* If necessary, pop the internal control stack values and replace the team
  504. * values */
  505. top = serial_team->t.t_control_stack_top;
  506. if (top && top->serial_nesting_level == serial_team->t.t_serialized) {
  507. copy_icvs(&serial_team->t.t_threads[0]->th.th_current_task->td_icvs, top);
  508. serial_team->t.t_control_stack_top = top->next;
  509. __kmp_free(top);
  510. }
  511. /* pop dispatch buffers stack */
  512. KMP_DEBUG_ASSERT(serial_team->t.t_dispatch->th_disp_buffer);
  513. {
  514. dispatch_private_info_t *disp_buffer =
  515. serial_team->t.t_dispatch->th_disp_buffer;
  516. serial_team->t.t_dispatch->th_disp_buffer =
  517. serial_team->t.t_dispatch->th_disp_buffer->next;
  518. __kmp_free(disp_buffer);
  519. }
  520. this_thr->th.th_def_allocator = serial_team->t.t_def_allocator; // restore
  521. --serial_team->t.t_serialized;
  522. if (serial_team->t.t_serialized == 0) {
  523. /* return to the parallel section */
  524. #if KMP_ARCH_X86 || KMP_ARCH_X86_64
  525. if (__kmp_inherit_fp_control && serial_team->t.t_fp_control_saved) {
  526. __kmp_clear_x87_fpu_status_word();
  527. __kmp_load_x87_fpu_control_word(&serial_team->t.t_x87_fpu_control_word);
  528. __kmp_load_mxcsr(&serial_team->t.t_mxcsr);
  529. }
  530. #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
  531. __kmp_pop_current_task_from_thread(this_thr);
  532. #if OMPD_SUPPORT
  533. if (ompd_state & OMPD_ENABLE_BP)
  534. ompd_bp_parallel_end();
  535. #endif
  536. this_thr->th.th_team = serial_team->t.t_parent;
  537. this_thr->th.th_info.ds.ds_tid = serial_team->t.t_master_tid;
  538. /* restore values cached in the thread */
  539. this_thr->th.th_team_nproc = serial_team->t.t_parent->t.t_nproc; /* JPH */
  540. this_thr->th.th_team_master =
  541. serial_team->t.t_parent->t.t_threads[0]; /* JPH */
  542. this_thr->th.th_team_serialized = this_thr->th.th_team->t.t_serialized;
  543. /* TODO the below shouldn't need to be adjusted for serialized teams */
  544. this_thr->th.th_dispatch =
  545. &this_thr->th.th_team->t.t_dispatch[serial_team->t.t_master_tid];
  546. KMP_ASSERT(this_thr->th.th_current_task->td_flags.executing == 0);
  547. this_thr->th.th_current_task->td_flags.executing = 1;
  548. if (__kmp_tasking_mode != tskm_immediate_exec) {
  549. // Copy the task team from the new child / old parent team to the thread.
  550. this_thr->th.th_task_team =
  551. this_thr->th.th_team->t.t_task_team[this_thr->th.th_task_state];
  552. KA_TRACE(20,
  553. ("__kmpc_end_serialized_parallel: T#%d restoring task_team %p / "
  554. "team %p\n",
  555. global_tid, this_thr->th.th_task_team, this_thr->th.th_team));
  556. }
  557. #if KMP_AFFINITY_SUPPORTED
  558. if (this_thr->th.th_team->t.t_level == 0 && __kmp_affin_reset) {
  559. __kmp_reset_root_init_mask(global_tid);
  560. }
  561. #endif
  562. } else {
  563. if (__kmp_tasking_mode != tskm_immediate_exec) {
  564. KA_TRACE(20, ("__kmpc_end_serialized_parallel: T#%d decreasing nesting "
  565. "depth of serial team %p to %d\n",
  566. global_tid, serial_team, serial_team->t.t_serialized));
  567. }
  568. }
  569. serial_team->t.t_level--;
  570. if (__kmp_env_consistency_check)
  571. __kmp_pop_parallel(global_tid, NULL);
  572. #if OMPT_SUPPORT
  573. if (ompt_enabled.enabled)
  574. this_thr->th.ompt_thread_info.state =
  575. ((this_thr->th.th_team_serialized) ? ompt_state_work_serial
  576. : ompt_state_work_parallel);
  577. #endif
  578. }
  579. /*!
  580. @ingroup SYNCHRONIZATION
  581. @param loc source location information.
  582. Execute <tt>flush</tt>. This is implemented as a full memory fence. (Though
  583. depending on the memory ordering convention obeyed by the compiler
  584. even that may not be necessary).
  585. */
  586. void __kmpc_flush(ident_t *loc) {
  587. KC_TRACE(10, ("__kmpc_flush: called\n"));
  588. /* need an explicit __mf() here since the library uses volatile instead */
  589. KMP_MB(); /* Flush all pending memory write invalidates. */
  590. #if (KMP_ARCH_X86 || KMP_ARCH_X86_64)
  591. #if KMP_MIC
  592. // fence-style instructions do not exist, but lock; xaddl $0,(%rsp) can be used.
  593. // We shouldn't need it, though, since the ABI rules require that
  594. // * If the compiler generates NGO stores it also generates the fence
  595. // * If users hand-code NGO stores they should insert the fence
  596. // therefore no incomplete unordered stores should be visible.
  597. #else
  598. // C74404
  599. // This is to address non-temporal store instructions (sfence needed).
  600. // The clflush instruction is also addressed (mfence needed).
  601. // Probably the non-temporal load movntdqa instruction should also be
  602. // addressed.
  603. // mfence is an SSE2 instruction. Do not execute it if the CPU is not SSE2.
  604. if (!__kmp_cpuinfo.initialized) {
  605. __kmp_query_cpuid(&__kmp_cpuinfo);
  606. }
  607. if (!__kmp_cpuinfo.flags.sse2) {
  608. // CPU cannot execute SSE2 instructions.
  609. } else {
  610. #if KMP_COMPILER_ICC || KMP_COMPILER_ICX
  611. _mm_mfence();
  612. #elif KMP_COMPILER_MSVC
  613. MemoryBarrier();
  614. #else
  615. __sync_synchronize();
  616. #endif // KMP_COMPILER_ICC || KMP_COMPILER_ICX
  617. }
  618. #endif // KMP_MIC
  619. #elif (KMP_ARCH_ARM || KMP_ARCH_AARCH64 || KMP_ARCH_MIPS || KMP_ARCH_MIPS64 || \
  620. KMP_ARCH_RISCV64)
  621. // Nothing to see here, move along
  622. #elif KMP_ARCH_PPC64
  623. // Nothing needed here (we have a real MB above).
  624. #else
  625. #error Unknown or unsupported architecture
  626. #endif
  627. #if OMPT_SUPPORT && OMPT_OPTIONAL
  628. if (ompt_enabled.ompt_callback_flush) {
  629. ompt_callbacks.ompt_callback(ompt_callback_flush)(
  630. __ompt_get_thread_data_internal(), OMPT_GET_RETURN_ADDRESS(0));
  631. }
  632. #endif
  633. }
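/*
Usage sketch (assumed lowering): `#pragma omp flush` maps to a single call with
no paired "end" routine. `loc_example` and `shared_flag` are placeholders.
@code
  shared_flag = 1;            // publish a value
  __kmpc_flush(&loc_example); // full memory fence as described above
@endcode
*/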
  634. /* -------------------------------------------------------------------------- */
  635. /*!
  636. @ingroup SYNCHRONIZATION
  637. @param loc source location information
  638. @param global_tid thread id.
  639. Execute a barrier.
  640. */
  641. void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid) {
  642. KMP_COUNT_BLOCK(OMP_BARRIER);
  643. KC_TRACE(10, ("__kmpc_barrier: called T#%d\n", global_tid));
  644. __kmp_assert_valid_gtid(global_tid);
  645. if (!TCR_4(__kmp_init_parallel))
  646. __kmp_parallel_initialize();
  647. __kmp_resume_if_soft_paused();
  648. if (__kmp_env_consistency_check) {
  649. if (loc == 0) {
  650. KMP_WARNING(ConstructIdentInvalid); // ??? What does it mean for the user?
  651. }
  652. __kmp_check_barrier(global_tid, ct_barrier, loc);
  653. }
  654. #if OMPT_SUPPORT
  655. ompt_frame_t *ompt_frame;
  656. if (ompt_enabled.enabled) {
  657. __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
  658. if (ompt_frame->enter_frame.ptr == NULL)
  659. ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
  660. }
  661. OMPT_STORE_RETURN_ADDRESS(global_tid);
  662. #endif
  663. __kmp_threads[global_tid]->th.th_ident = loc;
  664. // TODO: explicit barrier_wait_id:
  665. // this function is called when 'barrier' directive is present or
  666. // implicit barrier at the end of a worksharing construct.
  667. // 1) better to add a per-thread barrier counter to a thread data structure
  668. // 2) set to 0 when a new team is created
  669. // 3) no sync is required
  670. __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
  671. #if OMPT_SUPPORT && OMPT_OPTIONAL
  672. if (ompt_enabled.enabled) {
  673. ompt_frame->enter_frame = ompt_data_none;
  674. }
  675. #endif
  676. }
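/*
Usage sketch (assumed lowering) of an explicit `#pragma omp barrier`; the same
entry point is also emitted for the implicit barrier at the end of worksharing
constructs unless `nowait` is given. `loc_example` is a placeholder.
@code
  kmp_int32 gtid = __kmpc_global_thread_num(&loc_example);
  // ... first phase, executed by all threads of the team ...
  __kmpc_barrier(&loc_example, gtid);
  // ... second phase, starts only after every thread arrives ...
@endcode
*/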
  677. /* The BARRIER for a MASTER section is always explicit */
  678. /*!
  679. @ingroup WORK_SHARING
  680. @param loc source location information.
  681. @param global_tid global thread number.
  682. @return 1 if this thread should execute the <tt>master</tt> block, 0 otherwise.
  683. */
  684. kmp_int32 __kmpc_master(ident_t *loc, kmp_int32 global_tid) {
  685. int status = 0;
  686. KC_TRACE(10, ("__kmpc_master: called T#%d\n", global_tid));
  687. __kmp_assert_valid_gtid(global_tid);
  688. if (!TCR_4(__kmp_init_parallel))
  689. __kmp_parallel_initialize();
  690. __kmp_resume_if_soft_paused();
  691. if (KMP_MASTER_GTID(global_tid)) {
  692. KMP_COUNT_BLOCK(OMP_MASTER);
  693. KMP_PUSH_PARTITIONED_TIMER(OMP_master);
  694. status = 1;
  695. }
  696. #if OMPT_SUPPORT && OMPT_OPTIONAL
  697. if (status) {
  698. if (ompt_enabled.ompt_callback_masked) {
  699. kmp_info_t *this_thr = __kmp_threads[global_tid];
  700. kmp_team_t *team = this_thr->th.th_team;
  701. int tid = __kmp_tid_from_gtid(global_tid);
  702. ompt_callbacks.ompt_callback(ompt_callback_masked)(
  703. ompt_scope_begin, &(team->t.ompt_team_info.parallel_data),
  704. &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
  705. OMPT_GET_RETURN_ADDRESS(0));
  706. }
  707. }
  708. #endif
  709. if (__kmp_env_consistency_check) {
  710. #if KMP_USE_DYNAMIC_LOCK
  711. if (status)
  712. __kmp_push_sync(global_tid, ct_master, loc, NULL, 0);
  713. else
  714. __kmp_check_sync(global_tid, ct_master, loc, NULL, 0);
  715. #else
  716. if (status)
  717. __kmp_push_sync(global_tid, ct_master, loc, NULL);
  718. else
  719. __kmp_check_sync(global_tid, ct_master, loc, NULL);
  720. #endif
  721. }
  722. return status;
  723. }
  724. /*!
  725. @ingroup WORK_SHARING
  726. @param loc source location information.
  727. @param global_tid global thread number.
  728. Mark the end of a <tt>master</tt> region. This should only be called by the
  729. thread that executes the <tt>master</tt> region.
  730. */
  731. void __kmpc_end_master(ident_t *loc, kmp_int32 global_tid) {
  732. KC_TRACE(10, ("__kmpc_end_master: called T#%d\n", global_tid));
  733. __kmp_assert_valid_gtid(global_tid);
  734. KMP_DEBUG_ASSERT(KMP_MASTER_GTID(global_tid));
  735. KMP_POP_PARTITIONED_TIMER();
  736. #if OMPT_SUPPORT && OMPT_OPTIONAL
  737. kmp_info_t *this_thr = __kmp_threads[global_tid];
  738. kmp_team_t *team = this_thr->th.th_team;
  739. if (ompt_enabled.ompt_callback_masked) {
  740. int tid = __kmp_tid_from_gtid(global_tid);
  741. ompt_callbacks.ompt_callback(ompt_callback_masked)(
  742. ompt_scope_end, &(team->t.ompt_team_info.parallel_data),
  743. &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
  744. OMPT_GET_RETURN_ADDRESS(0));
  745. }
  746. #endif
  747. if (__kmp_env_consistency_check) {
  748. if (KMP_MASTER_GTID(global_tid))
  749. __kmp_pop_sync(global_tid, ct_master, loc);
  750. }
  751. }
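/*
Pairing sketch (assumed lowering of `#pragma omp master`): only the thread for
which __kmpc_master() returned 1 executes the block and the matching end call;
there is no implied barrier. `loc_example` and `gtid` are placeholders.
@code
  if (__kmpc_master(&loc_example, gtid)) {
    // master-only block
    __kmpc_end_master(&loc_example, gtid);
  }
@endcode
*/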
  752. /*!
  753. @ingroup WORK_SHARING
  754. @param loc source location information.
  755. @param global_tid global thread number.
  756. @param filter result of evaluating filter clause on thread global_tid, or zero
  757. if no filter clause present
  758. @return 1 if this thread should execute the <tt>masked</tt> block, 0 otherwise.
  759. */
  760. kmp_int32 __kmpc_masked(ident_t *loc, kmp_int32 global_tid, kmp_int32 filter) {
  761. int status = 0;
  762. int tid;
  763. KC_TRACE(10, ("__kmpc_masked: called T#%d\n", global_tid));
  764. __kmp_assert_valid_gtid(global_tid);
  765. if (!TCR_4(__kmp_init_parallel))
  766. __kmp_parallel_initialize();
  767. __kmp_resume_if_soft_paused();
  768. tid = __kmp_tid_from_gtid(global_tid);
  769. if (tid == filter) {
  770. KMP_COUNT_BLOCK(OMP_MASKED);
  771. KMP_PUSH_PARTITIONED_TIMER(OMP_masked);
  772. status = 1;
  773. }
  774. #if OMPT_SUPPORT && OMPT_OPTIONAL
  775. if (status) {
  776. if (ompt_enabled.ompt_callback_masked) {
  777. kmp_info_t *this_thr = __kmp_threads[global_tid];
  778. kmp_team_t *team = this_thr->th.th_team;
  779. ompt_callbacks.ompt_callback(ompt_callback_masked)(
  780. ompt_scope_begin, &(team->t.ompt_team_info.parallel_data),
  781. &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
  782. OMPT_GET_RETURN_ADDRESS(0));
  783. }
  784. }
  785. #endif
  786. if (__kmp_env_consistency_check) {
  787. #if KMP_USE_DYNAMIC_LOCK
  788. if (status)
  789. __kmp_push_sync(global_tid, ct_masked, loc, NULL, 0);
  790. else
  791. __kmp_check_sync(global_tid, ct_masked, loc, NULL, 0);
  792. #else
  793. if (status)
  794. __kmp_push_sync(global_tid, ct_masked, loc, NULL);
  795. else
  796. __kmp_check_sync(global_tid, ct_masked, loc, NULL);
  797. #endif
  798. }
  799. return status;
  800. }
  801. /*!
  802. @ingroup WORK_SHARING
  803. @param loc source location information.
  804. @param global_tid global thread number.
  805. Mark the end of a <tt>masked</tt> region. This should only be called by the
  806. thread that executes the <tt>masked</tt> region.
  807. */
  808. void __kmpc_end_masked(ident_t *loc, kmp_int32 global_tid) {
  809. KC_TRACE(10, ("__kmpc_end_masked: called T#%d\n", global_tid));
  810. __kmp_assert_valid_gtid(global_tid);
  811. KMP_POP_PARTITIONED_TIMER();
  812. #if OMPT_SUPPORT && OMPT_OPTIONAL
  813. kmp_info_t *this_thr = __kmp_threads[global_tid];
  814. kmp_team_t *team = this_thr->th.th_team;
  815. if (ompt_enabled.ompt_callback_masked) {
  816. int tid = __kmp_tid_from_gtid(global_tid);
  817. ompt_callbacks.ompt_callback(ompt_callback_masked)(
  818. ompt_scope_end, &(team->t.ompt_team_info.parallel_data),
  819. &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
  820. OMPT_GET_RETURN_ADDRESS(0));
  821. }
  822. #endif
  823. if (__kmp_env_consistency_check) {
  824. __kmp_pop_sync(global_tid, ct_masked, loc);
  825. }
  826. }
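/*
Pairing sketch (assumed lowering of `#pragma omp masked filter(expr)`): each
thread evaluates the filter expression and passes the result; with no filter
clause the compiler passes 0, which reproduces `master` semantics.
`loc_example`, `gtid` and `expr` are placeholders.
@code
  kmp_int32 filter = expr; // 0 when no filter clause is present
  if (__kmpc_masked(&loc_example, gtid, filter)) {
    // block executed only by the selected thread
    __kmpc_end_masked(&loc_example, gtid);
  }
@endcode
*/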
  827. /*!
  828. @ingroup WORK_SHARING
  829. @param loc source location information.
  830. @param gtid global thread number.
  831. Start execution of an <tt>ordered</tt> construct.
  832. */
  833. void __kmpc_ordered(ident_t *loc, kmp_int32 gtid) {
  834. int cid = 0;
  835. kmp_info_t *th;
  836. KMP_DEBUG_ASSERT(__kmp_init_serial);
  837. KC_TRACE(10, ("__kmpc_ordered: called T#%d\n", gtid));
  838. __kmp_assert_valid_gtid(gtid);
  839. if (!TCR_4(__kmp_init_parallel))
  840. __kmp_parallel_initialize();
  841. __kmp_resume_if_soft_paused();
  842. #if USE_ITT_BUILD
  843. __kmp_itt_ordered_prep(gtid);
  844. // TODO: ordered_wait_id
  845. #endif /* USE_ITT_BUILD */
  846. th = __kmp_threads[gtid];
  847. #if OMPT_SUPPORT && OMPT_OPTIONAL
  848. kmp_team_t *team;
  849. ompt_wait_id_t lck;
  850. void *codeptr_ra;
  851. OMPT_STORE_RETURN_ADDRESS(gtid);
  852. if (ompt_enabled.enabled) {
  853. team = __kmp_team_from_gtid(gtid);
  854. lck = (ompt_wait_id_t)(uintptr_t)&team->t.t_ordered.dt.t_value;
  855. /* OMPT state update */
  856. th->th.ompt_thread_info.wait_id = lck;
  857. th->th.ompt_thread_info.state = ompt_state_wait_ordered;
  858. /* OMPT event callback */
  859. codeptr_ra = OMPT_LOAD_RETURN_ADDRESS(gtid);
  860. if (ompt_enabled.ompt_callback_mutex_acquire) {
  861. ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
  862. ompt_mutex_ordered, omp_lock_hint_none, kmp_mutex_impl_spin, lck,
  863. codeptr_ra);
  864. }
  865. }
  866. #endif
  867. if (th->th.th_dispatch->th_deo_fcn != 0)
  868. (*th->th.th_dispatch->th_deo_fcn)(&gtid, &cid, loc);
  869. else
  870. __kmp_parallel_deo(&gtid, &cid, loc);
  871. #if OMPT_SUPPORT && OMPT_OPTIONAL
  872. if (ompt_enabled.enabled) {
  873. /* OMPT state update */
  874. th->th.ompt_thread_info.state = ompt_state_work_parallel;
  875. th->th.ompt_thread_info.wait_id = 0;
  876. /* OMPT event callback */
  877. if (ompt_enabled.ompt_callback_mutex_acquired) {
  878. ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
  879. ompt_mutex_ordered, (ompt_wait_id_t)(uintptr_t)lck, codeptr_ra);
  880. }
  881. }
  882. #endif
  883. #if USE_ITT_BUILD
  884. __kmp_itt_ordered_start(gtid);
  885. #endif /* USE_ITT_BUILD */
  886. }
  887. /*!
  888. @ingroup WORK_SHARING
  889. @param loc source location information.
  890. @param gtid global thread number.
  891. End execution of an <tt>ordered</tt> construct.
  892. */
  893. void __kmpc_end_ordered(ident_t *loc, kmp_int32 gtid) {
  894. int cid = 0;
  895. kmp_info_t *th;
  896. KC_TRACE(10, ("__kmpc_end_ordered: called T#%d\n", gtid));
  897. __kmp_assert_valid_gtid(gtid);
  898. #if USE_ITT_BUILD
  899. __kmp_itt_ordered_end(gtid);
  900. // TODO: ordered_wait_id
  901. #endif /* USE_ITT_BUILD */
  902. th = __kmp_threads[gtid];
  903. if (th->th.th_dispatch->th_dxo_fcn != 0)
  904. (*th->th.th_dispatch->th_dxo_fcn)(&gtid, &cid, loc);
  905. else
  906. __kmp_parallel_dxo(&gtid, &cid, loc);
  907. #if OMPT_SUPPORT && OMPT_OPTIONAL
  908. OMPT_STORE_RETURN_ADDRESS(gtid);
  909. if (ompt_enabled.ompt_callback_mutex_released) {
  910. ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
  911. ompt_mutex_ordered,
  912. (ompt_wait_id_t)(uintptr_t)&__kmp_team_from_gtid(gtid)
  913. ->t.t_ordered.dt.t_value,
  914. OMPT_LOAD_RETURN_ADDRESS(gtid));
  915. }
  916. #endif
  917. }
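/*
Usage sketch (assumed lowering): within a loop chunk handed out by an ordered
dynamic dispatcher, each iteration brackets its ordered block with these two
calls so those blocks complete in iteration order. `loc_example`, `gtid` and
the bounds are placeholders.
@code
  for (int i = lower; i <= upper; ++i) { // chunk from the dispatcher
    // ... unordered part of the iteration ...
    __kmpc_ordered(&loc_example, gtid);
    // ... ordered part, executed in sequential iteration order ...
    __kmpc_end_ordered(&loc_example, gtid);
  }
@endcode
*/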
  918. #if KMP_USE_DYNAMIC_LOCK
  919. static __forceinline void
  920. __kmp_init_indirect_csptr(kmp_critical_name *crit, ident_t const *loc,
  921. kmp_int32 gtid, kmp_indirect_locktag_t tag) {
  922. // Pointer to the allocated indirect lock is written to crit, while indexing
  923. // is ignored.
  924. void *idx;
  925. kmp_indirect_lock_t **lck;
  926. lck = (kmp_indirect_lock_t **)crit;
  927. kmp_indirect_lock_t *ilk = __kmp_allocate_indirect_lock(&idx, gtid, tag);
  928. KMP_I_LOCK_FUNC(ilk, init)(ilk->lock);
  929. KMP_SET_I_LOCK_LOCATION(ilk, loc);
  930. KMP_SET_I_LOCK_FLAGS(ilk, kmp_lf_critical_section);
  931. KA_TRACE(20,
  932. ("__kmp_init_indirect_csptr: initialized indirect lock #%d\n", tag));
  933. #if USE_ITT_BUILD
  934. __kmp_itt_critical_creating(ilk->lock, loc);
  935. #endif
  936. int status = KMP_COMPARE_AND_STORE_PTR(lck, nullptr, ilk);
  937. if (status == 0) {
  938. #if USE_ITT_BUILD
  939. __kmp_itt_critical_destroyed(ilk->lock);
  940. #endif
  941. // We don't really need to destroy the unclaimed lock here since it will be
  942. // cleaned up at program exit.
  943. // KMP_D_LOCK_FUNC(&idx, destroy)((kmp_dyna_lock_t *)&idx);
  944. }
  945. KMP_DEBUG_ASSERT(*lck != NULL);
  946. }
  947. // Fast-path acquire tas lock
  948. #define KMP_ACQUIRE_TAS_LOCK(lock, gtid) \
  949. { \
  950. kmp_tas_lock_t *l = (kmp_tas_lock_t *)lock; \
  951. kmp_int32 tas_free = KMP_LOCK_FREE(tas); \
  952. kmp_int32 tas_busy = KMP_LOCK_BUSY(gtid + 1, tas); \
  953. if (KMP_ATOMIC_LD_RLX(&l->lk.poll) != tas_free || \
  954. !__kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy)) { \
  955. kmp_uint32 spins; \
  956. KMP_FSYNC_PREPARE(l); \
  957. KMP_INIT_YIELD(spins); \
  958. kmp_backoff_t backoff = __kmp_spin_backoff_params; \
  959. do { \
  960. if (TCR_4(__kmp_nth) > \
  961. (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) { \
  962. KMP_YIELD(TRUE); \
  963. } else { \
  964. KMP_YIELD_SPIN(spins); \
  965. } \
  966. __kmp_spin_backoff(&backoff); \
  967. } while ( \
  968. KMP_ATOMIC_LD_RLX(&l->lk.poll) != tas_free || \
  969. !__kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy)); \
  970. } \
  971. KMP_FSYNC_ACQUIRED(l); \
  972. }
  973. // Fast-path test tas lock
  974. #define KMP_TEST_TAS_LOCK(lock, gtid, rc) \
  975. { \
  976. kmp_tas_lock_t *l = (kmp_tas_lock_t *)lock; \
  977. kmp_int32 tas_free = KMP_LOCK_FREE(tas); \
  978. kmp_int32 tas_busy = KMP_LOCK_BUSY(gtid + 1, tas); \
  979. rc = KMP_ATOMIC_LD_RLX(&l->lk.poll) == tas_free && \
  980. __kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy); \
  981. }
  982. // Fast-path release tas lock
  983. #define KMP_RELEASE_TAS_LOCK(lock, gtid) \
  984. { KMP_ATOMIC_ST_REL(&((kmp_tas_lock_t *)lock)->lk.poll, KMP_LOCK_FREE(tas)); }
  985. #if KMP_USE_FUTEX
  986. #include <sys/syscall.h>
  987. #include <unistd.h>
  988. #ifndef FUTEX_WAIT
  989. #define FUTEX_WAIT 0
  990. #endif
  991. #ifndef FUTEX_WAKE
  992. #define FUTEX_WAKE 1
  993. #endif
  994. // Fast-path acquire futex lock
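// Protocol sketch: the poll word holds KMP_LOCK_FREE(futex) when free and
// (gtid + 1) << 1 when owned; the low bit is set once a waiter exists. A thread
// that loses the initial CAS marks the lock as contended and sleeps in
// FUTEX_WAIT; the releasing thread sees the contended bit and issues FUTEX_WAKE
// (see KMP_RELEASE_FUTEX_LOCK below).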
  995. #define KMP_ACQUIRE_FUTEX_LOCK(lock, gtid) \
  996. { \
  997. kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock; \
  998. kmp_int32 gtid_code = (gtid + 1) << 1; \
  999. KMP_MB(); \
  1000. KMP_FSYNC_PREPARE(ftx); \
  1001. kmp_int32 poll_val; \
  1002. while ((poll_val = KMP_COMPARE_AND_STORE_RET32( \
  1003. &(ftx->lk.poll), KMP_LOCK_FREE(futex), \
  1004. KMP_LOCK_BUSY(gtid_code, futex))) != KMP_LOCK_FREE(futex)) { \
  1005. kmp_int32 cond = KMP_LOCK_STRIP(poll_val) & 1; \
  1006. if (!cond) { \
  1007. if (!KMP_COMPARE_AND_STORE_RET32(&(ftx->lk.poll), poll_val, \
  1008. poll_val | \
  1009. KMP_LOCK_BUSY(1, futex))) { \
  1010. continue; \
  1011. } \
  1012. poll_val |= KMP_LOCK_BUSY(1, futex); \
  1013. } \
  1014. kmp_int32 rc; \
  1015. if ((rc = syscall(__NR_futex, &(ftx->lk.poll), FUTEX_WAIT, poll_val, \
  1016. NULL, NULL, 0)) != 0) { \
  1017. continue; \
  1018. } \
  1019. gtid_code |= 1; \
  1020. } \
  1021. KMP_FSYNC_ACQUIRED(ftx); \
  1022. }
  1023. // Fast-path test futex lock
  1024. #define KMP_TEST_FUTEX_LOCK(lock, gtid, rc) \
  1025. { \
  1026. kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock; \
  1027. if (KMP_COMPARE_AND_STORE_ACQ32(&(ftx->lk.poll), KMP_LOCK_FREE(futex), \
1028. KMP_LOCK_BUSY((gtid + 1) << 1, futex))) { \
  1029. KMP_FSYNC_ACQUIRED(ftx); \
  1030. rc = TRUE; \
  1031. } else { \
  1032. rc = FALSE; \
  1033. } \
  1034. }
  1035. // Fast-path release futex lock
  1036. #define KMP_RELEASE_FUTEX_LOCK(lock, gtid) \
  1037. { \
  1038. kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock; \
  1039. KMP_MB(); \
  1040. KMP_FSYNC_RELEASING(ftx); \
  1041. kmp_int32 poll_val = \
  1042. KMP_XCHG_FIXED32(&(ftx->lk.poll), KMP_LOCK_FREE(futex)); \
  1043. if (KMP_LOCK_STRIP(poll_val) & 1) { \
  1044. syscall(__NR_futex, &(ftx->lk.poll), FUTEX_WAKE, \
  1045. KMP_LOCK_BUSY(1, futex), NULL, NULL, 0); \
  1046. } \
  1047. KMP_MB(); \
  1048. KMP_YIELD_OVERSUB(); \
  1049. }
  1050. #endif // KMP_USE_FUTEX
  1051. #else // KMP_USE_DYNAMIC_LOCK
  1052. static kmp_user_lock_p __kmp_get_critical_section_ptr(kmp_critical_name *crit,
  1053. ident_t const *loc,
  1054. kmp_int32 gtid) {
  1055. kmp_user_lock_p *lck_pp = (kmp_user_lock_p *)crit;
  1056. // Because of the double-check, the following load doesn't need to be volatile
  1057. kmp_user_lock_p lck = (kmp_user_lock_p)TCR_PTR(*lck_pp);
  1058. if (lck == NULL) {
  1059. void *idx;
  1060. // Allocate & initialize the lock.
  1061. // Remember alloc'ed locks in table in order to free them in __kmp_cleanup()
  1062. lck = __kmp_user_lock_allocate(&idx, gtid, kmp_lf_critical_section);
  1063. __kmp_init_user_lock_with_checks(lck);
  1064. __kmp_set_user_lock_location(lck, loc);
  1065. #if USE_ITT_BUILD
  1066. __kmp_itt_critical_creating(lck);
1067. // __kmp_itt_critical_creating() should be called *before* the first use of
1068. // the underlying lock; this is the only place where that can be guaranteed.
1069. // The lock may end up being destroyed without ever being used, but that is not
1070. // a problem: this is not a real event seen by the user, it merely sets a name
1071. // for the object (lock). See kmp_itt.h for more details.
  1072. #endif /* USE_ITT_BUILD */
  1073. // Use a cmpxchg instruction to slam the start of the critical section with
  1074. // the lock pointer. If another thread beat us to it, deallocate the lock,
  1075. // and use the lock that the other thread allocated.
  1076. int status = KMP_COMPARE_AND_STORE_PTR(lck_pp, 0, lck);
  1077. if (status == 0) {
  1078. // Deallocate the lock and reload the value.
  1079. #if USE_ITT_BUILD
  1080. __kmp_itt_critical_destroyed(lck);
  1081. // Let ITT know the lock is destroyed and the same memory location may be reused
  1082. // for another purpose.
  1083. #endif /* USE_ITT_BUILD */
  1084. __kmp_destroy_user_lock_with_checks(lck);
  1085. __kmp_user_lock_free(&idx, gtid, lck);
  1086. lck = (kmp_user_lock_p)TCR_PTR(*lck_pp);
  1087. KMP_DEBUG_ASSERT(lck != NULL);
  1088. }
  1089. }
  1090. return lck;
  1091. }
  1092. #endif // KMP_USE_DYNAMIC_LOCK
  1093. /*!
  1094. @ingroup WORK_SHARING
  1095. @param loc source location information.
  1096. @param global_tid global thread number.
  1097. @param crit identity of the critical section. This could be a pointer to a lock
  1098. associated with the critical section, or some other suitably unique value.
  1099. Enter code protected by a `critical` construct.
  1100. This function blocks until the executing thread can enter the critical section.
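A rough sketch of a possible lowering of <tt>#pragma omp critical</tt>
(compiler-dependent; the names are illustrative). The same <tt>crit</tt>
object must be passed to the matching @ref __kmpc_end_critical:
@code
static kmp_critical_name crit; /* zero-initialized, one per critical name */
__kmpc_critical(&loc, gtid, &crit);
/* ... protected code ... */
__kmpc_end_critical(&loc, gtid, &crit);
@endcode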
  1101. */
  1102. void __kmpc_critical(ident_t *loc, kmp_int32 global_tid,
  1103. kmp_critical_name *crit) {
  1104. #if KMP_USE_DYNAMIC_LOCK
  1105. #if OMPT_SUPPORT && OMPT_OPTIONAL
  1106. OMPT_STORE_RETURN_ADDRESS(global_tid);
  1107. #endif // OMPT_SUPPORT
  1108. __kmpc_critical_with_hint(loc, global_tid, crit, omp_lock_hint_none);
  1109. #else
  1110. KMP_COUNT_BLOCK(OMP_CRITICAL);
  1111. #if OMPT_SUPPORT && OMPT_OPTIONAL
  1112. ompt_state_t prev_state = ompt_state_undefined;
  1113. ompt_thread_info_t ti;
  1114. #endif
  1115. kmp_user_lock_p lck;
  1116. KC_TRACE(10, ("__kmpc_critical: called T#%d\n", global_tid));
  1117. __kmp_assert_valid_gtid(global_tid);
  1118. // TODO: add THR_OVHD_STATE
  1119. KMP_PUSH_PARTITIONED_TIMER(OMP_critical_wait);
  1120. KMP_CHECK_USER_LOCK_INIT();
  1121. if ((__kmp_user_lock_kind == lk_tas) &&
  1122. (sizeof(lck->tas.lk.poll) <= OMP_CRITICAL_SIZE)) {
  1123. lck = (kmp_user_lock_p)crit;
  1124. }
  1125. #if KMP_USE_FUTEX
  1126. else if ((__kmp_user_lock_kind == lk_futex) &&
  1127. (sizeof(lck->futex.lk.poll) <= OMP_CRITICAL_SIZE)) {
  1128. lck = (kmp_user_lock_p)crit;
  1129. }
  1130. #endif
  1131. else { // ticket, queuing or drdpa
  1132. lck = __kmp_get_critical_section_ptr(crit, loc, global_tid);
  1133. }
  1134. if (__kmp_env_consistency_check)
  1135. __kmp_push_sync(global_tid, ct_critical, loc, lck);
1136. // Since the critical directive binds to all threads, not just the current
1137. // team, we have to check this even if we are in a serialized team.
1138. // Also, even if we are the uber thread, we still have to acquire the lock,
1139. // as we may contend with sibling threads.
  1140. #if USE_ITT_BUILD
  1141. __kmp_itt_critical_acquiring(lck);
  1142. #endif /* USE_ITT_BUILD */
  1143. #if OMPT_SUPPORT && OMPT_OPTIONAL
1144. OMPT_STORE_RETURN_ADDRESS(global_tid);
  1145. void *codeptr_ra = NULL;
  1146. if (ompt_enabled.enabled) {
  1147. ti = __kmp_threads[global_tid]->th.ompt_thread_info;
  1148. /* OMPT state update */
  1149. prev_state = ti.state;
  1150. ti.wait_id = (ompt_wait_id_t)(uintptr_t)lck;
  1151. ti.state = ompt_state_wait_critical;
  1152. /* OMPT event callback */
1153. codeptr_ra = OMPT_LOAD_RETURN_ADDRESS(global_tid);
  1154. if (ompt_enabled.ompt_callback_mutex_acquire) {
  1155. ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
  1156. ompt_mutex_critical, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
  1157. (ompt_wait_id_t)(uintptr_t)lck, codeptr_ra);
  1158. }
  1159. }
  1160. #endif
1161. // The value of 'crit' should be suitable for use as the critical_id of the
1162. // critical section directive.
  1163. __kmp_acquire_user_lock_with_checks(lck, global_tid);
  1164. #if USE_ITT_BUILD
  1165. __kmp_itt_critical_acquired(lck);
  1166. #endif /* USE_ITT_BUILD */
  1167. #if OMPT_SUPPORT && OMPT_OPTIONAL
  1168. if (ompt_enabled.enabled) {
  1169. /* OMPT state update */
  1170. ti.state = prev_state;
  1171. ti.wait_id = 0;
  1172. /* OMPT event callback */
  1173. if (ompt_enabled.ompt_callback_mutex_acquired) {
  1174. ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
  1175. ompt_mutex_critical, (ompt_wait_id_t)(uintptr_t)lck, codeptr_ra);
  1176. }
  1177. }
  1178. #endif
  1179. KMP_POP_PARTITIONED_TIMER();
  1180. KMP_PUSH_PARTITIONED_TIMER(OMP_critical);
  1181. KA_TRACE(15, ("__kmpc_critical: done T#%d\n", global_tid));
  1182. #endif // KMP_USE_DYNAMIC_LOCK
  1183. }
  1184. #if KMP_USE_DYNAMIC_LOCK
  1185. // Converts the given hint to an internal lock implementation
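// For example (subject to the build configuration and CPU support):
//   omp_lock_hint_contended                  -> lockseq_queuing
//   omp_lock_hint_uncontended, no speculation -> lockseq_tas
//   omp_lock_hint_speculative                -> an RTM sequence if RTM is present,
//                                               otherwise __kmp_user_lock_seq
//   conflicting hint combinations            -> __kmp_user_lock_seq (default)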
  1186. static __forceinline kmp_dyna_lockseq_t __kmp_map_hint_to_lock(uintptr_t hint) {
  1187. #if KMP_USE_TSX
  1188. #define KMP_TSX_LOCK(seq) lockseq_##seq
  1189. #else
  1190. #define KMP_TSX_LOCK(seq) __kmp_user_lock_seq
  1191. #endif
  1192. #if KMP_ARCH_X86 || KMP_ARCH_X86_64
  1193. #define KMP_CPUINFO_RTM (__kmp_cpuinfo.flags.rtm)
  1194. #else
  1195. #define KMP_CPUINFO_RTM 0
  1196. #endif
  1197. // Hints that do not require further logic
  1198. if (hint & kmp_lock_hint_hle)
  1199. return KMP_TSX_LOCK(hle);
  1200. if (hint & kmp_lock_hint_rtm)
  1201. return KMP_CPUINFO_RTM ? KMP_TSX_LOCK(rtm_queuing) : __kmp_user_lock_seq;
  1202. if (hint & kmp_lock_hint_adaptive)
  1203. return KMP_CPUINFO_RTM ? KMP_TSX_LOCK(adaptive) : __kmp_user_lock_seq;
  1204. // Rule out conflicting hints first by returning the default lock
  1205. if ((hint & omp_lock_hint_contended) && (hint & omp_lock_hint_uncontended))
  1206. return __kmp_user_lock_seq;
  1207. if ((hint & omp_lock_hint_speculative) &&
  1208. (hint & omp_lock_hint_nonspeculative))
  1209. return __kmp_user_lock_seq;
  1210. // Do not even consider speculation when it appears to be contended
  1211. if (hint & omp_lock_hint_contended)
  1212. return lockseq_queuing;
  1213. // Uncontended lock without speculation
  1214. if ((hint & omp_lock_hint_uncontended) && !(hint & omp_lock_hint_speculative))
  1215. return lockseq_tas;
  1216. // Use RTM lock for speculation
  1217. if (hint & omp_lock_hint_speculative)
  1218. return KMP_CPUINFO_RTM ? KMP_TSX_LOCK(rtm_spin) : __kmp_user_lock_seq;
  1219. return __kmp_user_lock_seq;
  1220. }
  1221. #if OMPT_SUPPORT && OMPT_OPTIONAL
  1222. #if KMP_USE_DYNAMIC_LOCK
  1223. static kmp_mutex_impl_t
  1224. __ompt_get_mutex_impl_type(void *user_lock, kmp_indirect_lock_t *ilock = 0) {
  1225. if (user_lock) {
  1226. switch (KMP_EXTRACT_D_TAG(user_lock)) {
  1227. case 0:
  1228. break;
  1229. #if KMP_USE_FUTEX
  1230. case locktag_futex:
  1231. return kmp_mutex_impl_queuing;
  1232. #endif
  1233. case locktag_tas:
  1234. return kmp_mutex_impl_spin;
  1235. #if KMP_USE_TSX
  1236. case locktag_hle:
  1237. case locktag_rtm_spin:
  1238. return kmp_mutex_impl_speculative;
  1239. #endif
  1240. default:
  1241. return kmp_mutex_impl_none;
  1242. }
  1243. ilock = KMP_LOOKUP_I_LOCK(user_lock);
  1244. }
  1245. KMP_ASSERT(ilock);
  1246. switch (ilock->type) {
  1247. #if KMP_USE_TSX
  1248. case locktag_adaptive:
  1249. case locktag_rtm_queuing:
  1250. return kmp_mutex_impl_speculative;
  1251. #endif
  1252. case locktag_nested_tas:
  1253. return kmp_mutex_impl_spin;
  1254. #if KMP_USE_FUTEX
  1255. case locktag_nested_futex:
  1256. #endif
  1257. case locktag_ticket:
  1258. case locktag_queuing:
  1259. case locktag_drdpa:
  1260. case locktag_nested_ticket:
  1261. case locktag_nested_queuing:
  1262. case locktag_nested_drdpa:
  1263. return kmp_mutex_impl_queuing;
  1264. default:
  1265. return kmp_mutex_impl_none;
  1266. }
  1267. }
  1268. #else
  1269. // For locks without dynamic binding
  1270. static kmp_mutex_impl_t __ompt_get_mutex_impl_type() {
  1271. switch (__kmp_user_lock_kind) {
  1272. case lk_tas:
  1273. return kmp_mutex_impl_spin;
  1274. #if KMP_USE_FUTEX
  1275. case lk_futex:
  1276. #endif
  1277. case lk_ticket:
  1278. case lk_queuing:
  1279. case lk_drdpa:
  1280. return kmp_mutex_impl_queuing;
  1281. #if KMP_USE_TSX
  1282. case lk_hle:
  1283. case lk_rtm_queuing:
  1284. case lk_rtm_spin:
  1285. case lk_adaptive:
  1286. return kmp_mutex_impl_speculative;
  1287. #endif
  1288. default:
  1289. return kmp_mutex_impl_none;
  1290. }
  1291. }
  1292. #endif // KMP_USE_DYNAMIC_LOCK
  1293. #endif // OMPT_SUPPORT && OMPT_OPTIONAL
  1294. /*!
  1295. @ingroup WORK_SHARING
  1296. @param loc source location information.
  1297. @param global_tid global thread number.
  1298. @param crit identity of the critical section. This could be a pointer to a lock
  1299. associated with the critical section, or some other suitably unique value.
  1300. @param hint the lock hint.
  1301. Enter code protected by a `critical` construct with a hint. The hint value is
  1302. used to suggest a lock implementation. This function blocks until the executing
  1303. thread can enter the critical section unless the hint suggests use of
  1304. speculative execution and the hardware supports it.
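Illustrative only (the mapping from hints to lock implementations is done by
__kmp_map_hint_to_lock and depends on the build and hardware): a construct such
as <tt>#pragma omp critical (name) hint(omp_sync_hint_speculative)</tt> could
be lowered to
@code
__kmpc_critical_with_hint(&loc, gtid, &name_crit, omp_lock_hint_speculative);
/* ... protected code ... */
__kmpc_end_critical(&loc, gtid, &name_crit);
@endcode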
  1305. */
  1306. void __kmpc_critical_with_hint(ident_t *loc, kmp_int32 global_tid,
  1307. kmp_critical_name *crit, uint32_t hint) {
  1308. KMP_COUNT_BLOCK(OMP_CRITICAL);
  1309. kmp_user_lock_p lck;
  1310. #if OMPT_SUPPORT && OMPT_OPTIONAL
  1311. ompt_state_t prev_state = ompt_state_undefined;
  1312. ompt_thread_info_t ti;
  1313. // This is the case, if called from __kmpc_critical:
  1314. void *codeptr = OMPT_LOAD_RETURN_ADDRESS(global_tid);
  1315. if (!codeptr)
  1316. codeptr = OMPT_GET_RETURN_ADDRESS(0);
  1317. #endif
  1318. KC_TRACE(10, ("__kmpc_critical: called T#%d\n", global_tid));
  1319. __kmp_assert_valid_gtid(global_tid);
  1320. kmp_dyna_lock_t *lk = (kmp_dyna_lock_t *)crit;
  1321. // Check if it is initialized.
  1322. KMP_PUSH_PARTITIONED_TIMER(OMP_critical_wait);
  1323. kmp_dyna_lockseq_t lockseq = __kmp_map_hint_to_lock(hint);
  1324. if (*lk == 0) {
  1325. if (KMP_IS_D_LOCK(lockseq)) {
  1326. KMP_COMPARE_AND_STORE_ACQ32((volatile kmp_int32 *)crit, 0,
  1327. KMP_GET_D_TAG(lockseq));
  1328. } else {
  1329. __kmp_init_indirect_csptr(crit, loc, global_tid, KMP_GET_I_TAG(lockseq));
  1330. }
  1331. }
1332. // Branch to access the actual lock object and perform the set operation. This
1333. // branching is unavoidable since this lock initialization does not follow the
1334. // normal dispatch path (the lock table is not used).
  1335. if (KMP_EXTRACT_D_TAG(lk) != 0) {
  1336. lck = (kmp_user_lock_p)lk;
  1337. if (__kmp_env_consistency_check) {
  1338. __kmp_push_sync(global_tid, ct_critical, loc, lck,
  1339. __kmp_map_hint_to_lock(hint));
  1340. }
  1341. #if USE_ITT_BUILD
  1342. __kmp_itt_critical_acquiring(lck);
  1343. #endif
  1344. #if OMPT_SUPPORT && OMPT_OPTIONAL
  1345. if (ompt_enabled.enabled) {
  1346. ti = __kmp_threads[global_tid]->th.ompt_thread_info;
  1347. /* OMPT state update */
  1348. prev_state = ti.state;
  1349. ti.wait_id = (ompt_wait_id_t)(uintptr_t)lck;
  1350. ti.state = ompt_state_wait_critical;
  1351. /* OMPT event callback */
  1352. if (ompt_enabled.ompt_callback_mutex_acquire) {
  1353. ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
  1354. ompt_mutex_critical, (unsigned int)hint,
  1355. __ompt_get_mutex_impl_type(crit), (ompt_wait_id_t)(uintptr_t)lck,
  1356. codeptr);
  1357. }
  1358. }
  1359. #endif
  1360. #if KMP_USE_INLINED_TAS
  1361. if (lockseq == lockseq_tas && !__kmp_env_consistency_check) {
  1362. KMP_ACQUIRE_TAS_LOCK(lck, global_tid);
  1363. } else
  1364. #elif KMP_USE_INLINED_FUTEX
  1365. if (lockseq == lockseq_futex && !__kmp_env_consistency_check) {
  1366. KMP_ACQUIRE_FUTEX_LOCK(lck, global_tid);
  1367. } else
  1368. #endif
  1369. {
  1370. KMP_D_LOCK_FUNC(lk, set)(lk, global_tid);
  1371. }
  1372. } else {
  1373. kmp_indirect_lock_t *ilk = *((kmp_indirect_lock_t **)lk);
  1374. lck = ilk->lock;
  1375. if (__kmp_env_consistency_check) {
  1376. __kmp_push_sync(global_tid, ct_critical, loc, lck,
  1377. __kmp_map_hint_to_lock(hint));
  1378. }
  1379. #if USE_ITT_BUILD
  1380. __kmp_itt_critical_acquiring(lck);
  1381. #endif
  1382. #if OMPT_SUPPORT && OMPT_OPTIONAL
  1383. if (ompt_enabled.enabled) {
  1384. ti = __kmp_threads[global_tid]->th.ompt_thread_info;
  1385. /* OMPT state update */
  1386. prev_state = ti.state;
  1387. ti.wait_id = (ompt_wait_id_t)(uintptr_t)lck;
  1388. ti.state = ompt_state_wait_critical;
  1389. /* OMPT event callback */
  1390. if (ompt_enabled.ompt_callback_mutex_acquire) {
  1391. ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
  1392. ompt_mutex_critical, (unsigned int)hint,
  1393. __ompt_get_mutex_impl_type(0, ilk), (ompt_wait_id_t)(uintptr_t)lck,
  1394. codeptr);
  1395. }
  1396. }
  1397. #endif
  1398. KMP_I_LOCK_FUNC(ilk, set)(lck, global_tid);
  1399. }
  1400. KMP_POP_PARTITIONED_TIMER();
  1401. #if USE_ITT_BUILD
  1402. __kmp_itt_critical_acquired(lck);
  1403. #endif /* USE_ITT_BUILD */
  1404. #if OMPT_SUPPORT && OMPT_OPTIONAL
  1405. if (ompt_enabled.enabled) {
  1406. /* OMPT state update */
  1407. ti.state = prev_state;
  1408. ti.wait_id = 0;
  1409. /* OMPT event callback */
  1410. if (ompt_enabled.ompt_callback_mutex_acquired) {
  1411. ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
  1412. ompt_mutex_critical, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
  1413. }
  1414. }
  1415. #endif
  1416. KMP_PUSH_PARTITIONED_TIMER(OMP_critical);
  1417. KA_TRACE(15, ("__kmpc_critical: done T#%d\n", global_tid));
  1418. } // __kmpc_critical_with_hint
  1419. #endif // KMP_USE_DYNAMIC_LOCK
  1420. /*!
  1421. @ingroup WORK_SHARING
  1422. @param loc source location information.
1423. @param global_tid global thread number.
  1424. @param crit identity of the critical section. This could be a pointer to a lock
  1425. associated with the critical section, or some other suitably unique value.
  1426. Leave a critical section, releasing any lock that was held during its execution.
  1427. */
  1428. void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid,
  1429. kmp_critical_name *crit) {
  1430. kmp_user_lock_p lck;
  1431. KC_TRACE(10, ("__kmpc_end_critical: called T#%d\n", global_tid));
  1432. #if KMP_USE_DYNAMIC_LOCK
  1433. int locktag = KMP_EXTRACT_D_TAG(crit);
  1434. if (locktag) {
  1435. lck = (kmp_user_lock_p)crit;
  1436. KMP_ASSERT(lck != NULL);
  1437. if (__kmp_env_consistency_check) {
  1438. __kmp_pop_sync(global_tid, ct_critical, loc);
  1439. }
  1440. #if USE_ITT_BUILD
  1441. __kmp_itt_critical_releasing(lck);
  1442. #endif
  1443. #if KMP_USE_INLINED_TAS
  1444. if (locktag == locktag_tas && !__kmp_env_consistency_check) {
  1445. KMP_RELEASE_TAS_LOCK(lck, global_tid);
  1446. } else
  1447. #elif KMP_USE_INLINED_FUTEX
  1448. if (locktag == locktag_futex && !__kmp_env_consistency_check) {
  1449. KMP_RELEASE_FUTEX_LOCK(lck, global_tid);
  1450. } else
  1451. #endif
  1452. {
  1453. KMP_D_LOCK_FUNC(lck, unset)((kmp_dyna_lock_t *)lck, global_tid);
  1454. }
  1455. } else {
  1456. kmp_indirect_lock_t *ilk =
  1457. (kmp_indirect_lock_t *)TCR_PTR(*((kmp_indirect_lock_t **)crit));
  1458. KMP_ASSERT(ilk != NULL);
  1459. lck = ilk->lock;
  1460. if (__kmp_env_consistency_check) {
  1461. __kmp_pop_sync(global_tid, ct_critical, loc);
  1462. }
  1463. #if USE_ITT_BUILD
  1464. __kmp_itt_critical_releasing(lck);
  1465. #endif
  1466. KMP_I_LOCK_FUNC(ilk, unset)(lck, global_tid);
  1467. }
  1468. #else // KMP_USE_DYNAMIC_LOCK
  1469. if ((__kmp_user_lock_kind == lk_tas) &&
  1470. (sizeof(lck->tas.lk.poll) <= OMP_CRITICAL_SIZE)) {
  1471. lck = (kmp_user_lock_p)crit;
  1472. }
  1473. #if KMP_USE_FUTEX
  1474. else if ((__kmp_user_lock_kind == lk_futex) &&
  1475. (sizeof(lck->futex.lk.poll) <= OMP_CRITICAL_SIZE)) {
  1476. lck = (kmp_user_lock_p)crit;
  1477. }
  1478. #endif
  1479. else { // ticket, queuing or drdpa
  1480. lck = (kmp_user_lock_p)TCR_PTR(*((kmp_user_lock_p *)crit));
  1481. }
  1482. KMP_ASSERT(lck != NULL);
  1483. if (__kmp_env_consistency_check)
  1484. __kmp_pop_sync(global_tid, ct_critical, loc);
  1485. #if USE_ITT_BUILD
  1486. __kmp_itt_critical_releasing(lck);
  1487. #endif /* USE_ITT_BUILD */
1488. // The value of 'crit' should be suitable for use as the critical_id of the
1489. // critical section directive.
  1490. __kmp_release_user_lock_with_checks(lck, global_tid);
  1491. #endif // KMP_USE_DYNAMIC_LOCK
  1492. #if OMPT_SUPPORT && OMPT_OPTIONAL
  1493. /* OMPT release event triggers after lock is released; place here to trigger
  1494. * for all #if branches */
  1495. OMPT_STORE_RETURN_ADDRESS(global_tid);
  1496. if (ompt_enabled.ompt_callback_mutex_released) {
  1497. ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
  1498. ompt_mutex_critical, (ompt_wait_id_t)(uintptr_t)lck,
  1499. OMPT_LOAD_RETURN_ADDRESS(0));
  1500. }
  1501. #endif
  1502. KMP_POP_PARTITIONED_TIMER();
  1503. KA_TRACE(15, ("__kmpc_end_critical: done T#%d\n", global_tid));
  1504. }
  1505. /*!
  1506. @ingroup SYNCHRONIZATION
  1507. @param loc source location information
  1508. @param global_tid thread id.
  1509. @return one if the thread should execute the master block, zero otherwise
  1510. Start execution of a combined barrier and master. The barrier is executed inside
  1511. this function.
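A minimal usage sketch based on the description above (illustrative):
@code
if (__kmpc_barrier_master(&loc, gtid)) {
  /* ... executed only by the thread selected as master ... */
  __kmpc_end_barrier_master(&loc, gtid);
}
@endcode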
  1512. */
  1513. kmp_int32 __kmpc_barrier_master(ident_t *loc, kmp_int32 global_tid) {
  1514. int status;
  1515. KC_TRACE(10, ("__kmpc_barrier_master: called T#%d\n", global_tid));
  1516. __kmp_assert_valid_gtid(global_tid);
  1517. if (!TCR_4(__kmp_init_parallel))
  1518. __kmp_parallel_initialize();
  1519. __kmp_resume_if_soft_paused();
  1520. if (__kmp_env_consistency_check)
  1521. __kmp_check_barrier(global_tid, ct_barrier, loc);
  1522. #if OMPT_SUPPORT
  1523. ompt_frame_t *ompt_frame;
  1524. if (ompt_enabled.enabled) {
  1525. __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
  1526. if (ompt_frame->enter_frame.ptr == NULL)
  1527. ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
  1528. }
  1529. OMPT_STORE_RETURN_ADDRESS(global_tid);
  1530. #endif
  1531. #if USE_ITT_NOTIFY
  1532. __kmp_threads[global_tid]->th.th_ident = loc;
  1533. #endif
  1534. status = __kmp_barrier(bs_plain_barrier, global_tid, TRUE, 0, NULL, NULL);
  1535. #if OMPT_SUPPORT && OMPT_OPTIONAL
  1536. if (ompt_enabled.enabled) {
  1537. ompt_frame->enter_frame = ompt_data_none;
  1538. }
  1539. #endif
  1540. return (status != 0) ? 0 : 1;
  1541. }
  1542. /*!
  1543. @ingroup SYNCHRONIZATION
  1544. @param loc source location information
  1545. @param global_tid thread id.
  1546. Complete the execution of a combined barrier and master. This function should
  1547. only be called at the completion of the <tt>master</tt> code. Other threads will
  1548. still be waiting at the barrier and this call releases them.
  1549. */
  1550. void __kmpc_end_barrier_master(ident_t *loc, kmp_int32 global_tid) {
  1551. KC_TRACE(10, ("__kmpc_end_barrier_master: called T#%d\n", global_tid));
  1552. __kmp_assert_valid_gtid(global_tid);
  1553. __kmp_end_split_barrier(bs_plain_barrier, global_tid);
  1554. }
  1555. /*!
  1556. @ingroup SYNCHRONIZATION
  1557. @param loc source location information
  1558. @param global_tid thread id.
  1559. @return one if the thread should execute the master block, zero otherwise
  1560. Start execution of a combined barrier and master(nowait) construct.
  1561. The barrier is executed inside this function.
1562. There is no equivalent "end" function, since the master block has a nowait
clause: the barrier has already been executed inside this call, and the
bookkeeping of __kmpc_end_master is performed here as well.
  1563. */
  1564. kmp_int32 __kmpc_barrier_master_nowait(ident_t *loc, kmp_int32 global_tid) {
  1565. kmp_int32 ret;
  1566. KC_TRACE(10, ("__kmpc_barrier_master_nowait: called T#%d\n", global_tid));
  1567. __kmp_assert_valid_gtid(global_tid);
  1568. if (!TCR_4(__kmp_init_parallel))
  1569. __kmp_parallel_initialize();
  1570. __kmp_resume_if_soft_paused();
  1571. if (__kmp_env_consistency_check) {
  1572. if (loc == 0) {
  1573. KMP_WARNING(ConstructIdentInvalid); // ??? What does it mean for the user?
  1574. }
  1575. __kmp_check_barrier(global_tid, ct_barrier, loc);
  1576. }
  1577. #if OMPT_SUPPORT
  1578. ompt_frame_t *ompt_frame;
  1579. if (ompt_enabled.enabled) {
  1580. __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
  1581. if (ompt_frame->enter_frame.ptr == NULL)
  1582. ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
  1583. }
  1584. OMPT_STORE_RETURN_ADDRESS(global_tid);
  1585. #endif
  1586. #if USE_ITT_NOTIFY
  1587. __kmp_threads[global_tid]->th.th_ident = loc;
  1588. #endif
  1589. __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
  1590. #if OMPT_SUPPORT && OMPT_OPTIONAL
  1591. if (ompt_enabled.enabled) {
  1592. ompt_frame->enter_frame = ompt_data_none;
  1593. }
  1594. #endif
  1595. ret = __kmpc_master(loc, global_tid);
  1596. if (__kmp_env_consistency_check) {
  1597. /* there's no __kmpc_end_master called; so the (stats) */
  1598. /* actions of __kmpc_end_master are done here */
  1599. if (ret) {
  1600. /* only one thread should do the pop since only */
  1601. /* one did the push (see __kmpc_master()) */
  1602. __kmp_pop_sync(global_tid, ct_master, loc);
  1603. }
  1604. }
  1605. return (ret);
  1606. }
  1607. /* The BARRIER for a SINGLE process section is always explicit */
  1608. /*!
  1609. @ingroup WORK_SHARING
  1610. @param loc source location information
  1611. @param global_tid global thread number
  1612. @return One if this thread should execute the single construct, zero otherwise.
  1613. Test whether to execute a <tt>single</tt> construct.
  1614. There are no implicit barriers in the two "single" calls, rather the compiler
  1615. should introduce an explicit barrier if it is required.
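A minimal sketch of a possible lowering of <tt>#pragma omp single</tt>
(illustrative; the trailing barrier is only generated when no <tt>nowait</tt>
clause is present):
@code
if (__kmpc_single(&loc, gtid)) {
  /* ... single block ... */
  __kmpc_end_single(&loc, gtid);
}
__kmpc_barrier(&loc, gtid); /* explicit barrier added by the compiler */
@endcode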
  1616. */
  1617. kmp_int32 __kmpc_single(ident_t *loc, kmp_int32 global_tid) {
  1618. __kmp_assert_valid_gtid(global_tid);
  1619. kmp_int32 rc = __kmp_enter_single(global_tid, loc, TRUE);
  1620. if (rc) {
  1621. // We are going to execute the single statement, so we should count it.
  1622. KMP_COUNT_BLOCK(OMP_SINGLE);
  1623. KMP_PUSH_PARTITIONED_TIMER(OMP_single);
  1624. }
  1625. #if OMPT_SUPPORT && OMPT_OPTIONAL
  1626. kmp_info_t *this_thr = __kmp_threads[global_tid];
  1627. kmp_team_t *team = this_thr->th.th_team;
  1628. int tid = __kmp_tid_from_gtid(global_tid);
  1629. if (ompt_enabled.enabled) {
  1630. if (rc) {
  1631. if (ompt_enabled.ompt_callback_work) {
  1632. ompt_callbacks.ompt_callback(ompt_callback_work)(
  1633. ompt_work_single_executor, ompt_scope_begin,
  1634. &(team->t.ompt_team_info.parallel_data),
  1635. &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
  1636. 1, OMPT_GET_RETURN_ADDRESS(0));
  1637. }
  1638. } else {
  1639. if (ompt_enabled.ompt_callback_work) {
  1640. ompt_callbacks.ompt_callback(ompt_callback_work)(
  1641. ompt_work_single_other, ompt_scope_begin,
  1642. &(team->t.ompt_team_info.parallel_data),
  1643. &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
  1644. 1, OMPT_GET_RETURN_ADDRESS(0));
  1645. ompt_callbacks.ompt_callback(ompt_callback_work)(
  1646. ompt_work_single_other, ompt_scope_end,
  1647. &(team->t.ompt_team_info.parallel_data),
  1648. &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
  1649. 1, OMPT_GET_RETURN_ADDRESS(0));
  1650. }
  1651. }
  1652. }
  1653. #endif
  1654. return rc;
  1655. }
  1656. /*!
  1657. @ingroup WORK_SHARING
  1658. @param loc source location information
  1659. @param global_tid global thread number
  1660. Mark the end of a <tt>single</tt> construct. This function should
  1661. only be called by the thread that executed the block of code protected
  1662. by the `single` construct.
  1663. */
  1664. void __kmpc_end_single(ident_t *loc, kmp_int32 global_tid) {
  1665. __kmp_assert_valid_gtid(global_tid);
  1666. __kmp_exit_single(global_tid);
  1667. KMP_POP_PARTITIONED_TIMER();
  1668. #if OMPT_SUPPORT && OMPT_OPTIONAL
  1669. kmp_info_t *this_thr = __kmp_threads[global_tid];
  1670. kmp_team_t *team = this_thr->th.th_team;
  1671. int tid = __kmp_tid_from_gtid(global_tid);
  1672. if (ompt_enabled.ompt_callback_work) {
  1673. ompt_callbacks.ompt_callback(ompt_callback_work)(
  1674. ompt_work_single_executor, ompt_scope_end,
  1675. &(team->t.ompt_team_info.parallel_data),
  1676. &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 1,
  1677. OMPT_GET_RETURN_ADDRESS(0));
  1678. }
  1679. #endif
  1680. }
  1681. /*!
  1682. @ingroup WORK_SHARING
  1683. @param loc Source location
  1684. @param global_tid Global thread id
  1685. Mark the end of a statically scheduled loop.
  1686. */
  1687. void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid) {
  1688. KMP_POP_PARTITIONED_TIMER();
  1689. KE_TRACE(10, ("__kmpc_for_static_fini called T#%d\n", global_tid));
  1690. #if OMPT_SUPPORT && OMPT_OPTIONAL
  1691. if (ompt_enabled.ompt_callback_work) {
  1692. ompt_work_t ompt_work_type = ompt_work_loop;
  1693. ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL);
  1694. ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
  1695. // Determine workshare type
  1696. if (loc != NULL) {
  1697. if ((loc->flags & KMP_IDENT_WORK_LOOP) != 0) {
  1698. ompt_work_type = ompt_work_loop;
  1699. } else if ((loc->flags & KMP_IDENT_WORK_SECTIONS) != 0) {
  1700. ompt_work_type = ompt_work_sections;
  1701. } else if ((loc->flags & KMP_IDENT_WORK_DISTRIBUTE) != 0) {
  1702. ompt_work_type = ompt_work_distribute;
  1703. } else {
  1704. // use default set above.
  1705. // a warning about this case is provided in __kmpc_for_static_init
  1706. }
  1707. KMP_DEBUG_ASSERT(ompt_work_type);
  1708. }
  1709. ompt_callbacks.ompt_callback(ompt_callback_work)(
  1710. ompt_work_type, ompt_scope_end, &(team_info->parallel_data),
  1711. &(task_info->task_data), 0, OMPT_GET_RETURN_ADDRESS(0));
  1712. }
  1713. #endif
  1714. if (__kmp_env_consistency_check)
  1715. __kmp_pop_workshare(global_tid, ct_pdo, loc);
  1716. }
  1717. // User routines which take C-style arguments (call by value)
  1718. // different from the Fortran equivalent routines
  1719. void ompc_set_num_threads(int arg) {
  1720. // !!!!! TODO: check the per-task binding
  1721. __kmp_set_num_threads(arg, __kmp_entry_gtid());
  1722. }
  1723. void ompc_set_dynamic(int flag) {
  1724. kmp_info_t *thread;
  1725. /* For the thread-private implementation of the internal controls */
  1726. thread = __kmp_entry_thread();
  1727. __kmp_save_internal_controls(thread);
  1728. set__dynamic(thread, flag ? true : false);
  1729. }
  1730. void ompc_set_nested(int flag) {
  1731. kmp_info_t *thread;
  1732. /* For the thread-private internal controls implementation */
  1733. thread = __kmp_entry_thread();
  1734. __kmp_save_internal_controls(thread);
  1735. set__max_active_levels(thread, flag ? __kmp_dflt_max_active_levels : 1);
  1736. }
  1737. void ompc_set_max_active_levels(int max_active_levels) {
  1738. /* TO DO */
  1739. /* we want per-task implementation of this internal control */
  1740. /* For the per-thread internal controls implementation */
  1741. __kmp_set_max_active_levels(__kmp_entry_gtid(), max_active_levels);
  1742. }
  1743. void ompc_set_schedule(omp_sched_t kind, int modifier) {
  1744. // !!!!! TODO: check the per-task binding
  1745. __kmp_set_schedule(__kmp_entry_gtid(), (kmp_sched_t)kind, modifier);
  1746. }
  1747. int ompc_get_ancestor_thread_num(int level) {
  1748. return __kmp_get_ancestor_thread_num(__kmp_entry_gtid(), level);
  1749. }
  1750. int ompc_get_team_size(int level) {
  1751. return __kmp_get_team_size(__kmp_entry_gtid(), level);
  1752. }
  1753. /* OpenMP 5.0 Affinity Format API */
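// Illustrative user-level calls that reach these entry points (the format
// string is only an example; field specifiers follow the OpenMP 5.0
// affinity-format syntax):
//   omp_set_affinity_format("thread %n of %N bound to %A");
//   omp_display_affinity(NULL);                    // NULL => use stored format
//   size_t needed = omp_capture_affinity(buf, sizeof(buf), NULL);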
  1754. void KMP_EXPAND_NAME(ompc_set_affinity_format)(char const *format) {
  1755. if (!__kmp_init_serial) {
  1756. __kmp_serial_initialize();
  1757. }
  1758. __kmp_strncpy_truncate(__kmp_affinity_format, KMP_AFFINITY_FORMAT_SIZE,
  1759. format, KMP_STRLEN(format) + 1);
  1760. }
  1761. size_t KMP_EXPAND_NAME(ompc_get_affinity_format)(char *buffer, size_t size) {
  1762. size_t format_size;
  1763. if (!__kmp_init_serial) {
  1764. __kmp_serial_initialize();
  1765. }
  1766. format_size = KMP_STRLEN(__kmp_affinity_format);
  1767. if (buffer && size) {
  1768. __kmp_strncpy_truncate(buffer, size, __kmp_affinity_format,
  1769. format_size + 1);
  1770. }
  1771. return format_size;
  1772. }
  1773. void KMP_EXPAND_NAME(ompc_display_affinity)(char const *format) {
  1774. int gtid;
  1775. if (!TCR_4(__kmp_init_middle)) {
  1776. __kmp_middle_initialize();
  1777. }
  1778. __kmp_assign_root_init_mask();
  1779. gtid = __kmp_get_gtid();
  1780. #if KMP_AFFINITY_SUPPORTED
  1781. if (__kmp_threads[gtid]->th.th_team->t.t_level == 0 && __kmp_affin_reset) {
  1782. __kmp_reset_root_init_mask(gtid);
  1783. }
  1784. #endif
  1785. __kmp_aux_display_affinity(gtid, format);
  1786. }
  1787. size_t KMP_EXPAND_NAME(ompc_capture_affinity)(char *buffer, size_t buf_size,
  1788. char const *format) {
  1789. int gtid;
  1790. size_t num_required;
  1791. kmp_str_buf_t capture_buf;
  1792. if (!TCR_4(__kmp_init_middle)) {
  1793. __kmp_middle_initialize();
  1794. }
  1795. __kmp_assign_root_init_mask();
  1796. gtid = __kmp_get_gtid();
  1797. #if KMP_AFFINITY_SUPPORTED
  1798. if (__kmp_threads[gtid]->th.th_team->t.t_level == 0 && __kmp_affin_reset) {
  1799. __kmp_reset_root_init_mask(gtid);
  1800. }
  1801. #endif
  1802. __kmp_str_buf_init(&capture_buf);
  1803. num_required = __kmp_aux_capture_affinity(gtid, format, &capture_buf);
  1804. if (buffer && buf_size) {
  1805. __kmp_strncpy_truncate(buffer, buf_size, capture_buf.str,
  1806. capture_buf.used + 1);
  1807. }
  1808. __kmp_str_buf_free(&capture_buf);
  1809. return num_required;
  1810. }
  1811. void kmpc_set_stacksize(int arg) {
  1812. // __kmp_aux_set_stacksize initializes the library if needed
  1813. __kmp_aux_set_stacksize(arg);
  1814. }
  1815. void kmpc_set_stacksize_s(size_t arg) {
  1816. // __kmp_aux_set_stacksize initializes the library if needed
  1817. __kmp_aux_set_stacksize(arg);
  1818. }
  1819. void kmpc_set_blocktime(int arg) {
  1820. int gtid, tid;
  1821. kmp_info_t *thread;
  1822. gtid = __kmp_entry_gtid();
  1823. tid = __kmp_tid_from_gtid(gtid);
  1824. thread = __kmp_thread_from_gtid(gtid);
  1825. __kmp_aux_set_blocktime(arg, thread, tid);
  1826. }
  1827. void kmpc_set_library(int arg) {
  1828. // __kmp_user_set_library initializes the library if needed
  1829. __kmp_user_set_library((enum library_type)arg);
  1830. }
  1831. void kmpc_set_defaults(char const *str) {
  1832. // __kmp_aux_set_defaults initializes the library if needed
  1833. __kmp_aux_set_defaults(str, KMP_STRLEN(str));
  1834. }
  1835. void kmpc_set_disp_num_buffers(int arg) {
  1836. // ignore after initialization because some teams have already
  1837. // allocated dispatch buffers
  1838. if (__kmp_init_serial == FALSE && arg >= KMP_MIN_DISP_NUM_BUFF &&
  1839. arg <= KMP_MAX_DISP_NUM_BUFF) {
  1840. __kmp_dispatch_num_buffers = arg;
  1841. }
  1842. }
  1843. int kmpc_set_affinity_mask_proc(int proc, void **mask) {
  1844. #if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED
  1845. return -1;
  1846. #else
  1847. if (!TCR_4(__kmp_init_middle)) {
  1848. __kmp_middle_initialize();
  1849. }
  1850. __kmp_assign_root_init_mask();
  1851. return __kmp_aux_set_affinity_mask_proc(proc, mask);
  1852. #endif
  1853. }
  1854. int kmpc_unset_affinity_mask_proc(int proc, void **mask) {
  1855. #if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED
  1856. return -1;
  1857. #else
  1858. if (!TCR_4(__kmp_init_middle)) {
  1859. __kmp_middle_initialize();
  1860. }
  1861. __kmp_assign_root_init_mask();
  1862. return __kmp_aux_unset_affinity_mask_proc(proc, mask);
  1863. #endif
  1864. }
  1865. int kmpc_get_affinity_mask_proc(int proc, void **mask) {
  1866. #if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED
  1867. return -1;
  1868. #else
  1869. if (!TCR_4(__kmp_init_middle)) {
  1870. __kmp_middle_initialize();
  1871. }
  1872. __kmp_assign_root_init_mask();
  1873. return __kmp_aux_get_affinity_mask_proc(proc, mask);
  1874. #endif
  1875. }
  1876. /* -------------------------------------------------------------------------- */
  1877. /*!
  1878. @ingroup THREADPRIVATE
  1879. @param loc source location information
  1880. @param gtid global thread number
  1881. @param cpy_size size of the cpy_data buffer
  1882. @param cpy_data pointer to data to be copied
  1883. @param cpy_func helper function to call for copying data
  1884. @param didit flag variable: 1=single thread; 0=not single thread
  1885. __kmpc_copyprivate implements the interface for the private data broadcast
  1886. needed for the copyprivate clause associated with a single region in an
  1887. OpenMP<sup>*</sup> program (both C and Fortran).
  1888. All threads participating in the parallel region call this routine.
  1889. One of the threads (called the single thread) should have the <tt>didit</tt>
  1890. variable set to 1 and all other threads should have that variable set to 0.
  1891. All threads pass a pointer to a data buffer (cpy_data) that they have built.
  1892. The OpenMP specification forbids the use of nowait on the single region when a
  1893. copyprivate clause is present. However, @ref __kmpc_copyprivate implements a
  1894. barrier internally to avoid race conditions, so the code generation for the
  1895. single region should avoid generating a barrier after the call to @ref
  1896. __kmpc_copyprivate.
  1897. The <tt>gtid</tt> parameter is the global thread id for the current thread.
  1898. The <tt>loc</tt> parameter is a pointer to source location information.
  1899. Internal implementation: The single thread will first copy its descriptor
  1900. address (cpy_data) to a team-private location, then the other threads will each
1901. call the function pointed to by the parameter cpy_func, which carries out the
1902. copy using the cpy_data buffer.
  1903. The cpy_func routine used for the copy and the contents of the data area defined
  1904. by cpy_data and cpy_size may be built in any fashion that will allow the copy
  1905. to be done. For instance, the cpy_data buffer can hold the actual data to be
  1906. copied or it may hold a list of pointers to the data. The cpy_func routine must
  1907. interpret the cpy_data buffer appropriately.
  1908. The interface to cpy_func is as follows:
  1909. @code
  1910. void cpy_func( void *destination, void *source )
  1911. @endcode
  1912. where void *destination is the cpy_data pointer for the thread being copied to
  1913. and void *source is the cpy_data pointer for the thread being copied from.
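A hedged sketch of one possible lowering of
<tt>#pragma omp single copyprivate(x)</tt>; the helper and the layout of
cpy_data are illustrative (real compilers may pack several pointers into a
single descriptor):
@code
/* hypothetical helper: dst/src are each a one-element pointer list */
static void copy_x(void *dst, void *src) {
  *((int **)dst)[0] = *((int **)src)[0];
}

/* in the parallel region, x is private to each thread: */
kmp_int32 didit = 0;
int *cpy_data[1] = {&x};
if (__kmpc_single(&loc, gtid)) {
  x = compute();  /* the single block defines x */
  didit = 1;
  __kmpc_end_single(&loc, gtid);
}
__kmpc_copyprivate(&loc, gtid, sizeof(cpy_data), cpy_data, copy_x, didit);
@endcode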
  1914. */
  1915. void __kmpc_copyprivate(ident_t *loc, kmp_int32 gtid, size_t cpy_size,
  1916. void *cpy_data, void (*cpy_func)(void *, void *),
  1917. kmp_int32 didit) {
  1918. void **data_ptr;
  1919. KC_TRACE(10, ("__kmpc_copyprivate: called T#%d\n", gtid));
  1920. __kmp_assert_valid_gtid(gtid);
  1921. KMP_MB();
  1922. data_ptr = &__kmp_team_from_gtid(gtid)->t.t_copypriv_data;
  1923. if (__kmp_env_consistency_check) {
  1924. if (loc == 0) {
  1925. KMP_WARNING(ConstructIdentInvalid);
  1926. }
  1927. }
  1928. // ToDo: Optimize the following two barriers into some kind of split barrier
  1929. if (didit)
  1930. *data_ptr = cpy_data;
  1931. #if OMPT_SUPPORT
  1932. ompt_frame_t *ompt_frame;
  1933. if (ompt_enabled.enabled) {
  1934. __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
  1935. if (ompt_frame->enter_frame.ptr == NULL)
  1936. ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
  1937. }
  1938. OMPT_STORE_RETURN_ADDRESS(gtid);
  1939. #endif
  1940. /* This barrier is not a barrier region boundary */
  1941. #if USE_ITT_NOTIFY
  1942. __kmp_threads[gtid]->th.th_ident = loc;
  1943. #endif
  1944. __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL);
  1945. if (!didit)
  1946. (*cpy_func)(cpy_data, *data_ptr);
  1947. // Consider next barrier a user-visible barrier for barrier region boundaries
  1948. // Nesting checks are already handled by the single construct checks
  1949. {
  1950. #if OMPT_SUPPORT
  1951. OMPT_STORE_RETURN_ADDRESS(gtid);
  1952. #endif
  1953. #if USE_ITT_NOTIFY
  1954. __kmp_threads[gtid]->th.th_ident = loc; // TODO: check if it is needed (e.g.
  1955. // tasks can overwrite the location)
  1956. #endif
  1957. __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL);
  1958. #if OMPT_SUPPORT && OMPT_OPTIONAL
  1959. if (ompt_enabled.enabled) {
  1960. ompt_frame->enter_frame = ompt_data_none;
  1961. }
  1962. #endif
  1963. }
  1964. }
  1965. /* --------------------------------------------------------------------------*/
  1966. /*!
  1967. @ingroup THREADPRIVATE
  1968. @param loc source location information
  1969. @param gtid global thread number
  1970. @param cpy_data pointer to the data to be saved/copied or 0
  1971. @return the saved pointer to the data
1972. __kmpc_copyprivate_light is a lighter version of __kmpc_copyprivate: it only
1973. saves the pointer it is given (when the pointer is non-zero, i.e. on the thread
1974. coming from the single region) and returns that pointer from every call (the
1975. single thread itself does not need it). This version does not do any actual
1976. data copying; the copying has to be done elsewhere, e.g. inline in the
1977. generated code. Because of this, the function does not issue a barrier after
1978. the copy the way __kmpc_copyprivate does, so the generated code must insert a
1979. barrier once all of the data has been copied.
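A sketch of the intended usage pattern (illustrative; the copy and the trailing
barrier are generated inline by the compiler):
@code
void *src = __kmpc_copyprivate_light(&loc, gtid, didit ? &x : NULL);
if (!didit)
  x = *(int *)src;          /* inline copy generated by the compiler */
__kmpc_barrier(&loc, gtid); /* barrier after all copying is done */
@endcode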
  1980. */
  1981. void *__kmpc_copyprivate_light(ident_t *loc, kmp_int32 gtid, void *cpy_data) {
  1982. void **data_ptr;
  1983. KC_TRACE(10, ("__kmpc_copyprivate_light: called T#%d\n", gtid));
  1984. KMP_MB();
  1985. data_ptr = &__kmp_team_from_gtid(gtid)->t.t_copypriv_data;
  1986. if (__kmp_env_consistency_check) {
  1987. if (loc == 0) {
  1988. KMP_WARNING(ConstructIdentInvalid);
  1989. }
  1990. }
  1991. // ToDo: Optimize the following barrier
  1992. if (cpy_data)
  1993. *data_ptr = cpy_data;
  1994. #if OMPT_SUPPORT
  1995. ompt_frame_t *ompt_frame;
  1996. if (ompt_enabled.enabled) {
  1997. __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
  1998. if (ompt_frame->enter_frame.ptr == NULL)
  1999. ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
  2000. OMPT_STORE_RETURN_ADDRESS(gtid);
  2001. }
  2002. #endif
  2003. /* This barrier is not a barrier region boundary */
  2004. #if USE_ITT_NOTIFY
  2005. __kmp_threads[gtid]->th.th_ident = loc;
  2006. #endif
  2007. __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL);
  2008. return *data_ptr;
  2009. }
  2010. /* -------------------------------------------------------------------------- */
  2011. #define INIT_LOCK __kmp_init_user_lock_with_checks
  2012. #define INIT_NESTED_LOCK __kmp_init_nested_user_lock_with_checks
  2013. #define ACQUIRE_LOCK __kmp_acquire_user_lock_with_checks
  2014. #define ACQUIRE_LOCK_TIMED __kmp_acquire_user_lock_with_checks_timed
  2015. #define ACQUIRE_NESTED_LOCK __kmp_acquire_nested_user_lock_with_checks
  2016. #define ACQUIRE_NESTED_LOCK_TIMED \
  2017. __kmp_acquire_nested_user_lock_with_checks_timed
  2018. #define RELEASE_LOCK __kmp_release_user_lock_with_checks
  2019. #define RELEASE_NESTED_LOCK __kmp_release_nested_user_lock_with_checks
  2020. #define TEST_LOCK __kmp_test_user_lock_with_checks
  2021. #define TEST_NESTED_LOCK __kmp_test_nested_user_lock_with_checks
  2022. #define DESTROY_LOCK __kmp_destroy_user_lock_with_checks
  2023. #define DESTROY_NESTED_LOCK __kmp_destroy_nested_user_lock_with_checks
  2024. // TODO: Make check abort messages use location info & pass it into
  2025. // with_checks routines
  2026. #if KMP_USE_DYNAMIC_LOCK
  2027. // internal lock initializer
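// Direct ("D") lock sequences store their tag directly in the lock word via
// KMP_INIT_D_LOCK; all other sequences allocate a separate indirect ("I") lock
// object via KMP_INIT_I_LOCK and later retrieve it with KMP_LOOKUP_I_LOCK.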
  2028. static __forceinline void __kmp_init_lock_with_hint(ident_t *loc, void **lock,
  2029. kmp_dyna_lockseq_t seq) {
  2030. if (KMP_IS_D_LOCK(seq)) {
  2031. KMP_INIT_D_LOCK(lock, seq);
  2032. #if USE_ITT_BUILD
  2033. __kmp_itt_lock_creating((kmp_user_lock_p)lock, NULL);
  2034. #endif
  2035. } else {
  2036. KMP_INIT_I_LOCK(lock, seq);
  2037. #if USE_ITT_BUILD
  2038. kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock);
  2039. __kmp_itt_lock_creating(ilk->lock, loc);
  2040. #endif
  2041. }
  2042. }
  2043. // internal nest lock initializer
  2044. static __forceinline void
  2045. __kmp_init_nest_lock_with_hint(ident_t *loc, void **lock,
  2046. kmp_dyna_lockseq_t seq) {
  2047. #if KMP_USE_TSX
  2048. // Don't have nested lock implementation for speculative locks
  2049. if (seq == lockseq_hle || seq == lockseq_rtm_queuing ||
  2050. seq == lockseq_rtm_spin || seq == lockseq_adaptive)
  2051. seq = __kmp_user_lock_seq;
  2052. #endif
  2053. switch (seq) {
  2054. case lockseq_tas:
  2055. seq = lockseq_nested_tas;
  2056. break;
  2057. #if KMP_USE_FUTEX
  2058. case lockseq_futex:
  2059. seq = lockseq_nested_futex;
  2060. break;
  2061. #endif
  2062. case lockseq_ticket:
  2063. seq = lockseq_nested_ticket;
  2064. break;
  2065. case lockseq_queuing:
  2066. seq = lockseq_nested_queuing;
  2067. break;
  2068. case lockseq_drdpa:
  2069. seq = lockseq_nested_drdpa;
  2070. break;
  2071. default:
  2072. seq = lockseq_nested_queuing;
  2073. }
  2074. KMP_INIT_I_LOCK(lock, seq);
  2075. #if USE_ITT_BUILD
  2076. kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock);
  2077. __kmp_itt_lock_creating(ilk->lock, loc);
  2078. #endif
  2079. }
  2080. /* initialize the lock with a hint */
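/* Illustrative user-level call that typically reaches this function:
     omp_lock_t l;
     omp_init_lock_with_hint(&l, omp_lock_hint_speculative);
   The hint is mapped to a lock implementation by __kmp_map_hint_to_lock(). */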
  2081. void __kmpc_init_lock_with_hint(ident_t *loc, kmp_int32 gtid, void **user_lock,
  2082. uintptr_t hint) {
  2083. KMP_DEBUG_ASSERT(__kmp_init_serial);
  2084. if (__kmp_env_consistency_check && user_lock == NULL) {
  2085. KMP_FATAL(LockIsUninitialized, "omp_init_lock_with_hint");
  2086. }
  2087. __kmp_init_lock_with_hint(loc, user_lock, __kmp_map_hint_to_lock(hint));
  2088. #if OMPT_SUPPORT && OMPT_OPTIONAL
  2089. // This is the case, if called from omp_init_lock_with_hint:
  2090. void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  2091. if (!codeptr)
  2092. codeptr = OMPT_GET_RETURN_ADDRESS(0);
  2093. if (ompt_enabled.ompt_callback_lock_init) {
  2094. ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
  2095. ompt_mutex_lock, (omp_lock_hint_t)hint,
  2096. __ompt_get_mutex_impl_type(user_lock),
  2097. (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
  2098. }
  2099. #endif
  2100. }
  2101. /* initialize the lock with a hint */
  2102. void __kmpc_init_nest_lock_with_hint(ident_t *loc, kmp_int32 gtid,
  2103. void **user_lock, uintptr_t hint) {
  2104. KMP_DEBUG_ASSERT(__kmp_init_serial);
  2105. if (__kmp_env_consistency_check && user_lock == NULL) {
  2106. KMP_FATAL(LockIsUninitialized, "omp_init_nest_lock_with_hint");
  2107. }
  2108. __kmp_init_nest_lock_with_hint(loc, user_lock, __kmp_map_hint_to_lock(hint));
  2109. #if OMPT_SUPPORT && OMPT_OPTIONAL
  2110. // This is the case, if called from omp_init_lock_with_hint:
  2111. void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  2112. if (!codeptr)
  2113. codeptr = OMPT_GET_RETURN_ADDRESS(0);
  2114. if (ompt_enabled.ompt_callback_lock_init) {
  2115. ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
  2116. ompt_mutex_nest_lock, (omp_lock_hint_t)hint,
  2117. __ompt_get_mutex_impl_type(user_lock),
  2118. (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
  2119. }
  2120. #endif
  2121. }
  2122. #endif // KMP_USE_DYNAMIC_LOCK
  2123. /* initialize the lock */
  2124. void __kmpc_init_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
  2125. #if KMP_USE_DYNAMIC_LOCK
  2126. KMP_DEBUG_ASSERT(__kmp_init_serial);
  2127. if (__kmp_env_consistency_check && user_lock == NULL) {
  2128. KMP_FATAL(LockIsUninitialized, "omp_init_lock");
  2129. }
  2130. __kmp_init_lock_with_hint(loc, user_lock, __kmp_user_lock_seq);
  2131. #if OMPT_SUPPORT && OMPT_OPTIONAL
  2132. // This is the case, if called from omp_init_lock_with_hint:
  2133. void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  2134. if (!codeptr)
  2135. codeptr = OMPT_GET_RETURN_ADDRESS(0);
  2136. if (ompt_enabled.ompt_callback_lock_init) {
  2137. ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
  2138. ompt_mutex_lock, omp_lock_hint_none,
  2139. __ompt_get_mutex_impl_type(user_lock),
  2140. (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
  2141. }
  2142. #endif
  2143. #else // KMP_USE_DYNAMIC_LOCK
  2144. static char const *const func = "omp_init_lock";
  2145. kmp_user_lock_p lck;
  2146. KMP_DEBUG_ASSERT(__kmp_init_serial);
  2147. if (__kmp_env_consistency_check) {
  2148. if (user_lock == NULL) {
  2149. KMP_FATAL(LockIsUninitialized, func);
  2150. }
  2151. }
  2152. KMP_CHECK_USER_LOCK_INIT();
  2153. if ((__kmp_user_lock_kind == lk_tas) &&
  2154. (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
  2155. lck = (kmp_user_lock_p)user_lock;
  2156. }
  2157. #if KMP_USE_FUTEX
  2158. else if ((__kmp_user_lock_kind == lk_futex) &&
  2159. (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
  2160. lck = (kmp_user_lock_p)user_lock;
  2161. }
  2162. #endif
  2163. else {
  2164. lck = __kmp_user_lock_allocate(user_lock, gtid, 0);
  2165. }
  2166. INIT_LOCK(lck);
  2167. __kmp_set_user_lock_location(lck, loc);
  2168. #if OMPT_SUPPORT && OMPT_OPTIONAL
  2169. // This is the case, if called from omp_init_lock_with_hint:
  2170. void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  2171. if (!codeptr)
  2172. codeptr = OMPT_GET_RETURN_ADDRESS(0);
  2173. if (ompt_enabled.ompt_callback_lock_init) {
  2174. ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
  2175. ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
  2176. (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
  2177. }
  2178. #endif
  2179. #if USE_ITT_BUILD
  2180. __kmp_itt_lock_creating(lck);
  2181. #endif /* USE_ITT_BUILD */
  2182. #endif // KMP_USE_DYNAMIC_LOCK
  2183. } // __kmpc_init_lock
  2184. /* initialize the lock */
  2185. void __kmpc_init_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
  2186. #if KMP_USE_DYNAMIC_LOCK
  2187. KMP_DEBUG_ASSERT(__kmp_init_serial);
  2188. if (__kmp_env_consistency_check && user_lock == NULL) {
  2189. KMP_FATAL(LockIsUninitialized, "omp_init_nest_lock");
  2190. }
  2191. __kmp_init_nest_lock_with_hint(loc, user_lock, __kmp_user_lock_seq);
  2192. #if OMPT_SUPPORT && OMPT_OPTIONAL
  2193. // This is the case, if called from omp_init_lock_with_hint:
  2194. void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  2195. if (!codeptr)
  2196. codeptr = OMPT_GET_RETURN_ADDRESS(0);
  2197. if (ompt_enabled.ompt_callback_lock_init) {
  2198. ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
  2199. ompt_mutex_nest_lock, omp_lock_hint_none,
  2200. __ompt_get_mutex_impl_type(user_lock),
  2201. (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
  2202. }
  2203. #endif
  2204. #else // KMP_USE_DYNAMIC_LOCK
  2205. static char const *const func = "omp_init_nest_lock";
  2206. kmp_user_lock_p lck;
  2207. KMP_DEBUG_ASSERT(__kmp_init_serial);
  2208. if (__kmp_env_consistency_check) {
  2209. if (user_lock == NULL) {
  2210. KMP_FATAL(LockIsUninitialized, func);
  2211. }
  2212. }
  2213. KMP_CHECK_USER_LOCK_INIT();
  2214. if ((__kmp_user_lock_kind == lk_tas) &&
  2215. (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
  2216. OMP_NEST_LOCK_T_SIZE)) {
  2217. lck = (kmp_user_lock_p)user_lock;
  2218. }
  2219. #if KMP_USE_FUTEX
  2220. else if ((__kmp_user_lock_kind == lk_futex) &&
  2221. (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
  2222. OMP_NEST_LOCK_T_SIZE)) {
  2223. lck = (kmp_user_lock_p)user_lock;
  2224. }
  2225. #endif
  2226. else {
  2227. lck = __kmp_user_lock_allocate(user_lock, gtid, 0);
  2228. }
  2229. INIT_NESTED_LOCK(lck);
  2230. __kmp_set_user_lock_location(lck, loc);
  2231. #if OMPT_SUPPORT && OMPT_OPTIONAL
  2232. // This is the case, if called from omp_init_lock_with_hint:
  2233. void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  2234. if (!codeptr)
  2235. codeptr = OMPT_GET_RETURN_ADDRESS(0);
  2236. if (ompt_enabled.ompt_callback_lock_init) {
  2237. ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
  2238. ompt_mutex_nest_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
  2239. (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
  2240. }
  2241. #endif
  2242. #if USE_ITT_BUILD
  2243. __kmp_itt_lock_creating(lck);
  2244. #endif /* USE_ITT_BUILD */
  2245. #endif // KMP_USE_DYNAMIC_LOCK
  2246. } // __kmpc_init_nest_lock
  2247. void __kmpc_destroy_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
  2248. #if KMP_USE_DYNAMIC_LOCK
  2249. #if USE_ITT_BUILD
  2250. kmp_user_lock_p lck;
  2251. if (KMP_EXTRACT_D_TAG(user_lock) == 0) {
  2252. lck = ((kmp_indirect_lock_t *)KMP_LOOKUP_I_LOCK(user_lock))->lock;
  2253. } else {
  2254. lck = (kmp_user_lock_p)user_lock;
  2255. }
  2256. __kmp_itt_lock_destroyed(lck);
  2257. #endif
  2258. #if OMPT_SUPPORT && OMPT_OPTIONAL
  2259. // This is the case, if called from omp_init_lock_with_hint:
  2260. void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  2261. if (!codeptr)
  2262. codeptr = OMPT_GET_RETURN_ADDRESS(0);
  2263. if (ompt_enabled.ompt_callback_lock_destroy) {
  2264. ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
  2265. ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
  2266. }
  2267. #endif
  2268. KMP_D_LOCK_FUNC(user_lock, destroy)((kmp_dyna_lock_t *)user_lock);
  2269. #else
  2270. kmp_user_lock_p lck;
  2271. if ((__kmp_user_lock_kind == lk_tas) &&
  2272. (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
  2273. lck = (kmp_user_lock_p)user_lock;
  2274. }
  2275. #if KMP_USE_FUTEX
  2276. else if ((__kmp_user_lock_kind == lk_futex) &&
  2277. (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
  2278. lck = (kmp_user_lock_p)user_lock;
  2279. }
  2280. #endif
  2281. else {
  2282. lck = __kmp_lookup_user_lock(user_lock, "omp_destroy_lock");
  2283. }
  2284. #if OMPT_SUPPORT && OMPT_OPTIONAL
  2285. // This is the case, if called from omp_init_lock_with_hint:
  2286. void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  2287. if (!codeptr)
  2288. codeptr = OMPT_GET_RETURN_ADDRESS(0);
  2289. if (ompt_enabled.ompt_callback_lock_destroy) {
  2290. ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
  2291. ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
  2292. }
  2293. #endif
  2294. #if USE_ITT_BUILD
  2295. __kmp_itt_lock_destroyed(lck);
  2296. #endif /* USE_ITT_BUILD */
  2297. DESTROY_LOCK(lck);
  2298. if ((__kmp_user_lock_kind == lk_tas) &&
  2299. (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
  2300. ;
  2301. }
  2302. #if KMP_USE_FUTEX
  2303. else if ((__kmp_user_lock_kind == lk_futex) &&
  2304. (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
  2305. ;
  2306. }
  2307. #endif
  2308. else {
  2309. __kmp_user_lock_free(user_lock, gtid, lck);
  2310. }
  2311. #endif // KMP_USE_DYNAMIC_LOCK
  2312. } // __kmpc_destroy_lock
  2313. /* destroy the lock */
  2314. void __kmpc_destroy_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
  2315. #if KMP_USE_DYNAMIC_LOCK
  2316. #if USE_ITT_BUILD
  2317. kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(user_lock);
  2318. __kmp_itt_lock_destroyed(ilk->lock);
  2319. #endif
  2320. #if OMPT_SUPPORT && OMPT_OPTIONAL
  2321. // This is the case, if called from omp_init_lock_with_hint:
  2322. void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  2323. if (!codeptr)
  2324. codeptr = OMPT_GET_RETURN_ADDRESS(0);
  2325. if (ompt_enabled.ompt_callback_lock_destroy) {
  2326. ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
  2327. ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
  2328. }
  2329. #endif
  2330. KMP_D_LOCK_FUNC(user_lock, destroy)((kmp_dyna_lock_t *)user_lock);
  2331. #else // KMP_USE_DYNAMIC_LOCK
  2332. kmp_user_lock_p lck;
  2333. if ((__kmp_user_lock_kind == lk_tas) &&
  2334. (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
  2335. OMP_NEST_LOCK_T_SIZE)) {
  2336. lck = (kmp_user_lock_p)user_lock;
  2337. }
  2338. #if KMP_USE_FUTEX
  2339. else if ((__kmp_user_lock_kind == lk_futex) &&
  2340. (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
  2341. OMP_NEST_LOCK_T_SIZE)) {
  2342. lck = (kmp_user_lock_p)user_lock;
  2343. }
  2344. #endif
  2345. else {
  2346. lck = __kmp_lookup_user_lock(user_lock, "omp_destroy_nest_lock");
  2347. }
  2348. #if OMPT_SUPPORT && OMPT_OPTIONAL
  2349. // This is the case, if called from omp_init_lock_with_hint:
  2350. void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  2351. if (!codeptr)
  2352. codeptr = OMPT_GET_RETURN_ADDRESS(0);
  2353. if (ompt_enabled.ompt_callback_lock_destroy) {
  2354. ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
  2355. ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
  2356. }
  2357. #endif
  2358. #if USE_ITT_BUILD
  2359. __kmp_itt_lock_destroyed(lck);
  2360. #endif /* USE_ITT_BUILD */
  2361. DESTROY_NESTED_LOCK(lck);
  2362. if ((__kmp_user_lock_kind == lk_tas) &&
  2363. (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
  2364. OMP_NEST_LOCK_T_SIZE)) {
  2365. ;
  2366. }
  2367. #if KMP_USE_FUTEX
  2368. else if ((__kmp_user_lock_kind == lk_futex) &&
  2369. (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
  2370. OMP_NEST_LOCK_T_SIZE)) {
  2371. ;
  2372. }
  2373. #endif
  2374. else {
  2375. __kmp_user_lock_free(user_lock, gtid, lck);
  2376. }
  2377. #endif // KMP_USE_DYNAMIC_LOCK
  2378. } // __kmpc_destroy_nest_lock
  2379. void __kmpc_set_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
  2380. KMP_COUNT_BLOCK(OMP_set_lock);
  2381. #if KMP_USE_DYNAMIC_LOCK
  2382. int tag = KMP_EXTRACT_D_TAG(user_lock);
  2383. #if USE_ITT_BUILD
  2384. __kmp_itt_lock_acquiring(
  2385. (kmp_user_lock_p)
  2386. user_lock); // itt function will get to the right lock object.
  2387. #endif
  2388. #if OMPT_SUPPORT && OMPT_OPTIONAL
  2389. // This is the case, if called from omp_init_lock_with_hint:
  2390. void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  2391. if (!codeptr)
  2392. codeptr = OMPT_GET_RETURN_ADDRESS(0);
  2393. if (ompt_enabled.ompt_callback_mutex_acquire) {
  2394. ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
  2395. ompt_mutex_lock, omp_lock_hint_none,
  2396. __ompt_get_mutex_impl_type(user_lock),
  2397. (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
  2398. }
  2399. #endif
  2400. #if KMP_USE_INLINED_TAS
  2401. if (tag == locktag_tas && !__kmp_env_consistency_check) {
  2402. KMP_ACQUIRE_TAS_LOCK(user_lock, gtid);
  2403. } else
  2404. #elif KMP_USE_INLINED_FUTEX
  2405. if (tag == locktag_futex && !__kmp_env_consistency_check) {
  2406. KMP_ACQUIRE_FUTEX_LOCK(user_lock, gtid);
  2407. } else
  2408. #endif
  2409. {
  2410. __kmp_direct_set[tag]((kmp_dyna_lock_t *)user_lock, gtid);
  2411. }
  2412. #if USE_ITT_BUILD
  2413. __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
  2414. #endif
  2415. #if OMPT_SUPPORT && OMPT_OPTIONAL
  2416. if (ompt_enabled.ompt_callback_mutex_acquired) {
  2417. ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
  2418. ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
  2419. }
  2420. #endif
  2421. #else // KMP_USE_DYNAMIC_LOCK
  2422. kmp_user_lock_p lck;
  2423. if ((__kmp_user_lock_kind == lk_tas) &&
  2424. (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
  2425. lck = (kmp_user_lock_p)user_lock;
  2426. }
  2427. #if KMP_USE_FUTEX
  2428. else if ((__kmp_user_lock_kind == lk_futex) &&
  2429. (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
  2430. lck = (kmp_user_lock_p)user_lock;
  2431. }
  2432. #endif
  2433. else {
  2434. lck = __kmp_lookup_user_lock(user_lock, "omp_set_lock");
  2435. }
  2436. #if USE_ITT_BUILD
  2437. __kmp_itt_lock_acquiring(lck);
  2438. #endif /* USE_ITT_BUILD */
  2439. #if OMPT_SUPPORT && OMPT_OPTIONAL
  2440. // This is the case, if called from omp_init_lock_with_hint:
  2441. void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  2442. if (!codeptr)
  2443. codeptr = OMPT_GET_RETURN_ADDRESS(0);
  2444. if (ompt_enabled.ompt_callback_mutex_acquire) {
  2445. ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
  2446. ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
  2447. (ompt_wait_id_t)(uintptr_t)lck, codeptr);
  2448. }
  2449. #endif
  2450. ACQUIRE_LOCK(lck, gtid);
  2451. #if USE_ITT_BUILD
  2452. __kmp_itt_lock_acquired(lck);
  2453. #endif /* USE_ITT_BUILD */
  2454. #if OMPT_SUPPORT && OMPT_OPTIONAL
  2455. if (ompt_enabled.ompt_callback_mutex_acquired) {
  2456. ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
  2457. ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
  2458. }
  2459. #endif
  2460. #endif // KMP_USE_DYNAMIC_LOCK
  2461. }
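// Illustrative sketch, not part of the runtime: the overall shape of the
// dynamic-lock path above -- a small tag stored in the lock word selects
// either an inlined fast path or an entry in a per-kind dispatch table.
// Everything below (types, the 3-bit tag layout, the table) is a hypothetical
// stand-in for the real KMP_EXTRACT_D_TAG / __kmp_direct_set machinery.
#if 0
typedef void (*example_set_fn)(void *lock, int gtid);
static example_set_fn example_direct_set[8]; // indexed by tag, filled at init

static inline void example_set_lock(void *lock, int gtid) {
  uintptr_t word = __atomic_load_n((uintptr_t *)lock, __ATOMIC_RELAXED);
  int tag = (int)(word & 7);                 // low bits name the lock kind
  if (tag == 1) {                            // an inlinable test-and-set kind
    uintptr_t free_val = (uintptr_t)tag;     // "free" keeps only the tag bits
    uintptr_t held_val = ((uintptr_t)(gtid + 1) << 8) | (uintptr_t)tag;
    uintptr_t expected = free_val;
    while (!__atomic_compare_exchange_n((uintptr_t *)lock, &expected, held_val,
                                        0, __ATOMIC_ACQUIRE, __ATOMIC_RELAXED))
      expected = free_val;                   // spin until the word reads "free"
  } else {
    example_direct_set[tag](lock, gtid);     // other kinds: table dispatch
  }
}
#endif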
  2462. void __kmpc_set_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
  2463. #if KMP_USE_DYNAMIC_LOCK
  2464. #if USE_ITT_BUILD
  2465. __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);
  2466. #endif
  2467. #if OMPT_SUPPORT && OMPT_OPTIONAL
  2468. // This is the case, if called from omp_init_lock_with_hint:
  2469. void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  2470. if (!codeptr)
  2471. codeptr = OMPT_GET_RETURN_ADDRESS(0);
  2472. if (ompt_enabled.enabled) {
  2473. if (ompt_enabled.ompt_callback_mutex_acquire) {
  2474. ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
  2475. ompt_mutex_nest_lock, omp_lock_hint_none,
  2476. __ompt_get_mutex_impl_type(user_lock),
  2477. (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
  2478. }
  2479. }
  2480. #endif
  2481. int acquire_status =
  2482. KMP_D_LOCK_FUNC(user_lock, set)((kmp_dyna_lock_t *)user_lock, gtid);
  2483. (void)acquire_status;
  2484. #if USE_ITT_BUILD
  2485. __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
  2486. #endif
  2487. #if OMPT_SUPPORT && OMPT_OPTIONAL
  2488. if (ompt_enabled.enabled) {
  2489. if (acquire_status == KMP_LOCK_ACQUIRED_FIRST) {
  2490. if (ompt_enabled.ompt_callback_mutex_acquired) {
  2491. // lock_first
  2492. ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
  2493. ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock,
  2494. codeptr);
  2495. }
  2496. } else {
  2497. if (ompt_enabled.ompt_callback_nest_lock) {
  2498. // lock_next
  2499. ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
  2500. ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
  2501. }
  2502. }
  2503. }
  2504. #endif
  2505. #else // KMP_USE_DYNAMIC_LOCK
  2506. int acquire_status;
  2507. kmp_user_lock_p lck;
  2508. if ((__kmp_user_lock_kind == lk_tas) &&
  2509. (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
  2510. OMP_NEST_LOCK_T_SIZE)) {
  2511. lck = (kmp_user_lock_p)user_lock;
  2512. }
  2513. #if KMP_USE_FUTEX
  2514. else if ((__kmp_user_lock_kind == lk_futex) &&
  2515. (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
  2516. OMP_NEST_LOCK_T_SIZE)) {
  2517. lck = (kmp_user_lock_p)user_lock;
  2518. }
  2519. #endif
  2520. else {
  2521. lck = __kmp_lookup_user_lock(user_lock, "omp_set_nest_lock");
  2522. }
  2523. #if USE_ITT_BUILD
  2524. __kmp_itt_lock_acquiring(lck);
  2525. #endif /* USE_ITT_BUILD */
  2526. #if OMPT_SUPPORT && OMPT_OPTIONAL
  2527. // This is the case, if called from omp_init_lock_with_hint:
  2528. void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  2529. if (!codeptr)
  2530. codeptr = OMPT_GET_RETURN_ADDRESS(0);
  2531. if (ompt_enabled.enabled) {
  2532. if (ompt_enabled.ompt_callback_mutex_acquire) {
  2533. ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
  2534. ompt_mutex_nest_lock, omp_lock_hint_none,
  2535. __ompt_get_mutex_impl_type(), (ompt_wait_id_t)(uintptr_t)lck,
  2536. codeptr);
  2537. }
  2538. }
  2539. #endif
  2540. ACQUIRE_NESTED_LOCK(lck, gtid, &acquire_status);
  2541. #if USE_ITT_BUILD
  2542. __kmp_itt_lock_acquired(lck);
  2543. #endif /* USE_ITT_BUILD */
  2544. #if OMPT_SUPPORT && OMPT_OPTIONAL
  2545. if (ompt_enabled.enabled) {
  2546. if (acquire_status == KMP_LOCK_ACQUIRED_FIRST) {
  2547. if (ompt_enabled.ompt_callback_mutex_acquired) {
  2548. // lock_first
  2549. ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
  2550. ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
  2551. }
  2552. } else {
  2553. if (ompt_enabled.ompt_callback_nest_lock) {
  2554. // lock_next
  2555. ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
  2556. ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
  2557. }
  2558. }
  2559. }
  2560. #endif
  2561. #endif // KMP_USE_DYNAMIC_LOCK
  2562. }
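// Illustrative sketch, not part of the runtime: why the nest-lock entry points
// track an acquire status. A nestable lock keeps an owner and a depth; only
// the 0 -> 1 transition is a real acquisition (reported via mutex_acquired),
// while re-entry by the owner just bumps the depth (reported via the
// nest_lock scope_begin event). All names below are hypothetical.
#if 0
enum { EXAMPLE_ACQUIRED_FIRST = 1, EXAMPLE_ACQUIRED_NEXT = 2 };
struct example_nest_lock { int lk; int owner; int depth; };

static int example_acquire_nested(struct example_nest_lock *l, int gtid) {
  if (l->depth > 0 && l->owner == gtid) { // re-entry by the current owner
    ++l->depth;
    return EXAMPLE_ACQUIRED_NEXT;         // -> ompt_callback_nest_lock begin
  }
  while (__atomic_exchange_n(&l->lk, 1, __ATOMIC_ACQUIRE) != 0) { /* spin */ }
  l->owner = gtid;
  l->depth = 1;
  return EXAMPLE_ACQUIRED_FIRST;          // -> ompt_callback_mutex_acquired
}
#endif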
  2563. void __kmpc_unset_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
  2564. #if KMP_USE_DYNAMIC_LOCK
  2565. int tag = KMP_EXTRACT_D_TAG(user_lock);
  2566. #if USE_ITT_BUILD
  2567. __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
  2568. #endif
  2569. #if KMP_USE_INLINED_TAS
  2570. if (tag == locktag_tas && !__kmp_env_consistency_check) {
  2571. KMP_RELEASE_TAS_LOCK(user_lock, gtid);
  2572. } else
  2573. #elif KMP_USE_INLINED_FUTEX
  2574. if (tag == locktag_futex && !__kmp_env_consistency_check) {
  2575. KMP_RELEASE_FUTEX_LOCK(user_lock, gtid);
  2576. } else
  2577. #endif
  2578. {
  2579. __kmp_direct_unset[tag]((kmp_dyna_lock_t *)user_lock, gtid);
  2580. }
  2581. #if OMPT_SUPPORT && OMPT_OPTIONAL
  2582. // This is the case, if called from omp_init_lock_with_hint:
  2583. void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  2584. if (!codeptr)
  2585. codeptr = OMPT_GET_RETURN_ADDRESS(0);
  2586. if (ompt_enabled.ompt_callback_mutex_released) {
  2587. ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
  2588. ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
  2589. }
  2590. #endif
  2591. #else // KMP_USE_DYNAMIC_LOCK
  2592. kmp_user_lock_p lck;
  2593. /* Can't use serial interval since not block structured */
  2594. /* release the lock */
  2595. if ((__kmp_user_lock_kind == lk_tas) &&
  2596. (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
  2597. #if KMP_OS_LINUX && \
  2598. (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
  2599. // "fast" path implemented to fix customer performance issue
  2600. #if USE_ITT_BUILD
  2601. __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
  2602. #endif /* USE_ITT_BUILD */
  2603. TCW_4(((kmp_user_lock_p)user_lock)->tas.lk.poll, 0);
  2604. KMP_MB();
  2605. #if OMPT_SUPPORT && OMPT_OPTIONAL
  2606. // This is the case, if called from omp_init_lock_with_hint:
  2607. void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  2608. if (!codeptr)
  2609. codeptr = OMPT_GET_RETURN_ADDRESS(0);
  2610. if (ompt_enabled.ompt_callback_mutex_released) {
  2611. ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
2612. ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
  2613. }
  2614. #endif
  2615. return;
  2616. #else
  2617. lck = (kmp_user_lock_p)user_lock;
  2618. #endif
  2619. }
  2620. #if KMP_USE_FUTEX
  2621. else if ((__kmp_user_lock_kind == lk_futex) &&
  2622. (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
  2623. lck = (kmp_user_lock_p)user_lock;
  2624. }
  2625. #endif
  2626. else {
  2627. lck = __kmp_lookup_user_lock(user_lock, "omp_unset_lock");
  2628. }
  2629. #if USE_ITT_BUILD
  2630. __kmp_itt_lock_releasing(lck);
  2631. #endif /* USE_ITT_BUILD */
  2632. RELEASE_LOCK(lck, gtid);
  2633. #if OMPT_SUPPORT && OMPT_OPTIONAL
  2634. // This is the case, if called from omp_init_lock_with_hint:
  2635. void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  2636. if (!codeptr)
  2637. codeptr = OMPT_GET_RETURN_ADDRESS(0);
  2638. if (ompt_enabled.ompt_callback_mutex_released) {
  2639. ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
  2640. ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
  2641. }
  2642. #endif
  2643. #endif // KMP_USE_DYNAMIC_LOCK
  2644. }
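// Illustrative sketch, not part of the runtime: the Linux "fast" unlock path
// above is just a store of 0 to the poll word plus a fence (TCW_4 + KMP_MB).
// Expressed with a portable atomic, the same idea is a release store
// (hypothetical helper name):
#if 0
static inline void example_tas_release(int *poll_word) {
  // a release store publishes the critical section's writes to the next owner
  __atomic_store_n(poll_word, 0, __ATOMIC_RELEASE);
}
#endif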
  2645. /* release the lock */
  2646. void __kmpc_unset_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
  2647. #if KMP_USE_DYNAMIC_LOCK
  2648. #if USE_ITT_BUILD
  2649. __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
  2650. #endif
  2651. int release_status =
  2652. KMP_D_LOCK_FUNC(user_lock, unset)((kmp_dyna_lock_t *)user_lock, gtid);
  2653. (void)release_status;
  2654. #if OMPT_SUPPORT && OMPT_OPTIONAL
  2655. // This is the case, if called from omp_init_lock_with_hint:
  2656. void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  2657. if (!codeptr)
  2658. codeptr = OMPT_GET_RETURN_ADDRESS(0);
  2659. if (ompt_enabled.enabled) {
  2660. if (release_status == KMP_LOCK_RELEASED) {
  2661. if (ompt_enabled.ompt_callback_mutex_released) {
  2662. // release_lock_last
  2663. ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
  2664. ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock,
  2665. codeptr);
  2666. }
  2667. } else if (ompt_enabled.ompt_callback_nest_lock) {
  2668. // release_lock_prev
  2669. ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
  2670. ompt_scope_end, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
  2671. }
  2672. }
  2673. #endif
  2674. #else // KMP_USE_DYNAMIC_LOCK
  2675. kmp_user_lock_p lck;
  2676. /* Can't use serial interval since not block structured */
  2677. if ((__kmp_user_lock_kind == lk_tas) &&
  2678. (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
  2679. OMP_NEST_LOCK_T_SIZE)) {
  2680. #if KMP_OS_LINUX && \
  2681. (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
  2682. // "fast" path implemented to fix customer performance issue
  2683. kmp_tas_lock_t *tl = (kmp_tas_lock_t *)user_lock;
  2684. #if USE_ITT_BUILD
  2685. __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
  2686. #endif /* USE_ITT_BUILD */
  2687. #if OMPT_SUPPORT && OMPT_OPTIONAL
  2688. int release_status = KMP_LOCK_STILL_HELD;
  2689. #endif
  2690. if (--(tl->lk.depth_locked) == 0) {
  2691. TCW_4(tl->lk.poll, 0);
  2692. #if OMPT_SUPPORT && OMPT_OPTIONAL
  2693. release_status = KMP_LOCK_RELEASED;
  2694. #endif
  2695. }
  2696. KMP_MB();
  2697. #if OMPT_SUPPORT && OMPT_OPTIONAL
  2698. // This is the case, if called from omp_init_lock_with_hint:
  2699. void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  2700. if (!codeptr)
  2701. codeptr = OMPT_GET_RETURN_ADDRESS(0);
  2702. if (ompt_enabled.enabled) {
  2703. if (release_status == KMP_LOCK_RELEASED) {
  2704. if (ompt_enabled.ompt_callback_mutex_released) {
  2705. // release_lock_last
  2706. ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
2707. ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
  2708. }
  2709. } else if (ompt_enabled.ompt_callback_nest_lock) {
  2710. // release_lock_previous
  2711. ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
2712. ompt_scope_end, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
  2713. }
  2714. }
  2715. #endif
  2716. return;
  2717. #else
  2718. lck = (kmp_user_lock_p)user_lock;
  2719. #endif
  2720. }
  2721. #if KMP_USE_FUTEX
  2722. else if ((__kmp_user_lock_kind == lk_futex) &&
  2723. (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
  2724. OMP_NEST_LOCK_T_SIZE)) {
  2725. lck = (kmp_user_lock_p)user_lock;
  2726. }
  2727. #endif
  2728. else {
  2729. lck = __kmp_lookup_user_lock(user_lock, "omp_unset_nest_lock");
  2730. }
  2731. #if USE_ITT_BUILD
  2732. __kmp_itt_lock_releasing(lck);
  2733. #endif /* USE_ITT_BUILD */
  2734. int release_status;
  2735. release_status = RELEASE_NESTED_LOCK(lck, gtid);
  2736. #if OMPT_SUPPORT && OMPT_OPTIONAL
  2737. // This is the case, if called from omp_init_lock_with_hint:
  2738. void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  2739. if (!codeptr)
  2740. codeptr = OMPT_GET_RETURN_ADDRESS(0);
  2741. if (ompt_enabled.enabled) {
  2742. if (release_status == KMP_LOCK_RELEASED) {
  2743. if (ompt_enabled.ompt_callback_mutex_released) {
  2744. // release_lock_last
  2745. ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
  2746. ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
  2747. }
  2748. } else if (ompt_enabled.ompt_callback_nest_lock) {
  2749. // release_lock_previous
  2750. ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
2751. ompt_scope_end, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
  2752. }
  2753. }
  2754. #endif
  2755. #endif // KMP_USE_DYNAMIC_LOCK
  2756. }
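// Illustrative sketch, not part of the runtime: the nested "fast" unlock path
// above only clears the poll word when depth_locked reaches zero, and the
// release status picks the OMPT event. Reusing the hypothetical
// example_nest_lock type from the sketch after __kmpc_set_nest_lock:
#if 0
enum { EXAMPLE_LOCK_RELEASED = 1, EXAMPLE_LOCK_STILL_HELD = 0 };

static int example_release_nested(struct example_nest_lock *l) {
  if (--l->depth == 0) {                          // outermost unset
    l->owner = -1;
    __atomic_store_n(&l->lk, 0, __ATOMIC_RELEASE);
    return EXAMPLE_LOCK_RELEASED;                 // -> mutex_released event
  }
  return EXAMPLE_LOCK_STILL_HELD;                 // -> nest_lock scope_end event
}
#endif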
  2757. /* try to acquire the lock */
  2758. int __kmpc_test_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
  2759. KMP_COUNT_BLOCK(OMP_test_lock);
  2760. #if KMP_USE_DYNAMIC_LOCK
  2761. int rc;
  2762. int tag = KMP_EXTRACT_D_TAG(user_lock);
  2763. #if USE_ITT_BUILD
  2764. __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);
  2765. #endif
  2766. #if OMPT_SUPPORT && OMPT_OPTIONAL
  2767. // This is the case, if called from omp_init_lock_with_hint:
  2768. void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  2769. if (!codeptr)
  2770. codeptr = OMPT_GET_RETURN_ADDRESS(0);
  2771. if (ompt_enabled.ompt_callback_mutex_acquire) {
  2772. ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
  2773. ompt_mutex_lock, omp_lock_hint_none,
  2774. __ompt_get_mutex_impl_type(user_lock),
  2775. (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
  2776. }
  2777. #endif
  2778. #if KMP_USE_INLINED_TAS
  2779. if (tag == locktag_tas && !__kmp_env_consistency_check) {
  2780. KMP_TEST_TAS_LOCK(user_lock, gtid, rc);
  2781. } else
  2782. #elif KMP_USE_INLINED_FUTEX
  2783. if (tag == locktag_futex && !__kmp_env_consistency_check) {
  2784. KMP_TEST_FUTEX_LOCK(user_lock, gtid, rc);
  2785. } else
  2786. #endif
  2787. {
  2788. rc = __kmp_direct_test[tag]((kmp_dyna_lock_t *)user_lock, gtid);
  2789. }
  2790. if (rc) {
  2791. #if USE_ITT_BUILD
  2792. __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
  2793. #endif
  2794. #if OMPT_SUPPORT && OMPT_OPTIONAL
  2795. if (ompt_enabled.ompt_callback_mutex_acquired) {
  2796. ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
  2797. ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
  2798. }
  2799. #endif
  2800. return FTN_TRUE;
  2801. } else {
  2802. #if USE_ITT_BUILD
  2803. __kmp_itt_lock_cancelled((kmp_user_lock_p)user_lock);
  2804. #endif
  2805. return FTN_FALSE;
  2806. }
  2807. #else // KMP_USE_DYNAMIC_LOCK
  2808. kmp_user_lock_p lck;
  2809. int rc;
  2810. if ((__kmp_user_lock_kind == lk_tas) &&
  2811. (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
  2812. lck = (kmp_user_lock_p)user_lock;
  2813. }
  2814. #if KMP_USE_FUTEX
  2815. else if ((__kmp_user_lock_kind == lk_futex) &&
  2816. (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
  2817. lck = (kmp_user_lock_p)user_lock;
  2818. }
  2819. #endif
  2820. else {
  2821. lck = __kmp_lookup_user_lock(user_lock, "omp_test_lock");
  2822. }
  2823. #if USE_ITT_BUILD
  2824. __kmp_itt_lock_acquiring(lck);
  2825. #endif /* USE_ITT_BUILD */
  2826. #if OMPT_SUPPORT && OMPT_OPTIONAL
  2827. // This is the case, if called from omp_init_lock_with_hint:
  2828. void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  2829. if (!codeptr)
  2830. codeptr = OMPT_GET_RETURN_ADDRESS(0);
  2831. if (ompt_enabled.ompt_callback_mutex_acquire) {
  2832. ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
  2833. ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
  2834. (ompt_wait_id_t)(uintptr_t)lck, codeptr);
  2835. }
  2836. #endif
  2837. rc = TEST_LOCK(lck, gtid);
  2838. #if USE_ITT_BUILD
  2839. if (rc) {
  2840. __kmp_itt_lock_acquired(lck);
  2841. } else {
  2842. __kmp_itt_lock_cancelled(lck);
  2843. }
  2844. #endif /* USE_ITT_BUILD */
  2845. #if OMPT_SUPPORT && OMPT_OPTIONAL
  2846. if (rc && ompt_enabled.ompt_callback_mutex_acquired) {
  2847. ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
  2848. ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
  2849. }
  2850. #endif
  2851. return (rc ? FTN_TRUE : FTN_FALSE);
  2852. /* Can't use serial interval since not block structured */
  2853. #endif // KMP_USE_DYNAMIC_LOCK
  2854. }
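// Illustrative sketch, not part of the runtime: __kmpc_test_lock is the
// non-blocking variant -- essentially one compare-and-swap attempt whose
// boolean result is handed back as FTN_TRUE/FTN_FALSE (hypothetical helper):
#if 0
static inline int example_tas_try_acquire(int *poll_word, int gtid) {
  int expected = 0;                        // 0 means "free" in this sketch
  return __atomic_compare_exchange_n(poll_word, &expected, gtid + 1,
                                      /*weak=*/0, __ATOMIC_ACQUIRE,
                                      __ATOMIC_RELAXED);
}
#endif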
  2855. /* try to acquire the lock */
  2856. int __kmpc_test_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
  2857. #if KMP_USE_DYNAMIC_LOCK
  2858. int rc;
  2859. #if USE_ITT_BUILD
  2860. __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);
  2861. #endif
  2862. #if OMPT_SUPPORT && OMPT_OPTIONAL
  2863. // This is the case, if called from omp_init_lock_with_hint:
  2864. void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  2865. if (!codeptr)
  2866. codeptr = OMPT_GET_RETURN_ADDRESS(0);
  2867. if (ompt_enabled.ompt_callback_mutex_acquire) {
  2868. ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
  2869. ompt_mutex_nest_lock, omp_lock_hint_none,
  2870. __ompt_get_mutex_impl_type(user_lock),
  2871. (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
  2872. }
  2873. #endif
  2874. rc = KMP_D_LOCK_FUNC(user_lock, test)((kmp_dyna_lock_t *)user_lock, gtid);
  2875. #if USE_ITT_BUILD
  2876. if (rc) {
  2877. __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
  2878. } else {
  2879. __kmp_itt_lock_cancelled((kmp_user_lock_p)user_lock);
  2880. }
  2881. #endif
  2882. #if OMPT_SUPPORT && OMPT_OPTIONAL
  2883. if (ompt_enabled.enabled && rc) {
  2884. if (rc == 1) {
  2885. if (ompt_enabled.ompt_callback_mutex_acquired) {
  2886. // lock_first
  2887. ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
  2888. ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock,
  2889. codeptr);
  2890. }
  2891. } else {
  2892. if (ompt_enabled.ompt_callback_nest_lock) {
  2893. // lock_next
  2894. ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
  2895. ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
  2896. }
  2897. }
  2898. }
  2899. #endif
  2900. return rc;
  2901. #else // KMP_USE_DYNAMIC_LOCK
  2902. kmp_user_lock_p lck;
  2903. int rc;
  2904. if ((__kmp_user_lock_kind == lk_tas) &&
  2905. (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
  2906. OMP_NEST_LOCK_T_SIZE)) {
  2907. lck = (kmp_user_lock_p)user_lock;
  2908. }
  2909. #if KMP_USE_FUTEX
  2910. else if ((__kmp_user_lock_kind == lk_futex) &&
  2911. (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
  2912. OMP_NEST_LOCK_T_SIZE)) {
  2913. lck = (kmp_user_lock_p)user_lock;
  2914. }
  2915. #endif
  2916. else {
  2917. lck = __kmp_lookup_user_lock(user_lock, "omp_test_nest_lock");
  2918. }
  2919. #if USE_ITT_BUILD
  2920. __kmp_itt_lock_acquiring(lck);
  2921. #endif /* USE_ITT_BUILD */
  2922. #if OMPT_SUPPORT && OMPT_OPTIONAL
  2923. // This is the case, if called from omp_init_lock_with_hint:
  2924. void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  2925. if (!codeptr)
  2926. codeptr = OMPT_GET_RETURN_ADDRESS(0);
2927. if (ompt_enabled.enabled &&
2928. ompt_enabled.ompt_callback_mutex_acquire) {
  2929. ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
  2930. ompt_mutex_nest_lock, omp_lock_hint_none,
  2931. __ompt_get_mutex_impl_type(), (ompt_wait_id_t)(uintptr_t)lck,
  2932. codeptr);
  2933. }
  2934. #endif
  2935. rc = TEST_NESTED_LOCK(lck, gtid);
  2936. #if USE_ITT_BUILD
  2937. if (rc) {
  2938. __kmp_itt_lock_acquired(lck);
  2939. } else {
  2940. __kmp_itt_lock_cancelled(lck);
  2941. }
  2942. #endif /* USE_ITT_BUILD */
  2943. #if OMPT_SUPPORT && OMPT_OPTIONAL
  2944. if (ompt_enabled.enabled && rc) {
  2945. if (rc == 1) {
  2946. if (ompt_enabled.ompt_callback_mutex_acquired) {
  2947. // lock_first
  2948. ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
  2949. ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
  2950. }
  2951. } else {
  2952. if (ompt_enabled.ompt_callback_nest_lock) {
  2953. // lock_next
  2954. ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
2955. ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
  2956. }
  2957. }
  2958. }
  2959. #endif
  2960. return rc;
  2961. /* Can't use serial interval since not block structured */
  2962. #endif // KMP_USE_DYNAMIC_LOCK
  2963. }
2964. // Interface to fast scalable reduce methods routines
2965. // keep the selected method in a thread-local structure for cross-function
2966. // use: it will be read by the __kmpc_end_reduce* functions;
2967. // an alternative would be to re-determine the method once more in the
2968. // __kmpc_end_reduce* functions (a new prototype would be required then)
2969. // AT: which solution is better?
  2970. #define __KMP_SET_REDUCTION_METHOD(gtid, rmethod) \
  2971. ((__kmp_threads[(gtid)]->th.th_local.packed_reduction_method) = (rmethod))
  2972. #define __KMP_GET_REDUCTION_METHOD(gtid) \
  2973. (__kmp_threads[(gtid)]->th.th_local.packed_reduction_method)
  2974. // description of the packed_reduction_method variable: look at the macros in
  2975. // kmp.h
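// Illustrative sketch, not part of the runtime: the two macros above make the
// method chosen in __kmpc_reduce*() visible to the matching __kmpc_end_reduce*()
// call on the same thread, so the "end" side can finish the same way without
// re-running the selection logic. Schematically (all names hypothetical):
#if 0
enum example_method { EXAMPLE_CRITICAL, EXAMPLE_ATOMIC, EXAMPLE_TREE };
static __thread enum example_method example_reduction_method; // like th_local

static void example_reduce_begin(enum example_method chosen) {
  example_reduction_method = chosen;    // the 'set' in the begin call
  /* ... enter the chosen reduction scheme ... */
}

static void example_reduce_end(void) {
  switch (example_reduction_method) {   // the 'get' in the end call
  case EXAMPLE_CRITICAL: /* leave the critical section */ break;
  case EXAMPLE_ATOMIC:   /* nothing left to do */         break;
  case EXAMPLE_TREE:     /* release the split barrier */  break;
  }
}
#endif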
  2976. // used in a critical section reduce block
  2977. static __forceinline void
  2978. __kmp_enter_critical_section_reduce_block(ident_t *loc, kmp_int32 global_tid,
  2979. kmp_critical_name *crit) {
  2980. // this lock was visible to a customer and to the threading profile tool as a
  2981. // serial overhead span (although it's used for an internal purpose only)
2982. // why was it visible in the previous implementation?
2983. // should we keep it visible in the new reduce block?
  2984. kmp_user_lock_p lck;
  2985. #if KMP_USE_DYNAMIC_LOCK
  2986. kmp_dyna_lock_t *lk = (kmp_dyna_lock_t *)crit;
  2987. // Check if it is initialized.
  2988. if (*lk == 0) {
  2989. if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) {
  2990. KMP_COMPARE_AND_STORE_ACQ32((volatile kmp_int32 *)crit, 0,
  2991. KMP_GET_D_TAG(__kmp_user_lock_seq));
  2992. } else {
  2993. __kmp_init_indirect_csptr(crit, loc, global_tid,
  2994. KMP_GET_I_TAG(__kmp_user_lock_seq));
  2995. }
  2996. }
  2997. // Branch for accessing the actual lock object and set operation. This
  2998. // branching is inevitable since this lock initialization does not follow the
  2999. // normal dispatch path (lock table is not used).
  3000. if (KMP_EXTRACT_D_TAG(lk) != 0) {
  3001. lck = (kmp_user_lock_p)lk;
  3002. KMP_DEBUG_ASSERT(lck != NULL);
  3003. if (__kmp_env_consistency_check) {
  3004. __kmp_push_sync(global_tid, ct_critical, loc, lck, __kmp_user_lock_seq);
  3005. }
  3006. KMP_D_LOCK_FUNC(lk, set)(lk, global_tid);
  3007. } else {
  3008. kmp_indirect_lock_t *ilk = *((kmp_indirect_lock_t **)lk);
  3009. lck = ilk->lock;
  3010. KMP_DEBUG_ASSERT(lck != NULL);
  3011. if (__kmp_env_consistency_check) {
  3012. __kmp_push_sync(global_tid, ct_critical, loc, lck, __kmp_user_lock_seq);
  3013. }
  3014. KMP_I_LOCK_FUNC(ilk, set)(lck, global_tid);
  3015. }
  3016. #else // KMP_USE_DYNAMIC_LOCK
  3017. // We know that the fast reduction code is only emitted by Intel compilers
  3018. // with 32 byte critical sections. If there isn't enough space, then we
  3019. // have to use a pointer.
  3020. if (__kmp_base_user_lock_size <= INTEL_CRITICAL_SIZE) {
  3021. lck = (kmp_user_lock_p)crit;
  3022. } else {
  3023. lck = __kmp_get_critical_section_ptr(crit, loc, global_tid);
  3024. }
  3025. KMP_DEBUG_ASSERT(lck != NULL);
  3026. if (__kmp_env_consistency_check)
  3027. __kmp_push_sync(global_tid, ct_critical, loc, lck);
  3028. __kmp_acquire_user_lock_with_checks(lck, global_tid);
  3029. #endif // KMP_USE_DYNAMIC_LOCK
  3030. }
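// Illustrative sketch, not part of the runtime: the lazy initialization above
// hinges on a compare-and-store into the zero-initialized kmp_critical_name
// slot, so exactly one thread installs the lock and losing the race is
// harmless. The helper below is hypothetical:
#if 0
static inline void example_lazy_init(kmp_int32 *slot, kmp_int32 init_value) {
  if (*slot == 0) {                          // cheap pre-check
    kmp_int32 expected = 0;
    __atomic_compare_exchange_n(slot, &expected, init_value, 0,
                                __ATOMIC_ACQ_REL, __ATOMIC_ACQUIRE);
    // on return, *slot is initialized whether or not this thread won the race
  }
}
#endif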
  3031. // used in a critical section reduce block
  3032. static __forceinline void
  3033. __kmp_end_critical_section_reduce_block(ident_t *loc, kmp_int32 global_tid,
  3034. kmp_critical_name *crit) {
  3035. kmp_user_lock_p lck;
  3036. #if KMP_USE_DYNAMIC_LOCK
  3037. if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) {
  3038. lck = (kmp_user_lock_p)crit;
  3039. if (__kmp_env_consistency_check)
  3040. __kmp_pop_sync(global_tid, ct_critical, loc);
  3041. KMP_D_LOCK_FUNC(lck, unset)((kmp_dyna_lock_t *)lck, global_tid);
  3042. } else {
  3043. kmp_indirect_lock_t *ilk =
  3044. (kmp_indirect_lock_t *)TCR_PTR(*((kmp_indirect_lock_t **)crit));
  3045. if (__kmp_env_consistency_check)
  3046. __kmp_pop_sync(global_tid, ct_critical, loc);
  3047. KMP_I_LOCK_FUNC(ilk, unset)(ilk->lock, global_tid);
  3048. }
  3049. #else // KMP_USE_DYNAMIC_LOCK
  3050. // We know that the fast reduction code is only emitted by Intel compilers
  3051. // with 32 byte critical sections. If there isn't enough space, then we have
  3052. // to use a pointer.
  3053. if (__kmp_base_user_lock_size > 32) {
  3054. lck = *((kmp_user_lock_p *)crit);
  3055. KMP_ASSERT(lck != NULL);
  3056. } else {
  3057. lck = (kmp_user_lock_p)crit;
  3058. }
  3059. if (__kmp_env_consistency_check)
  3060. __kmp_pop_sync(global_tid, ct_critical, loc);
  3061. __kmp_release_user_lock_with_checks(lck, global_tid);
  3062. #endif // KMP_USE_DYNAMIC_LOCK
  3063. } // __kmp_end_critical_section_reduce_block
  3064. static __forceinline int
  3065. __kmp_swap_teams_for_teams_reduction(kmp_info_t *th, kmp_team_t **team_p,
  3066. int *task_state) {
  3067. kmp_team_t *team;
  3068. // Check if we are inside the teams construct?
  3069. if (th->th.th_teams_microtask) {
  3070. *team_p = team = th->th.th_team;
  3071. if (team->t.t_level == th->th.th_teams_level) {
  3072. // This is reduction at teams construct.
  3073. KMP_DEBUG_ASSERT(!th->th.th_info.ds.ds_tid); // AC: check that tid == 0
  3074. // Let's swap teams temporarily for the reduction.
  3075. th->th.th_info.ds.ds_tid = team->t.t_master_tid;
  3076. th->th.th_team = team->t.t_parent;
  3077. th->th.th_team_nproc = th->th.th_team->t.t_nproc;
  3078. th->th.th_task_team = th->th.th_team->t.t_task_team[0];
  3079. *task_state = th->th.th_task_state;
  3080. th->th.th_task_state = 0;
  3081. return 1;
  3082. }
  3083. }
  3084. return 0;
  3085. }
  3086. static __forceinline void
  3087. __kmp_restore_swapped_teams(kmp_info_t *th, kmp_team_t *team, int task_state) {
  3088. // Restore thread structure swapped in __kmp_swap_teams_for_teams_reduction.
  3089. th->th.th_info.ds.ds_tid = 0;
  3090. th->th.th_team = team;
  3091. th->th.th_team_nproc = team->t.t_nproc;
  3092. th->th.th_task_team = team->t.t_task_team[task_state];
  3093. __kmp_type_convert(task_state, &(th->th.th_task_state));
  3094. }
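// Illustrative sketch, not part of the runtime: the two helpers above form a
// save/swap ... restore pair around a teams reduction, exactly as the
// __kmpc_reduce* entry points below use them:
#if 0
  kmp_team_t *team;
  int task_state;
  kmp_info_t *th = __kmp_thread_from_gtid(global_tid);
  int swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state);
  /* ... run the reduction against the parent team ... */
  if (swapped)
    __kmp_restore_swapped_teams(th, team, task_state);
#endif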
  3095. /* 2.a.i. Reduce Block without a terminating barrier */
  3096. /*!
  3097. @ingroup SYNCHRONIZATION
  3098. @param loc source location information
  3099. @param global_tid global thread number
  3100. @param num_vars number of items (variables) to be reduced
  3101. @param reduce_size size of data in bytes to be reduced
  3102. @param reduce_data pointer to data to be reduced
  3103. @param reduce_func callback function providing reduction operation on two
  3104. operands and returning result of reduction in lhs_data
  3105. @param lck pointer to the unique lock data structure
  3106. @result 1 for the primary thread, 0 for all other team threads, 2 for all team
  3107. threads if atomic reduction needed
  3108. The nowait version is used for a reduce clause with the nowait argument.
  3109. */
  3110. kmp_int32
  3111. __kmpc_reduce_nowait(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars,
  3112. size_t reduce_size, void *reduce_data,
  3113. void (*reduce_func)(void *lhs_data, void *rhs_data),
  3114. kmp_critical_name *lck) {
  3115. KMP_COUNT_BLOCK(REDUCE_nowait);
  3116. int retval = 0;
  3117. PACKED_REDUCTION_METHOD_T packed_reduction_method;
  3118. kmp_info_t *th;
  3119. kmp_team_t *team;
  3120. int teams_swapped = 0, task_state;
  3121. KA_TRACE(10, ("__kmpc_reduce_nowait() enter: called T#%d\n", global_tid));
  3122. __kmp_assert_valid_gtid(global_tid);
  3123. // why do we need this initialization here at all?
3124. // A reduction clause cannot be used as a stand-alone directive.
  3125. // do not call __kmp_serial_initialize(), it will be called by
  3126. // __kmp_parallel_initialize() if needed
  3127. // possible detection of false-positive race by the threadchecker ???
  3128. if (!TCR_4(__kmp_init_parallel))
  3129. __kmp_parallel_initialize();
  3130. __kmp_resume_if_soft_paused();
  3131. // check correctness of reduce block nesting
  3132. #if KMP_USE_DYNAMIC_LOCK
  3133. if (__kmp_env_consistency_check)
  3134. __kmp_push_sync(global_tid, ct_reduce, loc, NULL, 0);
  3135. #else
  3136. if (__kmp_env_consistency_check)
  3137. __kmp_push_sync(global_tid, ct_reduce, loc, NULL);
  3138. #endif
  3139. th = __kmp_thread_from_gtid(global_tid);
  3140. teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state);
3141. // the packed_reduction_method value will be reused by the __kmpc_end_reduce*
3142. // functions, so the value should be kept in a variable
3143. // the variable should be either a construct-specific or a thread-specific
3144. // property, not a team-specific property
3145. // (a thread can reach the next reduce block on the next construct, and the
3146. // reduce method may differ on that construct)
3147. // an ident_t "loc" parameter could be used as a construct-specific property
3148. // (but what if loc == 0?)
3149. // (if both construct-specific and team-specific variables were shared,
3150. // then unnecessary extra syncs would be needed)
3151. // a thread-specific variable is better with regard to the two issues above
3152. // (next construct and extra syncs)
3153. // a thread-specific "th_local.reduction_method" variable is used currently
3154. // each thread executes the 'determine' and 'set' lines (no need to restrict
3155. // this to one thread, which would require unnecessary extra syncs)
  3156. packed_reduction_method = __kmp_determine_reduction_method(
  3157. loc, global_tid, num_vars, reduce_size, reduce_data, reduce_func, lck);
  3158. __KMP_SET_REDUCTION_METHOD(global_tid, packed_reduction_method);
  3159. OMPT_REDUCTION_DECL(th, global_tid);
  3160. if (packed_reduction_method == critical_reduce_block) {
  3161. OMPT_REDUCTION_BEGIN;
  3162. __kmp_enter_critical_section_reduce_block(loc, global_tid, lck);
  3163. retval = 1;
  3164. } else if (packed_reduction_method == empty_reduce_block) {
  3165. OMPT_REDUCTION_BEGIN;
  3166. // usage: if team size == 1, no synchronization is required ( Intel
  3167. // platforms only )
  3168. retval = 1;
  3169. } else if (packed_reduction_method == atomic_reduce_block) {
  3170. retval = 2;
  3171. // all threads should do this pop here (because __kmpc_end_reduce_nowait()
  3172. // won't be called by the code gen)
3173. // (this is not ideal, because the checking block has been closed by
3174. // this 'pop'
3175. // while the atomic operation has not been executed yet; it will be executed
3176. // slightly later, literally on the next instruction)
  3177. if (__kmp_env_consistency_check)
  3178. __kmp_pop_sync(global_tid, ct_reduce, loc);
  3179. } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
  3180. tree_reduce_block)) {
3181. // AT: performance issue: a real barrier here
3182. // AT: (if the primary thread is slow, other threads are blocked here waiting
3183. // for the primary thread to come and release them)
3184. // AT: (it's not what a customer might expect when specifying the NOWAIT clause)
3185. // AT: (specifying NOWAIT won't result in a performance improvement; it'll
3186. // be confusing to a customer)
3187. // AT: another implementation of *barrier_gather*nowait() (or some other design)
3188. // might be faster and more in line with the sense of NOWAIT
3189. // AT: TO DO: run the EPCC test and compare times
  3190. // this barrier should be invisible to a customer and to the threading profile
  3191. // tool (it's neither a terminating barrier nor customer's code, it's
  3192. // used for an internal purpose)
  3193. #if OMPT_SUPPORT
3194. // JP: can this barrier potentially lead to task scheduling?
3195. // JP: as long as there is a barrier in the implementation, OMPT should and
3196. // will provide the barrier events,
3197. // so we set up the necessary frame/return addresses.
  3198. ompt_frame_t *ompt_frame;
  3199. if (ompt_enabled.enabled) {
  3200. __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
  3201. if (ompt_frame->enter_frame.ptr == NULL)
  3202. ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
  3203. }
  3204. OMPT_STORE_RETURN_ADDRESS(global_tid);
  3205. #endif
  3206. #if USE_ITT_NOTIFY
  3207. __kmp_threads[global_tid]->th.th_ident = loc;
  3208. #endif
  3209. retval =
  3210. __kmp_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method),
  3211. global_tid, FALSE, reduce_size, reduce_data, reduce_func);
  3212. retval = (retval != 0) ? (0) : (1);
  3213. #if OMPT_SUPPORT && OMPT_OPTIONAL
  3214. if (ompt_enabled.enabled) {
  3215. ompt_frame->enter_frame = ompt_data_none;
  3216. }
  3217. #endif
  3218. // all other workers except primary thread should do this pop here
  3219. // ( none of other workers will get to __kmpc_end_reduce_nowait() )
  3220. if (__kmp_env_consistency_check) {
  3221. if (retval == 0) {
  3222. __kmp_pop_sync(global_tid, ct_reduce, loc);
  3223. }
  3224. }
  3225. } else {
  3226. // should never reach this block
  3227. KMP_ASSERT(0); // "unexpected method"
  3228. }
  3229. if (teams_swapped) {
  3230. __kmp_restore_swapped_teams(th, team, task_state);
  3231. }
  3232. KA_TRACE(
  3233. 10,
  3234. ("__kmpc_reduce_nowait() exit: called T#%d: method %08x, returns %08x\n",
  3235. global_tid, packed_reduction_method, retval));
  3236. return retval;
  3237. }
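// Illustrative sketch, not part of the runtime: roughly how compiler-generated
// code consumes the return value documented above (1: combine into the shared
// copy and call __kmpc_end_reduce_nowait, 2: fall back to an atomic update,
// 0: nothing more to do on this thread). loc, gtid, crit_name, the sums and
// my_reduce_func are hypothetical and assumed to be in scope.
#if 0
  switch (__kmpc_reduce_nowait(loc, gtid, /*num_vars=*/1, sizeof(local_sum),
                               &local_sum, my_reduce_func, &crit_name)) {
  case 1:                                    // serial/tree path
    shared_sum += local_sum;
    __kmpc_end_reduce_nowait(loc, gtid, &crit_name);
    break;
  case 2:                                    // atomic path, no end call emitted
    __atomic_fetch_add(&shared_sum, local_sum, __ATOMIC_RELAXED);
    break;
  default:                                   // 0: this thread is done
    break;
  }
#endif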
  3238. /*!
  3239. @ingroup SYNCHRONIZATION
  3240. @param loc source location information
  3241. @param global_tid global thread id.
  3242. @param lck pointer to the unique lock data structure
  3243. Finish the execution of a reduce nowait.
  3244. */
  3245. void __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid,
  3246. kmp_critical_name *lck) {
  3247. PACKED_REDUCTION_METHOD_T packed_reduction_method;
  3248. KA_TRACE(10, ("__kmpc_end_reduce_nowait() enter: called T#%d\n", global_tid));
  3249. __kmp_assert_valid_gtid(global_tid);
  3250. packed_reduction_method = __KMP_GET_REDUCTION_METHOD(global_tid);
  3251. OMPT_REDUCTION_DECL(__kmp_thread_from_gtid(global_tid), global_tid);
  3252. if (packed_reduction_method == critical_reduce_block) {
  3253. __kmp_end_critical_section_reduce_block(loc, global_tid, lck);
  3254. OMPT_REDUCTION_END;
  3255. } else if (packed_reduction_method == empty_reduce_block) {
  3256. // usage: if team size == 1, no synchronization is required ( on Intel
  3257. // platforms only )
  3258. OMPT_REDUCTION_END;
  3259. } else if (packed_reduction_method == atomic_reduce_block) {
  3260. // neither primary thread nor other workers should get here
  3261. // (code gen does not generate this call in case 2: atomic reduce block)
3262. // actually it would be better to remove this else-if entirely;
3263. // after removal this value would be checked by the 'else' and would assert
  3264. } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
  3265. tree_reduce_block)) {
  3266. // only primary thread gets here
  3267. // OMPT: tree reduction is annotated in the barrier code
  3268. } else {
  3269. // should never reach this block
  3270. KMP_ASSERT(0); // "unexpected method"
  3271. }
  3272. if (__kmp_env_consistency_check)
  3273. __kmp_pop_sync(global_tid, ct_reduce, loc);
  3274. KA_TRACE(10, ("__kmpc_end_reduce_nowait() exit: called T#%d: method %08x\n",
  3275. global_tid, packed_reduction_method));
  3276. return;
  3277. }
  3278. /* 2.a.ii. Reduce Block with a terminating barrier */
  3279. /*!
  3280. @ingroup SYNCHRONIZATION
  3281. @param loc source location information
  3282. @param global_tid global thread number
  3283. @param num_vars number of items (variables) to be reduced
  3284. @param reduce_size size of data in bytes to be reduced
  3285. @param reduce_data pointer to data to be reduced
  3286. @param reduce_func callback function providing reduction operation on two
  3287. operands and returning result of reduction in lhs_data
  3288. @param lck pointer to the unique lock data structure
  3289. @result 1 for the primary thread, 0 for all other team threads, 2 for all team
  3290. threads if atomic reduction needed
  3291. A blocking reduce that includes an implicit barrier.
  3292. */
  3293. kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars,
  3294. size_t reduce_size, void *reduce_data,
  3295. void (*reduce_func)(void *lhs_data, void *rhs_data),
  3296. kmp_critical_name *lck) {
  3297. KMP_COUNT_BLOCK(REDUCE_wait);
  3298. int retval = 0;
  3299. PACKED_REDUCTION_METHOD_T packed_reduction_method;
  3300. kmp_info_t *th;
  3301. kmp_team_t *team;
  3302. int teams_swapped = 0, task_state;
  3303. KA_TRACE(10, ("__kmpc_reduce() enter: called T#%d\n", global_tid));
  3304. __kmp_assert_valid_gtid(global_tid);
  3305. // why do we need this initialization here at all?
3306. // A reduction clause cannot be a stand-alone directive.
  3307. // do not call __kmp_serial_initialize(), it will be called by
  3308. // __kmp_parallel_initialize() if needed
  3309. // possible detection of false-positive race by the threadchecker ???
  3310. if (!TCR_4(__kmp_init_parallel))
  3311. __kmp_parallel_initialize();
  3312. __kmp_resume_if_soft_paused();
  3313. // check correctness of reduce block nesting
  3314. #if KMP_USE_DYNAMIC_LOCK
  3315. if (__kmp_env_consistency_check)
  3316. __kmp_push_sync(global_tid, ct_reduce, loc, NULL, 0);
  3317. #else
  3318. if (__kmp_env_consistency_check)
  3319. __kmp_push_sync(global_tid, ct_reduce, loc, NULL);
  3320. #endif
  3321. th = __kmp_thread_from_gtid(global_tid);
  3322. teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state);
  3323. packed_reduction_method = __kmp_determine_reduction_method(
  3324. loc, global_tid, num_vars, reduce_size, reduce_data, reduce_func, lck);
  3325. __KMP_SET_REDUCTION_METHOD(global_tid, packed_reduction_method);
  3326. OMPT_REDUCTION_DECL(th, global_tid);
  3327. if (packed_reduction_method == critical_reduce_block) {
  3328. OMPT_REDUCTION_BEGIN;
  3329. __kmp_enter_critical_section_reduce_block(loc, global_tid, lck);
  3330. retval = 1;
  3331. } else if (packed_reduction_method == empty_reduce_block) {
  3332. OMPT_REDUCTION_BEGIN;
  3333. // usage: if team size == 1, no synchronization is required ( Intel
  3334. // platforms only )
  3335. retval = 1;
  3336. } else if (packed_reduction_method == atomic_reduce_block) {
  3337. retval = 2;
  3338. } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
  3339. tree_reduce_block)) {
  3340. // case tree_reduce_block:
  3341. // this barrier should be visible to a customer and to the threading profile
  3342. // tool (it's a terminating barrier on constructs if NOWAIT not specified)
  3343. #if OMPT_SUPPORT
  3344. ompt_frame_t *ompt_frame;
  3345. if (ompt_enabled.enabled) {
  3346. __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
  3347. if (ompt_frame->enter_frame.ptr == NULL)
  3348. ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
  3349. }
  3350. OMPT_STORE_RETURN_ADDRESS(global_tid);
  3351. #endif
  3352. #if USE_ITT_NOTIFY
  3353. __kmp_threads[global_tid]->th.th_ident =
  3354. loc; // needed for correct notification of frames
  3355. #endif
  3356. retval =
  3357. __kmp_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method),
  3358. global_tid, TRUE, reduce_size, reduce_data, reduce_func);
  3359. retval = (retval != 0) ? (0) : (1);
  3360. #if OMPT_SUPPORT && OMPT_OPTIONAL
  3361. if (ompt_enabled.enabled) {
  3362. ompt_frame->enter_frame = ompt_data_none;
  3363. }
  3364. #endif
  3365. // all other workers except primary thread should do this pop here
  3366. // (none of other workers except primary will enter __kmpc_end_reduce())
  3367. if (__kmp_env_consistency_check) {
  3368. if (retval == 0) { // 0: all other workers; 1: primary thread
  3369. __kmp_pop_sync(global_tid, ct_reduce, loc);
  3370. }
  3371. }
  3372. } else {
  3373. // should never reach this block
  3374. KMP_ASSERT(0); // "unexpected method"
  3375. }
  3376. if (teams_swapped) {
  3377. __kmp_restore_swapped_teams(th, team, task_state);
  3378. }
  3379. KA_TRACE(10,
  3380. ("__kmpc_reduce() exit: called T#%d: method %08x, returns %08x\n",
  3381. global_tid, packed_reduction_method, retval));
  3382. return retval;
  3383. }
  3384. /*!
  3385. @ingroup SYNCHRONIZATION
  3386. @param loc source location information
  3387. @param global_tid global thread id.
  3388. @param lck pointer to the unique lock data structure
  3389. Finish the execution of a blocking reduce.
  3390. The <tt>lck</tt> pointer must be the same as that used in the corresponding
  3391. start function.
  3392. */
  3393. void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid,
  3394. kmp_critical_name *lck) {
  3395. PACKED_REDUCTION_METHOD_T packed_reduction_method;
  3396. kmp_info_t *th;
  3397. kmp_team_t *team;
  3398. int teams_swapped = 0, task_state;
  3399. KA_TRACE(10, ("__kmpc_end_reduce() enter: called T#%d\n", global_tid));
  3400. __kmp_assert_valid_gtid(global_tid);
  3401. th = __kmp_thread_from_gtid(global_tid);
  3402. teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state);
  3403. packed_reduction_method = __KMP_GET_REDUCTION_METHOD(global_tid);
  3404. // this barrier should be visible to a customer and to the threading profile
  3405. // tool (it's a terminating barrier on constructs if NOWAIT not specified)
  3406. OMPT_REDUCTION_DECL(th, global_tid);
  3407. if (packed_reduction_method == critical_reduce_block) {
  3408. __kmp_end_critical_section_reduce_block(loc, global_tid, lck);
  3409. OMPT_REDUCTION_END;
  3410. // TODO: implicit barrier: should be exposed
  3411. #if OMPT_SUPPORT
  3412. ompt_frame_t *ompt_frame;
  3413. if (ompt_enabled.enabled) {
  3414. __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
  3415. if (ompt_frame->enter_frame.ptr == NULL)
  3416. ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
  3417. }
  3418. OMPT_STORE_RETURN_ADDRESS(global_tid);
  3419. #endif
  3420. #if USE_ITT_NOTIFY
  3421. __kmp_threads[global_tid]->th.th_ident = loc;
  3422. #endif
  3423. __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
  3424. #if OMPT_SUPPORT && OMPT_OPTIONAL
  3425. if (ompt_enabled.enabled) {
  3426. ompt_frame->enter_frame = ompt_data_none;
  3427. }
  3428. #endif
  3429. } else if (packed_reduction_method == empty_reduce_block) {
  3430. OMPT_REDUCTION_END;
  3431. // usage: if team size==1, no synchronization is required (Intel platforms only)
  3432. // TODO: implicit barrier: should be exposed
  3433. #if OMPT_SUPPORT
  3434. ompt_frame_t *ompt_frame;
  3435. if (ompt_enabled.enabled) {
  3436. __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
  3437. if (ompt_frame->enter_frame.ptr == NULL)
  3438. ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
  3439. }
  3440. OMPT_STORE_RETURN_ADDRESS(global_tid);
  3441. #endif
  3442. #if USE_ITT_NOTIFY
  3443. __kmp_threads[global_tid]->th.th_ident = loc;
  3444. #endif
  3445. __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
  3446. #if OMPT_SUPPORT && OMPT_OPTIONAL
  3447. if (ompt_enabled.enabled) {
  3448. ompt_frame->enter_frame = ompt_data_none;
  3449. }
  3450. #endif
  3451. } else if (packed_reduction_method == atomic_reduce_block) {
  3452. #if OMPT_SUPPORT
  3453. ompt_frame_t *ompt_frame;
  3454. if (ompt_enabled.enabled) {
  3455. __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
  3456. if (ompt_frame->enter_frame.ptr == NULL)
  3457. ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
  3458. }
  3459. OMPT_STORE_RETURN_ADDRESS(global_tid);
  3460. #endif
  3461. // TODO: implicit barrier: should be exposed
  3462. #if USE_ITT_NOTIFY
  3463. __kmp_threads[global_tid]->th.th_ident = loc;
  3464. #endif
  3465. __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
  3466. #if OMPT_SUPPORT && OMPT_OPTIONAL
  3467. if (ompt_enabled.enabled) {
  3468. ompt_frame->enter_frame = ompt_data_none;
  3469. }
  3470. #endif
  3471. } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
  3472. tree_reduce_block)) {
  3473. // only primary thread executes here (primary releases all other workers)
  3474. __kmp_end_split_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method),
  3475. global_tid);
  3476. } else {
  3477. // should never reach this block
  3478. KMP_ASSERT(0); // "unexpected method"
  3479. }
  3480. if (teams_swapped) {
  3481. __kmp_restore_swapped_teams(th, team, task_state);
  3482. }
  3483. if (__kmp_env_consistency_check)
  3484. __kmp_pop_sync(global_tid, ct_reduce, loc);
  3485. KA_TRACE(10, ("__kmpc_end_reduce() exit: called T#%d: method %08x\n",
  3486. global_tid, packed_reduction_method));
  3487. return;
  3488. }
  3489. #undef __KMP_GET_REDUCTION_METHOD
  3490. #undef __KMP_SET_REDUCTION_METHOD
  3491. /* end of interface to fast scalable reduce routines */
  3492. kmp_uint64 __kmpc_get_taskid() {
  3493. kmp_int32 gtid;
  3494. kmp_info_t *thread;
  3495. gtid = __kmp_get_gtid();
  3496. if (gtid < 0) {
  3497. return 0;
  3498. }
  3499. thread = __kmp_thread_from_gtid(gtid);
  3500. return thread->th.th_current_task->td_task_id;
  3501. } // __kmpc_get_taskid
  3502. kmp_uint64 __kmpc_get_parent_taskid() {
  3503. kmp_int32 gtid;
  3504. kmp_info_t *thread;
  3505. kmp_taskdata_t *parent_task;
  3506. gtid = __kmp_get_gtid();
  3507. if (gtid < 0) {
  3508. return 0;
  3509. }
  3510. thread = __kmp_thread_from_gtid(gtid);
  3511. parent_task = thread->th.th_current_task->td_parent;
  3512. return (parent_task == NULL ? 0 : parent_task->td_task_id);
  3513. } // __kmpc_get_parent_taskid
  3514. /*!
  3515. @ingroup WORK_SHARING
  3516. @param loc source location information.
  3517. @param gtid global thread number.
  3518. @param num_dims number of associated doacross loops.
  3519. @param dims info on loops bounds.
  3520. Initialize doacross loop information.
3521. Expect the compiler to send us inclusive bounds,
3522. e.g. for (i = 2; i < 9; i += 2) gives lo=2, up=8, st=2.
  3523. */
  3524. void __kmpc_doacross_init(ident_t *loc, int gtid, int num_dims,
  3525. const struct kmp_dim *dims) {
  3526. __kmp_assert_valid_gtid(gtid);
  3527. int j, idx;
  3528. kmp_int64 last, trace_count;
  3529. kmp_info_t *th = __kmp_threads[gtid];
  3530. kmp_team_t *team = th->th.th_team;
  3531. kmp_uint32 *flags;
  3532. kmp_disp_t *pr_buf = th->th.th_dispatch;
  3533. dispatch_shared_info_t *sh_buf;
  3534. KA_TRACE(
  3535. 20,
  3536. ("__kmpc_doacross_init() enter: called T#%d, num dims %d, active %d\n",
  3537. gtid, num_dims, !team->t.t_serialized));
  3538. KMP_DEBUG_ASSERT(dims != NULL);
  3539. KMP_DEBUG_ASSERT(num_dims > 0);
  3540. if (team->t.t_serialized) {
  3541. KA_TRACE(20, ("__kmpc_doacross_init() exit: serialized team\n"));
  3542. return; // no dependencies if team is serialized
  3543. }
  3544. KMP_DEBUG_ASSERT(team->t.t_nproc > 1);
  3545. idx = pr_buf->th_doacross_buf_idx++; // Increment index of shared buffer for
  3546. // the next loop
  3547. sh_buf = &team->t.t_disp_buffer[idx % __kmp_dispatch_num_buffers];
  3548. // Save bounds info into allocated private buffer
  3549. KMP_DEBUG_ASSERT(pr_buf->th_doacross_info == NULL);
  3550. pr_buf->th_doacross_info = (kmp_int64 *)__kmp_thread_malloc(
  3551. th, sizeof(kmp_int64) * (4 * num_dims + 1));
  3552. KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL);
  3553. pr_buf->th_doacross_info[0] =
  3554. (kmp_int64)num_dims; // first element is number of dimensions
3555. // Also save the address of num_done so it can be accessed later without
3556. // knowing the buffer index
  3557. pr_buf->th_doacross_info[1] = (kmp_int64)&sh_buf->doacross_num_done;
  3558. pr_buf->th_doacross_info[2] = dims[0].lo;
  3559. pr_buf->th_doacross_info[3] = dims[0].up;
  3560. pr_buf->th_doacross_info[4] = dims[0].st;
  3561. last = 5;
  3562. for (j = 1; j < num_dims; ++j) {
  3563. kmp_int64
  3564. range_length; // To keep ranges of all dimensions but the first dims[0]
  3565. if (dims[j].st == 1) { // most common case
  3566. // AC: should we care of ranges bigger than LLONG_MAX? (not for now)
  3567. range_length = dims[j].up - dims[j].lo + 1;
  3568. } else {
  3569. if (dims[j].st > 0) {
  3570. KMP_DEBUG_ASSERT(dims[j].up > dims[j].lo);
  3571. range_length = (kmp_uint64)(dims[j].up - dims[j].lo) / dims[j].st + 1;
  3572. } else { // negative increment
  3573. KMP_DEBUG_ASSERT(dims[j].lo > dims[j].up);
  3574. range_length =
  3575. (kmp_uint64)(dims[j].lo - dims[j].up) / (-dims[j].st) + 1;
  3576. }
  3577. }
  3578. pr_buf->th_doacross_info[last++] = range_length;
  3579. pr_buf->th_doacross_info[last++] = dims[j].lo;
  3580. pr_buf->th_doacross_info[last++] = dims[j].up;
  3581. pr_buf->th_doacross_info[last++] = dims[j].st;
  3582. }
  3583. // Compute total trip count.
  3584. // Start with range of dims[0] which we don't need to keep in the buffer.
  3585. if (dims[0].st == 1) { // most common case
  3586. trace_count = dims[0].up - dims[0].lo + 1;
  3587. } else if (dims[0].st > 0) {
  3588. KMP_DEBUG_ASSERT(dims[0].up > dims[0].lo);
  3589. trace_count = (kmp_uint64)(dims[0].up - dims[0].lo) / dims[0].st + 1;
  3590. } else { // negative increment
  3591. KMP_DEBUG_ASSERT(dims[0].lo > dims[0].up);
  3592. trace_count = (kmp_uint64)(dims[0].lo - dims[0].up) / (-dims[0].st) + 1;
  3593. }
  3594. for (j = 1; j < num_dims; ++j) {
  3595. trace_count *= pr_buf->th_doacross_info[4 * j + 1]; // use kept ranges
  3596. }
  3597. KMP_DEBUG_ASSERT(trace_count > 0);
  3598. // Check if shared buffer is not occupied by other loop (idx -
  3599. // __kmp_dispatch_num_buffers)
  3600. if (idx != sh_buf->doacross_buf_idx) {
  3601. // Shared buffer is occupied, wait for it to be free
  3602. __kmp_wait_4((volatile kmp_uint32 *)&sh_buf->doacross_buf_idx, idx,
  3603. __kmp_eq_4, NULL);
  3604. }
  3605. #if KMP_32_BIT_ARCH
  3606. // Check if we are the first thread. After the CAS the first thread gets 0,
  3607. // others get 1 if initialization is in progress, allocated pointer otherwise.
  3608. // Treat pointer as volatile integer (value 0 or 1) until memory is allocated.
  3609. flags = (kmp_uint32 *)KMP_COMPARE_AND_STORE_RET32(
  3610. (volatile kmp_int32 *)&sh_buf->doacross_flags, NULL, 1);
  3611. #else
  3612. flags = (kmp_uint32 *)KMP_COMPARE_AND_STORE_RET64(
  3613. (volatile kmp_int64 *)&sh_buf->doacross_flags, NULL, 1LL);
  3614. #endif
  3615. if (flags == NULL) {
  3616. // we are the first thread, allocate the array of flags
  3617. size_t size =
  3618. (size_t)trace_count / 8 + 8; // in bytes, use single bit per iteration
  3619. flags = (kmp_uint32 *)__kmp_thread_calloc(th, size, 1);
  3620. KMP_MB();
  3621. sh_buf->doacross_flags = flags;
  3622. } else if (flags == (kmp_uint32 *)1) {
  3623. #if KMP_32_BIT_ARCH
  3624. // initialization is still in progress, need to wait
  3625. while (*(volatile kmp_int32 *)&sh_buf->doacross_flags == 1)
  3626. #else
  3627. while (*(volatile kmp_int64 *)&sh_buf->doacross_flags == 1LL)
  3628. #endif
  3629. KMP_YIELD(TRUE);
  3630. KMP_MB();
  3631. } else {
  3632. KMP_MB();
  3633. }
  3634. KMP_DEBUG_ASSERT(sh_buf->doacross_flags > (kmp_uint32 *)1); // check ptr value
  3635. pr_buf->th_doacross_flags =
  3636. sh_buf->doacross_flags; // save private copy in order to not
  3637. // touch shared buffer on each iteration
  3638. KA_TRACE(20, ("__kmpc_doacross_init() exit: T#%d\n", gtid));
  3639. }
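// Illustrative sketch, not part of the runtime: for the inclusive-bounds
// example in the doc comment above, "for (i = 2; i < 9; i += 2)" with a
// one-dimensional ordered(1) clause, the compiler describes the dimension as
// lo=2, up=8, st=2 and brackets each iteration with wait/post calls. Exact
// codegen varies; loc, gtid and the chunk bounds lb/ub are assumed in scope.
#if 0
  struct kmp_dim dim = {/*lo=*/2, /*up=*/8, /*st=*/2};
  __kmpc_doacross_init(loc, gtid, /*num_dims=*/1, &dim);
  for (kmp_int64 i = lb; i <= ub; i += 2) {  // this thread's chunk
    kmp_int64 sink = i - 2;                  // depend(sink : i - 2)
    __kmpc_doacross_wait(loc, gtid, &sink);
    /* ... ordered loop body ... */
    __kmpc_doacross_post(loc, gtid, &i);     // depend(source)
  }
  __kmpc_doacross_fini(loc, gtid);
#endif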
void __kmpc_doacross_wait(ident_t *loc, int gtid, const kmp_int64 *vec) {
  __kmp_assert_valid_gtid(gtid);
  kmp_int64 shft;
  size_t num_dims, i;
  kmp_uint32 flag;
  kmp_int64 iter_number; // iteration number of "collapsed" loop nest
  kmp_info_t *th = __kmp_threads[gtid];
  kmp_team_t *team = th->th.th_team;
  kmp_disp_t *pr_buf;
  kmp_int64 lo, up, st;
  KA_TRACE(20, ("__kmpc_doacross_wait() enter: called T#%d\n", gtid));
  if (team->t.t_serialized) {
    KA_TRACE(20, ("__kmpc_doacross_wait() exit: serialized team\n"));
    return; // no dependencies if team is serialized
  }
  // calculate sequential iteration number and check out-of-bounds condition
  pr_buf = th->th.th_dispatch;
  KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL);
  num_dims = (size_t)pr_buf->th_doacross_info[0];
  lo = pr_buf->th_doacross_info[2];
  up = pr_buf->th_doacross_info[3];
  st = pr_buf->th_doacross_info[4];
#if OMPT_SUPPORT && OMPT_OPTIONAL
  ompt_dependence_t deps[num_dims];
#endif
  if (st == 1) { // most common case
    if (vec[0] < lo || vec[0] > up) {
      KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
                    "bounds [%lld,%lld]\n",
                    gtid, vec[0], lo, up));
      return;
    }
    iter_number = vec[0] - lo;
  } else if (st > 0) {
    if (vec[0] < lo || vec[0] > up) {
      KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
                    "bounds [%lld,%lld]\n",
                    gtid, vec[0], lo, up));
      return;
    }
    iter_number = (kmp_uint64)(vec[0] - lo) / st;
  } else { // negative increment
    if (vec[0] > lo || vec[0] < up) {
      KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
                    "bounds [%lld,%lld]\n",
                    gtid, vec[0], lo, up));
      return;
    }
    iter_number = (kmp_uint64)(lo - vec[0]) / (-st);
  }
#if OMPT_SUPPORT && OMPT_OPTIONAL
  deps[0].variable.value = iter_number;
  deps[0].dependence_type = ompt_dependence_type_sink;
#endif
  for (i = 1; i < num_dims; ++i) {
    kmp_int64 iter, ln;
    size_t j = i * 4;
    ln = pr_buf->th_doacross_info[j + 1];
    lo = pr_buf->th_doacross_info[j + 2];
    up = pr_buf->th_doacross_info[j + 3];
    st = pr_buf->th_doacross_info[j + 4];
    if (st == 1) {
      if (vec[i] < lo || vec[i] > up) {
        KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
                      "bounds [%lld,%lld]\n",
                      gtid, vec[i], lo, up));
        return;
      }
      iter = vec[i] - lo;
    } else if (st > 0) {
      if (vec[i] < lo || vec[i] > up) {
        KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
                      "bounds [%lld,%lld]\n",
                      gtid, vec[i], lo, up));
        return;
      }
      iter = (kmp_uint64)(vec[i] - lo) / st;
    } else { // st < 0
      if (vec[i] > lo || vec[i] < up) {
        KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
                      "bounds [%lld,%lld]\n",
                      gtid, vec[i], lo, up));
        return;
      }
      iter = (kmp_uint64)(lo - vec[i]) / (-st);
    }
    iter_number = iter + ln * iter_number;
#if OMPT_SUPPORT && OMPT_OPTIONAL
    deps[i].variable.value = iter;
    deps[i].dependence_type = ompt_dependence_type_sink;
#endif
  }
  shft = iter_number % 32; // use 32-bit granularity
  iter_number >>= 5; // divided by 32
  flag = 1 << shft;
  while ((flag & pr_buf->th_doacross_flags[iter_number]) == 0) {
    KMP_YIELD(TRUE);
  }
  KMP_MB();
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.ompt_callback_dependences) {
    ompt_callbacks.ompt_callback(ompt_callback_dependences)(
        &(OMPT_CUR_TASK_INFO(th)->task_data), deps, (kmp_uint32)num_dims);
  }
#endif
  KA_TRACE(20,
           ("__kmpc_doacross_wait() exit: T#%d wait for iter %lld completed\n",
            gtid, (iter_number << 5) + shft));
}
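/* Worked example of the flag lookup above (illustrative; unit strides and
   zero-based lower bounds assumed): for a 2-D nest whose second dimension has
   ln == 10 iterations, the sink vector (i == 3, j == 7) linearizes to
   iter_number = 7 + 10 * 3 = 37, so the waiter spins on bit 37 % 32 == 5 of
   word 37 >> 5 == 1 in th_doacross_flags until the matching
   __kmpc_doacross_post() sets it. */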
void __kmpc_doacross_post(ident_t *loc, int gtid, const kmp_int64 *vec) {
  __kmp_assert_valid_gtid(gtid);
  kmp_int64 shft;
  size_t num_dims, i;
  kmp_uint32 flag;
  kmp_int64 iter_number; // iteration number of "collapsed" loop nest
  kmp_info_t *th = __kmp_threads[gtid];
  kmp_team_t *team = th->th.th_team;
  kmp_disp_t *pr_buf;
  kmp_int64 lo, st;
  KA_TRACE(20, ("__kmpc_doacross_post() enter: called T#%d\n", gtid));
  if (team->t.t_serialized) {
    KA_TRACE(20, ("__kmpc_doacross_post() exit: serialized team\n"));
    return; // no dependencies if team is serialized
  }
  // calculate sequential iteration number (same as in "wait" but no
  // out-of-bounds checks)
  pr_buf = th->th.th_dispatch;
  KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL);
  num_dims = (size_t)pr_buf->th_doacross_info[0];
  lo = pr_buf->th_doacross_info[2];
  st = pr_buf->th_doacross_info[4];
#if OMPT_SUPPORT && OMPT_OPTIONAL
  ompt_dependence_t deps[num_dims];
#endif
  if (st == 1) { // most common case
    iter_number = vec[0] - lo;
  } else if (st > 0) {
    iter_number = (kmp_uint64)(vec[0] - lo) / st;
  } else { // negative increment
    iter_number = (kmp_uint64)(lo - vec[0]) / (-st);
  }
#if OMPT_SUPPORT && OMPT_OPTIONAL
  deps[0].variable.value = iter_number;
  deps[0].dependence_type = ompt_dependence_type_source;
#endif
  for (i = 1; i < num_dims; ++i) {
    kmp_int64 iter, ln;
    size_t j = i * 4;
    ln = pr_buf->th_doacross_info[j + 1];
    lo = pr_buf->th_doacross_info[j + 2];
    st = pr_buf->th_doacross_info[j + 4];
    if (st == 1) {
      iter = vec[i] - lo;
    } else if (st > 0) {
      iter = (kmp_uint64)(vec[i] - lo) / st;
    } else { // st < 0
      iter = (kmp_uint64)(lo - vec[i]) / (-st);
    }
    iter_number = iter + ln * iter_number;
#if OMPT_SUPPORT && OMPT_OPTIONAL
    deps[i].variable.value = iter;
    deps[i].dependence_type = ompt_dependence_type_source;
#endif
  }
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.ompt_callback_dependences) {
    ompt_callbacks.ompt_callback(ompt_callback_dependences)(
        &(OMPT_CUR_TASK_INFO(th)->task_data), deps, (kmp_uint32)num_dims);
  }
#endif
  shft = iter_number % 32; // use 32-bit granularity
  iter_number >>= 5; // divided by 32
  flag = 1 << shft;
  KMP_MB();
  if ((flag & pr_buf->th_doacross_flags[iter_number]) == 0)
    KMP_TEST_THEN_OR32(&pr_buf->th_doacross_flags[iter_number], flag);
  KA_TRACE(20, ("__kmpc_doacross_post() exit: T#%d iter %lld posted\n", gtid,
                (iter_number << 5) + shft));
}
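/* Note (descriptive, not from the original source): the post side computes
   the same linearized iteration number as __kmpc_doacross_wait() and sets the
   corresponding bit with an atomic OR. The plain read guarding
   KMP_TEST_THEN_OR32() is only an optimization that skips the atomic when the
   bit is already visible as set; re-posting an already-posted iteration is
   harmless because the OR is idempotent. */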
void __kmpc_doacross_fini(ident_t *loc, int gtid) {
  __kmp_assert_valid_gtid(gtid);
  kmp_int32 num_done;
  kmp_info_t *th = __kmp_threads[gtid];
  kmp_team_t *team = th->th.th_team;
  kmp_disp_t *pr_buf = th->th.th_dispatch;
  KA_TRACE(20, ("__kmpc_doacross_fini() enter: called T#%d\n", gtid));
  if (team->t.t_serialized) {
    KA_TRACE(20, ("__kmpc_doacross_fini() exit: serialized team %p\n", team));
    return; // nothing to do
  }
  num_done =
      KMP_TEST_THEN_INC32((kmp_uintptr_t)(pr_buf->th_doacross_info[1])) + 1;
  if (num_done == th->th.th_team_nproc) {
    // we are the last thread, need to free shared resources
    int idx = pr_buf->th_doacross_buf_idx - 1;
    dispatch_shared_info_t *sh_buf =
        &team->t.t_disp_buffer[idx % __kmp_dispatch_num_buffers];
    KMP_DEBUG_ASSERT(pr_buf->th_doacross_info[1] ==
                     (kmp_int64)&sh_buf->doacross_num_done);
    KMP_DEBUG_ASSERT(num_done == sh_buf->doacross_num_done);
    KMP_DEBUG_ASSERT(idx == sh_buf->doacross_buf_idx);
    __kmp_thread_free(th, CCAST(kmp_uint32 *, sh_buf->doacross_flags));
    sh_buf->doacross_flags = NULL;
    sh_buf->doacross_num_done = 0;
    sh_buf->doacross_buf_idx +=
        __kmp_dispatch_num_buffers; // free buffer for future re-use
  }
  // free private resources (need to keep buffer index forever)
  pr_buf->th_doacross_flags = NULL;
  __kmp_thread_free(th, (void *)pr_buf->th_doacross_info);
  pr_buf->th_doacross_info = NULL;
  KA_TRACE(20, ("__kmpc_doacross_fini() exit: T#%d\n", gtid));
}
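/* Lifecycle note (descriptive, not from the original source): every thread of
   the team calls __kmpc_doacross_fini() once. The last arrival, counted via
   the shared doacross_num_done counter referenced by th_doacross_info[1],
   frees the shared flag array and advances doacross_buf_idx by
   __kmp_dispatch_num_buffers, which is what __kmpc_doacross_init() waits for
   before recycling the same dispatch buffer slot for a later loop. */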
/* OpenMP 5.1 Memory Management routines */
void *omp_alloc(size_t size, omp_allocator_handle_t allocator) {
  return __kmp_alloc(__kmp_entry_gtid(), 0, size, allocator);
}

void *omp_aligned_alloc(size_t align, size_t size,
                        omp_allocator_handle_t allocator) {
  return __kmp_alloc(__kmp_entry_gtid(), align, size, allocator);
}

void *omp_calloc(size_t nmemb, size_t size, omp_allocator_handle_t allocator) {
  return __kmp_calloc(__kmp_entry_gtid(), 0, nmemb, size, allocator);
}

void *omp_aligned_calloc(size_t align, size_t nmemb, size_t size,
                         omp_allocator_handle_t allocator) {
  return __kmp_calloc(__kmp_entry_gtid(), align, nmemb, size, allocator);
}

void *omp_realloc(void *ptr, size_t size, omp_allocator_handle_t allocator,
                  omp_allocator_handle_t free_allocator) {
  return __kmp_realloc(__kmp_entry_gtid(), ptr, size, allocator,
                       free_allocator);
}

void omp_free(void *ptr, omp_allocator_handle_t allocator) {
  ___kmpc_free(__kmp_entry_gtid(), ptr, allocator);
}
/* end of OpenMP 5.1 Memory Management routines */
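/* Usage sketch (illustrative only): these entry points back the standard
   OpenMP allocator API, e.g.

     double *buf = (double *)omp_aligned_alloc(64, n * sizeof(double),
                                               omp_default_mem_alloc);
     // ... use buf ...
     omp_free(buf, omp_default_mem_alloc);

   omp_default_mem_alloc is one of the predefined allocator handles; n is a
   placeholder for the element count. */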
int __kmpc_get_target_offload(void) {
  if (!__kmp_init_serial) {
    __kmp_serial_initialize();
  }
  return __kmp_target_offload;
}

int __kmpc_pause_resource(kmp_pause_status_t level) {
  if (!__kmp_init_serial) {
    return 1; // Can't pause if runtime is not initialized
  }
  return __kmp_pause_resource(level);
}
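/* Note (descriptive, not from the original source): a nonzero return reports
   failure, matching the omp_pause_resource()/omp_pause_resource_all()
   convention of returning zero on success; the actual pause work is delegated
   to __kmp_pause_resource(). */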
void __kmpc_error(ident_t *loc, int severity, const char *message) {
  if (!__kmp_init_serial)
    __kmp_serial_initialize();
  KMP_ASSERT(severity == severity_warning || severity == severity_fatal);
#if OMPT_SUPPORT
  if (ompt_enabled.enabled && ompt_enabled.ompt_callback_error) {
    ompt_callbacks.ompt_callback(ompt_callback_error)(
        (ompt_severity_t)severity, message, KMP_STRLEN(message),
        OMPT_GET_RETURN_ADDRESS(0));
  }
#endif // OMPT_SUPPORT
  char *src_loc;
  if (loc && loc->psource) {
    kmp_str_loc_t str_loc = __kmp_str_loc_init(loc->psource, false);
    src_loc =
        __kmp_str_format("%s:%s:%s", str_loc.file, str_loc.line, str_loc.col);
    __kmp_str_loc_free(&str_loc);
  } else {
    src_loc = __kmp_str_format("unknown");
  }
  if (severity == severity_warning)
    KMP_WARNING(UserDirectedWarning, src_loc, message);
  else
    KMP_FATAL(UserDirectedError, src_loc, message);
  __kmp_str_free(&src_loc);
}
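/* Usage sketch (illustrative only): this is the runtime side of the
   OpenMP 5.1 error directive, e.g.

     #pragma omp error at(execution) severity(warning) message("check input")

   which a compiler may lower to a call such as
   __kmpc_error(loc, severity_warning, "check input"); the exact lowering is
   compiler-dependent. */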
// Mark begin of scope directive.
void __kmpc_scope(ident_t *loc, kmp_int32 gtid, void *reserved) {
  // reserved is for extension of scope directive and not used.
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled && ompt_enabled.ompt_callback_work) {
    kmp_team_t *team = __kmp_threads[gtid]->th.th_team;
    int tid = __kmp_tid_from_gtid(gtid);
    ompt_callbacks.ompt_callback(ompt_callback_work)(
        ompt_work_scope, ompt_scope_begin,
        &(team->t.ompt_team_info.parallel_data),
        &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 1,
        OMPT_GET_RETURN_ADDRESS(0));
  }
#endif // OMPT_SUPPORT && OMPT_OPTIONAL
}

// Mark end of scope directive
void __kmpc_end_scope(ident_t *loc, kmp_int32 gtid, void *reserved) {
  // reserved is for extension of scope directive and not used.
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled && ompt_enabled.ompt_callback_work) {
    kmp_team_t *team = __kmp_threads[gtid]->th.th_team;
    int tid = __kmp_tid_from_gtid(gtid);
    ompt_callbacks.ompt_callback(ompt_callback_work)(
        ompt_work_scope, ompt_scope_end,
        &(team->t.ompt_team_info.parallel_data),
        &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 1,
        OMPT_GET_RETURN_ADDRESS(0));
  }
#endif // OMPT_SUPPORT && OMPT_OPTIONAL
}
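/* Usage sketch (illustrative only): __kmpc_scope()/__kmpc_end_scope() bracket
   the OpenMP 5.1 scope construct, e.g.

     #pragma omp scope reduction(+ : sum)
     { sum += partial; }

   Their only effect here is to raise the OMPT ompt_callback_work events
   (ompt_work_scope) for tools; sum and partial above are placeholders. */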
#ifdef KMP_USE_VERSION_SYMBOLS
// For GOMP compatibility there are two versions of each omp_* API.
// One is the plain C symbol and one is the Fortran symbol with an appended
// underscore. When we implement a specific ompc_* version of an omp_*
// function, we want the plain GOMP versioned symbol to alias the ompc_* version
// instead of the Fortran versions in kmp_ftn_entry.h
extern "C" {
// Have to undef these from omp.h so they aren't translated into
// their ompc counterparts in the KMP_VERSION_OMPC_SYMBOL macros below
#ifdef omp_set_affinity_format
#undef omp_set_affinity_format
#endif
#ifdef omp_get_affinity_format
#undef omp_get_affinity_format
#endif
#ifdef omp_display_affinity
#undef omp_display_affinity
#endif
#ifdef omp_capture_affinity
#undef omp_capture_affinity
#endif

KMP_VERSION_OMPC_SYMBOL(ompc_set_affinity_format, omp_set_affinity_format, 50,
                        "OMP_5.0");
KMP_VERSION_OMPC_SYMBOL(ompc_get_affinity_format, omp_get_affinity_format, 50,
                        "OMP_5.0");
KMP_VERSION_OMPC_SYMBOL(ompc_display_affinity, omp_display_affinity, 50,
                        "OMP_5.0");
KMP_VERSION_OMPC_SYMBOL(ompc_capture_affinity, omp_capture_affinity, 50,
                        "OMP_5.0");
} // extern "C"
#endif