- /*
- * kmp_csupport.cpp -- kfront linkage support for OpenMP.
- */
- //===----------------------------------------------------------------------===//
- //
- // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
- // See https://llvm.org/LICENSE.txt for license information.
- // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
- //
- //===----------------------------------------------------------------------===//
- #define __KMP_IMP
- #include "omp.h" /* extern "C" declarations of user-visible routines */
- #include "kmp.h"
- #include "kmp_error.h"
- #include "kmp_i18n.h"
- #include "kmp_itt.h"
- #include "kmp_lock.h"
- #include "kmp_stats.h"
- #include "ompt-specific.h"
- #define MAX_MESSAGE 512
- // flags will be used in future, e.g. to implement openmp_strict library
- // restrictions
- /*!
- * @ingroup STARTUP_SHUTDOWN
- * @param loc in source location information
- * @param flags in for future use (currently ignored)
- *
- * Initialize the runtime library. This call is optional; if it is not made then
- * it will be implicitly called by attempts to use other library functions.
- */
- void __kmpc_begin(ident_t *loc, kmp_int32 flags) {
- // By default __kmpc_begin() is no-op.
- char *env;
- if ((env = getenv("KMP_INITIAL_THREAD_BIND")) != NULL &&
- __kmp_str_match_true(env)) {
- __kmp_middle_initialize();
- __kmp_assign_root_init_mask();
- KC_TRACE(10, ("__kmpc_begin: middle initialization called\n"));
- } else if (__kmp_ignore_mppbeg() == FALSE) {
- // By default __kmp_ignore_mppbeg() returns TRUE.
- __kmp_internal_begin();
- KC_TRACE(10, ("__kmpc_begin: called\n"));
- }
- }
- /*!
- * @ingroup STARTUP_SHUTDOWN
- * @param loc source location information
- *
- * Shut down the runtime library. This is also optional, and even if called will
- * not do anything unless the `KMP_IGNORE_MPPEND` environment variable is set to
- * zero.
- */
- void __kmpc_end(ident_t *loc) {
- // By default, __kmp_ignore_mppend() returns TRUE which makes __kmpc_end()
- // call no-op. However, this can be overridden with KMP_IGNORE_MPPEND
- // environment variable. If KMP_IGNORE_MPPEND is 0, __kmp_ignore_mppend()
- // returns FALSE and __kmpc_end() will unregister this root (it can cause
- // library shut down).
- if (__kmp_ignore_mppend() == FALSE) {
- KC_TRACE(10, ("__kmpc_end: called\n"));
- KA_TRACE(30, ("__kmpc_end\n"));
- __kmp_internal_end_thread(-1);
- }
- #if KMP_OS_WINDOWS && OMPT_SUPPORT
- // Normal exit process on Windows does not allow worker threads of the final
- // parallel region to finish reporting their events, so shutting down the
- // library here fixes the issue at least for the cases where __kmpc_end() is
- // placed properly.
- if (ompt_enabled.enabled)
- __kmp_internal_end_library(__kmp_gtid_get_specific());
- #endif
- }
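- // Illustrative usage sketch (hypothetical; not part of the RTL). A program
- // linking the RTL directly could bracket its OpenMP usage with these
- // optional entry points. The ident_t literal is an assumption modeled on
- // the ";file;func;line;col;;" psource convention used by the runtime; real
- // compilers emit a richer source-location record.
- #if 0
- static ident_t example_loc = {0, KMP_IDENT_KMPC, 0, 0, ";unknown;main;0;0;;"};
- int main() {
-   __kmpc_begin(&example_loc, 0); // optional eager initialization
-   // ... code containing OpenMP constructs ...
-   __kmpc_end(&example_loc); // honored only when KMP_IGNORE_MPPEND=0
-   return 0;
- }
- #endif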
- /*!
- @ingroup THREAD_STATES
- @param loc Source location information.
- @return The global thread index of the active thread.
- This function can be called in any context.
- If the runtime has only been entered at the outermost level from a
- single (necessarily non-OpenMP<sup>*</sup>) thread, then the thread number is
- that which would be returned by omp_get_thread_num() in the outermost
- active parallel construct. (Or zero if there is no active parallel
- construct, since the primary thread is necessarily thread zero).
- If multiple non-OpenMP threads all enter an OpenMP construct then this
- will be a unique thread identifier among all the threads created by
- the OpenMP runtime (but the value cannot be defined in terms of
- OpenMP thread ids returned by omp_get_thread_num()).
- */
- kmp_int32 __kmpc_global_thread_num(ident_t *loc) {
- kmp_int32 gtid = __kmp_entry_gtid();
- KC_TRACE(10, ("__kmpc_global_thread_num: T#%d\n", gtid));
- return gtid;
- }
- /*!
- @ingroup THREAD_STATES
- @param loc Source location information.
- @return The number of threads under control of the OpenMP<sup>*</sup> runtime
- This function can be called in any context.
- It returns the total number of threads under the control of the OpenMP runtime.
- That is not a number that can be determined by any OpenMP standard calls, since
- the library may be called from more than one non-OpenMP thread, and this
- reflects the total over all such calls. Similarly, because the runtime maintains
- underlying threads even when they are not active (the cost of creating and
- destroying OS threads is high), this call counts all such threads even if they
- are not waiting for work.
- */
- kmp_int32 __kmpc_global_num_threads(ident_t *loc) {
- KC_TRACE(10,
- ("__kmpc_global_num_threads: num_threads = %d\n", __kmp_all_nth));
- return TCR_4(__kmp_all_nth);
- }
- /*!
- @ingroup THREAD_STATES
- @param loc Source location information.
- @return The thread number of the calling thread in the innermost active parallel
- construct.
- */
- kmp_int32 __kmpc_bound_thread_num(ident_t *loc) {
- KC_TRACE(10, ("__kmpc_bound_thread_num: called\n"));
- return __kmp_tid_from_gtid(__kmp_entry_gtid());
- }
- /*!
- @ingroup THREAD_STATES
- @param loc Source location information.
- @return The number of threads in the innermost active parallel construct.
- */
- kmp_int32 __kmpc_bound_num_threads(ident_t *loc) {
- KC_TRACE(10, ("__kmpc_bound_num_threads: called\n"));
- return __kmp_entry_thread()->th.th_team->t.t_nproc;
- }
- /*!
- * @ingroup DEPRECATED
- * @param loc location description
- *
- * This function need not be called. It always returns TRUE.
- */
- kmp_int32 __kmpc_ok_to_fork(ident_t *loc) {
- #ifndef KMP_DEBUG
- return TRUE;
- #else
- const char *semi2;
- const char *semi3;
- int line_no;
- if (__kmp_par_range == 0) {
- return TRUE;
- }
- semi2 = loc->psource;
- if (semi2 == NULL) {
- return TRUE;
- }
- semi2 = strchr(semi2, ';');
- if (semi2 == NULL) {
- return TRUE;
- }
- semi2 = strchr(semi2 + 1, ';');
- if (semi2 == NULL) {
- return TRUE;
- }
- if (__kmp_par_range_filename[0]) {
- const char *name = semi2 - 1;
- while ((name > loc->psource) && (*name != '/') && (*name != ';')) {
- name--;
- }
- if ((*name == '/') || (*name == ';')) {
- name++;
- }
- if (strncmp(__kmp_par_range_filename, name, semi2 - name)) {
- return __kmp_par_range < 0;
- }
- }
- semi3 = strchr(semi2 + 1, ';');
- if (__kmp_par_range_routine[0]) {
- if ((semi3 != NULL) && (semi3 > semi2) &&
- (strncmp(__kmp_par_range_routine, semi2 + 1, semi3 - semi2 - 1))) {
- return __kmp_par_range < 0;
- }
- }
- if ((semi3 != NULL) && (KMP_SSCANF(semi3 + 1, "%d", &line_no) == 1)) {
- if ((line_no >= __kmp_par_range_lb) && (line_no <= __kmp_par_range_ub)) {
- return __kmp_par_range > 0;
- }
- return __kmp_par_range < 0;
- }
- return TRUE;
- #endif /* KMP_DEBUG */
- }
- /*!
- @ingroup THREAD_STATES
- @param loc Source location information.
- @return 1 if this thread is executing inside an active parallel region, zero if
- not.
- */
- kmp_int32 __kmpc_in_parallel(ident_t *loc) {
- return __kmp_entry_thread()->th.th_root->r.r_active;
- }
- /*!
- @ingroup PARALLEL
- @param loc source location information
- @param global_tid global thread number
- @param num_threads number of threads requested for this parallel construct
- Set the number of threads to be used by the next fork spawned by this thread.
- This call is only required if the parallel construct has a `num_threads` clause.
- */
- void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid,
- kmp_int32 num_threads) {
- KA_TRACE(20, ("__kmpc_push_num_threads: enter T#%d num_threads=%d\n",
- global_tid, num_threads));
- __kmp_assert_valid_gtid(global_tid);
- __kmp_push_num_threads(loc, global_tid, num_threads);
- }
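- // Illustrative sketch (hypothetical lowering, not the output of any specific
- // compiler): for "#pragma omp parallel num_threads(4)" the generated code
- // pushes the request immediately before the fork. outlined_fn and shared_var
- // are placeholders.
- #if 0
- kmp_int32 gtid = __kmpc_global_thread_num(&loc);
- __kmpc_push_num_threads(&loc, gtid, 4);
- __kmpc_fork_call(&loc, /*argc=*/1, (kmpc_micro)outlined_fn, &shared_var);
- #endif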
- void __kmpc_pop_num_threads(ident_t *loc, kmp_int32 global_tid) {
- KA_TRACE(20, ("__kmpc_pop_num_threads: enter\n"));
- /* the num_threads are automatically popped */
- }
- void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid,
- kmp_int32 proc_bind) {
- KA_TRACE(20, ("__kmpc_push_proc_bind: enter T#%d proc_bind=%d\n", global_tid,
- proc_bind));
- __kmp_assert_valid_gtid(global_tid);
- __kmp_push_proc_bind(loc, global_tid, (kmp_proc_bind_t)proc_bind);
- }
- /*!
- @ingroup PARALLEL
- @param loc source location information
- @param argc total number of arguments in the ellipsis
- @param microtask pointer to callback routine consisting of outlined parallel
- construct
- @param ... pointers to shared variables that aren't global
- Do the actual fork and call the microtask in the relevant number of threads.
- */
- void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro microtask, ...) {
- int gtid = __kmp_entry_gtid();
- #if (KMP_STATS_ENABLED)
- // If we were in a serial region, then stop the serial timer, record
- // the event, and start parallel region timer
- stats_state_e previous_state = KMP_GET_THREAD_STATE();
- if (previous_state == stats_state_e::SERIAL_REGION) {
- KMP_EXCHANGE_PARTITIONED_TIMER(OMP_parallel_overhead);
- } else {
- KMP_PUSH_PARTITIONED_TIMER(OMP_parallel_overhead);
- }
- int inParallel = __kmpc_in_parallel(loc);
- if (inParallel) {
- KMP_COUNT_BLOCK(OMP_NESTED_PARALLEL);
- } else {
- KMP_COUNT_BLOCK(OMP_PARALLEL);
- }
- #endif
- // maybe saving thr_state alone would be enough here
- {
- va_list ap;
- va_start(ap, microtask);
- #if OMPT_SUPPORT
- ompt_frame_t *ompt_frame;
- if (ompt_enabled.enabled) {
- kmp_info_t *master_th = __kmp_threads[gtid];
- ompt_frame = &master_th->th.th_current_task->ompt_task_info.frame;
- ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
- }
- OMPT_STORE_RETURN_ADDRESS(gtid);
- #endif
- #if INCLUDE_SSC_MARKS
- SSC_MARK_FORKING();
- #endif
- __kmp_fork_call(loc, gtid, fork_context_intel, argc,
- VOLATILE_CAST(microtask_t) microtask, // "wrapped" task
- VOLATILE_CAST(launch_t) __kmp_invoke_task_func,
- kmp_va_addr_of(ap));
- #if INCLUDE_SSC_MARKS
- SSC_MARK_JOINING();
- #endif
- __kmp_join_call(loc, gtid
- #if OMPT_SUPPORT
- ,
- fork_context_intel
- #endif
- );
- va_end(ap);
- #if OMPT_SUPPORT
- if (ompt_enabled.enabled) {
- ompt_frame->enter_frame = ompt_data_none;
- }
- #endif
- }
- #if KMP_STATS_ENABLED
- if (previous_state == stats_state_e::SERIAL_REGION) {
- KMP_EXCHANGE_PARTITIONED_TIMER(OMP_serial);
- KMP_SET_THREAD_STATE(previous_state);
- } else {
- KMP_POP_PARTITIONED_TIMER();
- }
- #endif // KMP_STATS_ENABLED
- }
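- // Illustrative sketch of the microtask calling convention (an assumption
- // based on the kmpc_micro typedef; outlined_fn and x are hypothetical): the
- // outlined body receives the global/bound tid pointers followed by the
- // shared variables passed through the ellipsis of __kmpc_fork_call.
- #if 0
- void outlined_fn(kmp_int32 *gtid_ref, kmp_int32 *btid_ref, int *shared_x) {
-   *shared_x += 1; // body of the parallel region
- }
- // At the parallel construct:
- //   __kmpc_fork_call(&loc, /*argc=*/1, (kmpc_micro)outlined_fn, &x);
- #endif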
- /*!
- @ingroup PARALLEL
- @param loc source location information
- @param global_tid global thread number
- @param num_teams number of teams requested for the teams construct
- @param num_threads number of threads per team requested for the teams construct
- Set the number of teams to be used by the teams construct.
- This call is only required if the teams construct has a `num_teams` clause
- or a `thread_limit` clause (or both).
- */
- void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid,
- kmp_int32 num_teams, kmp_int32 num_threads) {
- KA_TRACE(20,
- ("__kmpc_push_num_teams: enter T#%d num_teams=%d num_threads=%d\n",
- global_tid, num_teams, num_threads));
- __kmp_assert_valid_gtid(global_tid);
- __kmp_push_num_teams(loc, global_tid, num_teams, num_threads);
- }
- /*!
- @ingroup PARALLEL
- @param loc source location information
- @param global_tid global thread number
- @param num_teams_lb lower bound on number of teams requested for the teams
- construct
- @param num_teams_ub upper bound on number of teams requested for the teams
- construct
- @param num_threads number of threads per team requested for the teams construct
- Set the number of teams to be used by the teams construct. The number of initial
- teams created will be greater than or equal to the lower bound and less than or
- equal to the upper bound.
- This call is only required if the teams construct has a `num_teams` clause
- or a `thread_limit` clause (or both).
- */
- void __kmpc_push_num_teams_51(ident_t *loc, kmp_int32 global_tid,
- kmp_int32 num_teams_lb, kmp_int32 num_teams_ub,
- kmp_int32 num_threads) {
- KA_TRACE(20, ("__kmpc_push_num_teams_51: enter T#%d num_teams_lb=%d"
- " num_teams_ub=%d num_threads=%d\n",
- global_tid, num_teams_lb, num_teams_ub, num_threads));
- __kmp_assert_valid_gtid(global_tid);
- __kmp_push_num_teams_51(loc, global_tid, num_teams_lb, num_teams_ub,
- num_threads);
- }
- /*!
- @ingroup PARALLEL
- @param loc source location information
- @param argc total number of arguments in the ellipsis
- @param microtask pointer to callback routine consisting of outlined teams
- construct
- @param ... pointers to shared variables that aren't global
- Do the actual fork and call the microtask in the relevant number of threads.
- */
- void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro microtask,
- ...) {
- int gtid = __kmp_entry_gtid();
- kmp_info_t *this_thr = __kmp_threads[gtid];
- va_list ap;
- va_start(ap, microtask);
- #if KMP_STATS_ENABLED
- KMP_COUNT_BLOCK(OMP_TEAMS);
- stats_state_e previous_state = KMP_GET_THREAD_STATE();
- if (previous_state == stats_state_e::SERIAL_REGION) {
- KMP_EXCHANGE_PARTITIONED_TIMER(OMP_teams_overhead);
- } else {
- KMP_PUSH_PARTITIONED_TIMER(OMP_teams_overhead);
- }
- #endif
- // remember teams entry point and nesting level
- this_thr->th.th_teams_microtask = microtask;
- this_thr->th.th_teams_level =
- this_thr->th.th_team->t.t_level; // AC: can be >0 on host
- #if OMPT_SUPPORT
- kmp_team_t *parent_team = this_thr->th.th_team;
- int tid = __kmp_tid_from_gtid(gtid);
- if (ompt_enabled.enabled) {
- parent_team->t.t_implicit_task_taskdata[tid]
- .ompt_task_info.frame.enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
- }
- OMPT_STORE_RETURN_ADDRESS(gtid);
- #endif
- // if __kmpc_push_num_teams was not called, set the default number of teams
- if (this_thr->th.th_teams_size.nteams == 0) {
- __kmp_push_num_teams(loc, gtid, 0, 0);
- }
- KMP_DEBUG_ASSERT(this_thr->th.th_set_nproc >= 1);
- KMP_DEBUG_ASSERT(this_thr->th.th_teams_size.nteams >= 1);
- KMP_DEBUG_ASSERT(this_thr->th.th_teams_size.nth >= 1);
- __kmp_fork_call(
- loc, gtid, fork_context_intel, argc,
- VOLATILE_CAST(microtask_t) __kmp_teams_master, // "wrapped" task
- VOLATILE_CAST(launch_t) __kmp_invoke_teams_master, kmp_va_addr_of(ap));
- __kmp_join_call(loc, gtid
- #if OMPT_SUPPORT
- ,
- fork_context_intel
- #endif
- );
- // Pop current CG root off list
- KMP_DEBUG_ASSERT(this_thr->th.th_cg_roots);
- kmp_cg_root_t *tmp = this_thr->th.th_cg_roots;
- this_thr->th.th_cg_roots = tmp->up;
- KA_TRACE(100, ("__kmpc_fork_teams: Thread %p popping node %p and moving up"
- " to node %p. cg_nthreads was %d\n",
- this_thr, tmp, this_thr->th.th_cg_roots, tmp->cg_nthreads));
- KMP_DEBUG_ASSERT(tmp->cg_nthreads);
- int i = tmp->cg_nthreads--;
- if (i == 1) { // check if we are the last thread in CG (not always the case)
- __kmp_free(tmp);
- }
- // Restore current task's thread_limit from CG root
- KMP_DEBUG_ASSERT(this_thr->th.th_cg_roots);
- this_thr->th.th_current_task->td_icvs.thread_limit =
- this_thr->th.th_cg_roots->cg_thread_limit;
- this_thr->th.th_teams_microtask = NULL;
- this_thr->th.th_teams_level = 0;
- *(kmp_int64 *)(&this_thr->th.th_teams_size) = 0L;
- va_end(ap);
- #if KMP_STATS_ENABLED
- if (previous_state == stats_state_e::SERIAL_REGION) {
- KMP_EXCHANGE_PARTITIONED_TIMER(OMP_serial);
- KMP_SET_THREAD_STATE(previous_state);
- } else {
- KMP_POP_PARTITIONED_TIMER();
- }
- #endif // KMP_STATS_ENABLED
- }
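- // Illustrative sketch (hypothetical lowering): for
- //   #pragma omp teams num_teams(2) thread_limit(8)
- // a compiler would push the clause values and then fork the teams microtask.
- // teams_outlined_fn and x are placeholders.
- #if 0
- __kmpc_push_num_teams(&loc, gtid, /*num_teams=*/2, /*num_threads=*/8);
- __kmpc_fork_teams(&loc, /*argc=*/1, (kmpc_micro)teams_outlined_fn, &x);
- #endif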
- // I don't think this function should ever have been exported.
- // The __kmpc_ prefix was misapplied. I'm fairly certain that no generated
- // openmp code ever called it, but it's been exported from the RTL for so
- // long that I'm afraid to remove the definition.
- int __kmpc_invoke_task_func(int gtid) { return __kmp_invoke_task_func(gtid); }
- /*!
- @ingroup PARALLEL
- @param loc source location information
- @param global_tid global thread number
- Enter a serialized parallel construct. This interface is used to handle a
- conditional parallel region, like this,
- @code
- #pragma omp parallel if (condition)
- @endcode
- when the condition is false.
- */
- void __kmpc_serialized_parallel(ident_t *loc, kmp_int32 global_tid) {
- // The implementation is now in kmp_runtime.cpp so that it can share static
- // functions with kmp_fork_call since the tasks to be done are similar in
- // each case.
- __kmp_assert_valid_gtid(global_tid);
- #if OMPT_SUPPORT
- OMPT_STORE_RETURN_ADDRESS(global_tid);
- #endif
- __kmp_serialized_parallel(loc, global_tid);
- }
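- // Illustrative sketch (hypothetical lowering) of
- //   #pragma omp parallel if (cond)
- // using the serialized entry points when cond is false; outlined_fn and x
- // are placeholders.
- #if 0
- if (cond) {
-   __kmpc_fork_call(&loc, /*argc=*/1, (kmpc_micro)outlined_fn, &x);
- } else {
-   kmp_int32 gtid = __kmpc_global_thread_num(&loc);
-   kmp_int32 btid = 0;
-   __kmpc_serialized_parallel(&loc, gtid);
-   outlined_fn(&gtid, &btid, &x); // body runs on the encountering thread
-   __kmpc_end_serialized_parallel(&loc, gtid);
- }
- #endif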
- /*!
- @ingroup PARALLEL
- @param loc source location information
- @param global_tid global thread number
- Leave a serialized parallel construct.
- */
- void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32 global_tid) {
- kmp_internal_control_t *top;
- kmp_info_t *this_thr;
- kmp_team_t *serial_team;
- KC_TRACE(10,
- ("__kmpc_end_serialized_parallel: called by T#%d\n", global_tid));
- /* skip all this code for autopar serialized loops since it results in
- unacceptable overhead */
- if (loc != NULL && (loc->flags & KMP_IDENT_AUTOPAR))
- return;
- // Not autopar code
- __kmp_assert_valid_gtid(global_tid);
- if (!TCR_4(__kmp_init_parallel))
- __kmp_parallel_initialize();
- __kmp_resume_if_soft_paused();
- this_thr = __kmp_threads[global_tid];
- serial_team = this_thr->th.th_serial_team;
- kmp_task_team_t *task_team = this_thr->th.th_task_team;
- // we need to wait for the proxy tasks before finishing the thread
- if (task_team != NULL && (task_team->tt.tt_found_proxy_tasks ||
- task_team->tt.tt_hidden_helper_task_encountered))
- __kmp_task_team_wait(this_thr, serial_team USE_ITT_BUILD_ARG(NULL));
- KMP_MB();
- KMP_DEBUG_ASSERT(serial_team);
- KMP_ASSERT(serial_team->t.t_serialized);
- KMP_DEBUG_ASSERT(this_thr->th.th_team == serial_team);
- KMP_DEBUG_ASSERT(serial_team != this_thr->th.th_root->r.r_root_team);
- KMP_DEBUG_ASSERT(serial_team->t.t_threads);
- KMP_DEBUG_ASSERT(serial_team->t.t_threads[0] == this_thr);
- #if OMPT_SUPPORT
- if (ompt_enabled.enabled &&
- this_thr->th.ompt_thread_info.state != ompt_state_overhead) {
- OMPT_CUR_TASK_INFO(this_thr)->frame.exit_frame = ompt_data_none;
- if (ompt_enabled.ompt_callback_implicit_task) {
- ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
- ompt_scope_end, NULL, OMPT_CUR_TASK_DATA(this_thr), 1,
- OMPT_CUR_TASK_INFO(this_thr)->thread_num, ompt_task_implicit);
- }
- // clear the task id only after unlinking the task
- ompt_data_t *parent_task_data;
- __ompt_get_task_info_internal(1, NULL, &parent_task_data, NULL, NULL, NULL);
- if (ompt_enabled.ompt_callback_parallel_end) {
- ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
- &(serial_team->t.ompt_team_info.parallel_data), parent_task_data,
- ompt_parallel_invoker_program | ompt_parallel_team,
- OMPT_LOAD_RETURN_ADDRESS(global_tid));
- }
- __ompt_lw_taskteam_unlink(this_thr);
- this_thr->th.ompt_thread_info.state = ompt_state_overhead;
- }
- #endif
- /* If necessary, pop the internal control stack values and replace the team
- * values */
- top = serial_team->t.t_control_stack_top;
- if (top && top->serial_nesting_level == serial_team->t.t_serialized) {
- copy_icvs(&serial_team->t.t_threads[0]->th.th_current_task->td_icvs, top);
- serial_team->t.t_control_stack_top = top->next;
- __kmp_free(top);
- }
- /* pop dispatch buffers stack */
- KMP_DEBUG_ASSERT(serial_team->t.t_dispatch->th_disp_buffer);
- {
- dispatch_private_info_t *disp_buffer =
- serial_team->t.t_dispatch->th_disp_buffer;
- serial_team->t.t_dispatch->th_disp_buffer =
- serial_team->t.t_dispatch->th_disp_buffer->next;
- __kmp_free(disp_buffer);
- }
- this_thr->th.th_def_allocator = serial_team->t.t_def_allocator; // restore
- --serial_team->t.t_serialized;
- if (serial_team->t.t_serialized == 0) {
- /* return to the parallel section */
- #if KMP_ARCH_X86 || KMP_ARCH_X86_64
- if (__kmp_inherit_fp_control && serial_team->t.t_fp_control_saved) {
- __kmp_clear_x87_fpu_status_word();
- __kmp_load_x87_fpu_control_word(&serial_team->t.t_x87_fpu_control_word);
- __kmp_load_mxcsr(&serial_team->t.t_mxcsr);
- }
- #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
- __kmp_pop_current_task_from_thread(this_thr);
- #if OMPD_SUPPORT
- if (ompd_state & OMPD_ENABLE_BP)
- ompd_bp_parallel_end();
- #endif
- this_thr->th.th_team = serial_team->t.t_parent;
- this_thr->th.th_info.ds.ds_tid = serial_team->t.t_master_tid;
- /* restore values cached in the thread */
- this_thr->th.th_team_nproc = serial_team->t.t_parent->t.t_nproc; /* JPH */
- this_thr->th.th_team_master =
- serial_team->t.t_parent->t.t_threads[0]; /* JPH */
- this_thr->th.th_team_serialized = this_thr->th.th_team->t.t_serialized;
- /* TODO the below shouldn't need to be adjusted for serialized teams */
- this_thr->th.th_dispatch =
- &this_thr->th.th_team->t.t_dispatch[serial_team->t.t_master_tid];
- KMP_ASSERT(this_thr->th.th_current_task->td_flags.executing == 0);
- this_thr->th.th_current_task->td_flags.executing = 1;
- if (__kmp_tasking_mode != tskm_immediate_exec) {
- // Copy the task team from the new child / old parent team to the thread.
- this_thr->th.th_task_team =
- this_thr->th.th_team->t.t_task_team[this_thr->th.th_task_state];
- KA_TRACE(20,
- ("__kmpc_end_serialized_parallel: T#%d restoring task_team %p / "
- "team %p\n",
- global_tid, this_thr->th.th_task_team, this_thr->th.th_team));
- }
- #if KMP_AFFINITY_SUPPORTED
- if (this_thr->th.th_team->t.t_level == 0 && __kmp_affin_reset) {
- __kmp_reset_root_init_mask(global_tid);
- }
- #endif
- } else {
- if (__kmp_tasking_mode != tskm_immediate_exec) {
- KA_TRACE(20, ("__kmpc_end_serialized_parallel: T#%d decreasing nesting "
- "depth of serial team %p to %d\n",
- global_tid, serial_team, serial_team->t.t_serialized));
- }
- }
- serial_team->t.t_level--;
- if (__kmp_env_consistency_check)
- __kmp_pop_parallel(global_tid, NULL);
- #if OMPT_SUPPORT
- if (ompt_enabled.enabled)
- this_thr->th.ompt_thread_info.state =
- ((this_thr->th.th_team_serialized) ? ompt_state_work_serial
- : ompt_state_work_parallel);
- #endif
- }
- /*!
- @ingroup SYNCHRONIZATION
- @param loc source location information.
- Execute <tt>flush</tt>. This is implemented as a full memory fence. (Though
- depending on the memory ordering convention obeyed by the compiler,
- even that may not be necessary).
- */
- void __kmpc_flush(ident_t *loc) {
- KC_TRACE(10, ("__kmpc_flush: called\n"));
- /* need explicit __mf() here since the library uses volatile instead of fences */
- KMP_MB(); /* Flush all pending memory write invalidates. */
- #if (KMP_ARCH_X86 || KMP_ARCH_X86_64)
- #if KMP_MIC
- // fence-style instructions do not exist, but lock; xaddl $0,(%rsp) can be used.
- // We shouldn't need it, though, since the ABI rules require that
- // * If the compiler generates NGO stores it also generates the fence
- // * If users hand-code NGO stores they should insert the fence
- // therefore no incomplete unordered stores should be visible.
- #else
- // C74404
- // This is to address non-temporal store instructions (sfence needed).
- // The clflush instruction is also addressed (mfence needed).
- // Probably the non-temporal load movntdqa instruction should also be
- // addressed.
- // mfence is a SSE2 instruction. Do not execute it if CPU is not SSE2.
- if (!__kmp_cpuinfo.initialized) {
- __kmp_query_cpuid(&__kmp_cpuinfo);
- }
- if (!__kmp_cpuinfo.flags.sse2) {
- // CPU cannot execute SSE2 instructions.
- } else {
- #if KMP_COMPILER_ICC || KMP_COMPILER_ICX
- _mm_mfence();
- #elif KMP_COMPILER_MSVC
- MemoryBarrier();
- #else
- __sync_synchronize();
- #endif // KMP_COMPILER_ICC || KMP_COMPILER_ICX
- }
- #endif // KMP_MIC
- #elif (KMP_ARCH_ARM || KMP_ARCH_AARCH64 || KMP_ARCH_MIPS || KMP_ARCH_MIPS64 || \
- KMP_ARCH_RISCV64)
- // Nothing to see here; move along.
- #elif KMP_ARCH_PPC64
- // Nothing needed here (we have a real MB above).
- #else
- #error Unknown or unsupported architecture
- #endif
- #if OMPT_SUPPORT && OMPT_OPTIONAL
- if (ompt_enabled.ompt_callback_flush) {
- ompt_callbacks.ompt_callback(ompt_callback_flush)(
- __ompt_get_thread_data_internal(), OMPT_GET_RETURN_ADDRESS(0));
- }
- #endif
- }
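- // Illustrative sketch (hypothetical lowering): a standalone
- //   #pragma omp flush
- // reduces to a single runtime call on the encountering thread.
- #if 0
- __kmpc_flush(&loc);
- #endif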
- /* -------------------------------------------------------------------------- */
- /*!
- @ingroup SYNCHRONIZATION
- @param loc source location information
- @param global_tid thread id.
- Execute a barrier.
- */
- void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid) {
- KMP_COUNT_BLOCK(OMP_BARRIER);
- KC_TRACE(10, ("__kmpc_barrier: called T#%d\n", global_tid));
- __kmp_assert_valid_gtid(global_tid);
- if (!TCR_4(__kmp_init_parallel))
- __kmp_parallel_initialize();
- __kmp_resume_if_soft_paused();
- if (__kmp_env_consistency_check) {
- if (loc == 0) {
- KMP_WARNING(ConstructIdentInvalid); // ??? What does it mean for the user?
- }
- __kmp_check_barrier(global_tid, ct_barrier, loc);
- }
- #if OMPT_SUPPORT
- ompt_frame_t *ompt_frame;
- if (ompt_enabled.enabled) {
- __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
- if (ompt_frame->enter_frame.ptr == NULL)
- ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
- }
- OMPT_STORE_RETURN_ADDRESS(global_tid);
- #endif
- __kmp_threads[global_tid]->th.th_ident = loc;
- // TODO: explicit barrier_wait_id:
- // this function is called when 'barrier' directive is present or
- // implicit barrier at the end of a worksharing construct.
- // 1) better to add a per-thread barrier counter to a thread data structure
- // 2) set to 0 when a new team is created
- // 3) no sync is required
- __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
- #if OMPT_SUPPORT && OMPT_OPTIONAL
- if (ompt_enabled.enabled) {
- ompt_frame->enter_frame = ompt_data_none;
- }
- #endif
- }
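- // Illustrative sketch (hypothetical lowering): an explicit
- //   #pragma omp barrier
- // becomes one call executed by every thread of the team.
- #if 0
- __kmpc_barrier(&loc, __kmpc_global_thread_num(&loc));
- #endif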
- /* The BARRIER for a MASTER section is always explicit */
- /*!
- @ingroup WORK_SHARING
- @param loc source location information.
- @param global_tid global thread number.
- @return 1 if this thread should execute the <tt>master</tt> block, 0 otherwise.
- */
- kmp_int32 __kmpc_master(ident_t *loc, kmp_int32 global_tid) {
- int status = 0;
- KC_TRACE(10, ("__kmpc_master: called T#%d\n", global_tid));
- __kmp_assert_valid_gtid(global_tid);
- if (!TCR_4(__kmp_init_parallel))
- __kmp_parallel_initialize();
- __kmp_resume_if_soft_paused();
- if (KMP_MASTER_GTID(global_tid)) {
- KMP_COUNT_BLOCK(OMP_MASTER);
- KMP_PUSH_PARTITIONED_TIMER(OMP_master);
- status = 1;
- }
- #if OMPT_SUPPORT && OMPT_OPTIONAL
- if (status) {
- if (ompt_enabled.ompt_callback_masked) {
- kmp_info_t *this_thr = __kmp_threads[global_tid];
- kmp_team_t *team = this_thr->th.th_team;
- int tid = __kmp_tid_from_gtid(global_tid);
- ompt_callbacks.ompt_callback(ompt_callback_masked)(
- ompt_scope_begin, &(team->t.ompt_team_info.parallel_data),
- &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
- OMPT_GET_RETURN_ADDRESS(0));
- }
- }
- #endif
- if (__kmp_env_consistency_check) {
- #if KMP_USE_DYNAMIC_LOCK
- if (status)
- __kmp_push_sync(global_tid, ct_master, loc, NULL, 0);
- else
- __kmp_check_sync(global_tid, ct_master, loc, NULL, 0);
- #else
- if (status)
- __kmp_push_sync(global_tid, ct_master, loc, NULL);
- else
- __kmp_check_sync(global_tid, ct_master, loc, NULL);
- #endif
- }
- return status;
- }
- /*!
- @ingroup WORK_SHARING
- @param loc source location information.
- @param global_tid global thread number.
- Mark the end of a <tt>master</tt> region. This should only be called by the
- thread that executes the <tt>master</tt> region.
- */
- void __kmpc_end_master(ident_t *loc, kmp_int32 global_tid) {
- KC_TRACE(10, ("__kmpc_end_master: called T#%d\n", global_tid));
- __kmp_assert_valid_gtid(global_tid);
- KMP_DEBUG_ASSERT(KMP_MASTER_GTID(global_tid));
- KMP_POP_PARTITIONED_TIMER();
- #if OMPT_SUPPORT && OMPT_OPTIONAL
- kmp_info_t *this_thr = __kmp_threads[global_tid];
- kmp_team_t *team = this_thr->th.th_team;
- if (ompt_enabled.ompt_callback_masked) {
- int tid = __kmp_tid_from_gtid(global_tid);
- ompt_callbacks.ompt_callback(ompt_callback_masked)(
- ompt_scope_end, &(team->t.ompt_team_info.parallel_data),
- &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
- OMPT_GET_RETURN_ADDRESS(0));
- }
- #endif
- if (__kmp_env_consistency_check) {
- if (KMP_MASTER_GTID(global_tid))
- __kmp_pop_sync(global_tid, ct_master, loc);
- }
- }
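- // Illustrative sketch (hypothetical lowering) of "#pragma omp master"; note
- // that only the thread executing the block calls __kmpc_end_master.
- #if 0
- if (__kmpc_master(&loc, gtid)) {
-   // ... master-only body ...
-   __kmpc_end_master(&loc, gtid);
- }
- #endif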
- /*!
- @ingroup WORK_SHARING
- @param loc source location information.
- @param global_tid global thread number.
- @param filter result of evaluating filter clause on thread global_tid, or zero
- if no filter clause is present
- @return 1 if this thread should execute the <tt>masked</tt> block, 0 otherwise.
- */
- kmp_int32 __kmpc_masked(ident_t *loc, kmp_int32 global_tid, kmp_int32 filter) {
- int status = 0;
- int tid;
- KC_TRACE(10, ("__kmpc_masked: called T#%d\n", global_tid));
- __kmp_assert_valid_gtid(global_tid);
- if (!TCR_4(__kmp_init_parallel))
- __kmp_parallel_initialize();
- __kmp_resume_if_soft_paused();
- tid = __kmp_tid_from_gtid(global_tid);
- if (tid == filter) {
- KMP_COUNT_BLOCK(OMP_MASKED);
- KMP_PUSH_PARTITIONED_TIMER(OMP_masked);
- status = 1;
- }
- #if OMPT_SUPPORT && OMPT_OPTIONAL
- if (status) {
- if (ompt_enabled.ompt_callback_masked) {
- kmp_info_t *this_thr = __kmp_threads[global_tid];
- kmp_team_t *team = this_thr->th.th_team;
- ompt_callbacks.ompt_callback(ompt_callback_masked)(
- ompt_scope_begin, &(team->t.ompt_team_info.parallel_data),
- &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
- OMPT_GET_RETURN_ADDRESS(0));
- }
- }
- #endif
- if (__kmp_env_consistency_check) {
- #if KMP_USE_DYNAMIC_LOCK
- if (status)
- __kmp_push_sync(global_tid, ct_masked, loc, NULL, 0);
- else
- __kmp_check_sync(global_tid, ct_masked, loc, NULL, 0);
- #else
- if (status)
- __kmp_push_sync(global_tid, ct_masked, loc, NULL);
- else
- __kmp_check_sync(global_tid, ct_masked, loc, NULL);
- #endif
- }
- return status;
- }
- /*!
- @ingroup WORK_SHARING
- @param loc source location information.
- @param global_tid global thread number.
- Mark the end of a <tt>masked</tt> region. This should only be called by the
- thread that executes the <tt>masked</tt> region.
- */
- void __kmpc_end_masked(ident_t *loc, kmp_int32 global_tid) {
- KC_TRACE(10, ("__kmpc_end_masked: called T#%d\n", global_tid));
- __kmp_assert_valid_gtid(global_tid);
- KMP_POP_PARTITIONED_TIMER();
- #if OMPT_SUPPORT && OMPT_OPTIONAL
- kmp_info_t *this_thr = __kmp_threads[global_tid];
- kmp_team_t *team = this_thr->th.th_team;
- if (ompt_enabled.ompt_callback_masked) {
- int tid = __kmp_tid_from_gtid(global_tid);
- ompt_callbacks.ompt_callback(ompt_callback_masked)(
- ompt_scope_end, &(team->t.ompt_team_info.parallel_data),
- &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
- OMPT_GET_RETURN_ADDRESS(0));
- }
- #endif
- if (__kmp_env_consistency_check) {
- __kmp_pop_sync(global_tid, ct_masked, loc);
- }
- }
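- // Illustrative sketch (hypothetical lowering) of
- //   #pragma omp masked filter(expr)
- // where the compiler evaluates the filter expression and passes the result.
- #if 0
- if (__kmpc_masked(&loc, gtid, expr)) {
-   // ... masked body ...
-   __kmpc_end_masked(&loc, gtid);
- }
- #endif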
- /*!
- @ingroup WORK_SHARING
- @param loc source location information.
- @param gtid global thread number.
- Start execution of an <tt>ordered</tt> construct.
- */
- void __kmpc_ordered(ident_t *loc, kmp_int32 gtid) {
- int cid = 0;
- kmp_info_t *th;
- KMP_DEBUG_ASSERT(__kmp_init_serial);
- KC_TRACE(10, ("__kmpc_ordered: called T#%d\n", gtid));
- __kmp_assert_valid_gtid(gtid);
- if (!TCR_4(__kmp_init_parallel))
- __kmp_parallel_initialize();
- __kmp_resume_if_soft_paused();
- #if USE_ITT_BUILD
- __kmp_itt_ordered_prep(gtid);
- // TODO: ordered_wait_id
- #endif /* USE_ITT_BUILD */
- th = __kmp_threads[gtid];
- #if OMPT_SUPPORT && OMPT_OPTIONAL
- kmp_team_t *team;
- ompt_wait_id_t lck;
- void *codeptr_ra;
- OMPT_STORE_RETURN_ADDRESS(gtid);
- if (ompt_enabled.enabled) {
- team = __kmp_team_from_gtid(gtid);
- lck = (ompt_wait_id_t)(uintptr_t)&team->t.t_ordered.dt.t_value;
- /* OMPT state update */
- th->th.ompt_thread_info.wait_id = lck;
- th->th.ompt_thread_info.state = ompt_state_wait_ordered;
- /* OMPT event callback */
- codeptr_ra = OMPT_LOAD_RETURN_ADDRESS(gtid);
- if (ompt_enabled.ompt_callback_mutex_acquire) {
- ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
- ompt_mutex_ordered, omp_lock_hint_none, kmp_mutex_impl_spin, lck,
- codeptr_ra);
- }
- }
- #endif
- if (th->th.th_dispatch->th_deo_fcn != 0)
- (*th->th.th_dispatch->th_deo_fcn)(&gtid, &cid, loc);
- else
- __kmp_parallel_deo(&gtid, &cid, loc);
- #if OMPT_SUPPORT && OMPT_OPTIONAL
- if (ompt_enabled.enabled) {
- /* OMPT state update */
- th->th.ompt_thread_info.state = ompt_state_work_parallel;
- th->th.ompt_thread_info.wait_id = 0;
- /* OMPT event callback */
- if (ompt_enabled.ompt_callback_mutex_acquired) {
- ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
- ompt_mutex_ordered, (ompt_wait_id_t)(uintptr_t)lck, codeptr_ra);
- }
- }
- #endif
- #if USE_ITT_BUILD
- __kmp_itt_ordered_start(gtid);
- #endif /* USE_ITT_BUILD */
- }
- /*!
- @ingroup WORK_SHARING
- @param loc source location information.
- @param gtid global thread number.
- End execution of an <tt>ordered</tt> construct.
- */
- void __kmpc_end_ordered(ident_t *loc, kmp_int32 gtid) {
- int cid = 0;
- kmp_info_t *th;
- KC_TRACE(10, ("__kmpc_end_ordered: called T#%d\n", gtid));
- __kmp_assert_valid_gtid(gtid);
- #if USE_ITT_BUILD
- __kmp_itt_ordered_end(gtid);
- // TODO: ordered_wait_id
- #endif /* USE_ITT_BUILD */
- th = __kmp_threads[gtid];
- if (th->th.th_dispatch->th_dxo_fcn != 0)
- (*th->th.th_dispatch->th_dxo_fcn)(&gtid, &cid, loc);
- else
- __kmp_parallel_dxo(&gtid, &cid, loc);
- #if OMPT_SUPPORT && OMPT_OPTIONAL
- OMPT_STORE_RETURN_ADDRESS(gtid);
- if (ompt_enabled.ompt_callback_mutex_released) {
- ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
- ompt_mutex_ordered,
- (ompt_wait_id_t)(uintptr_t)&__kmp_team_from_gtid(gtid)
- ->t.t_ordered.dt.t_value,
- OMPT_LOAD_RETURN_ADDRESS(gtid));
- }
- #endif
- }
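- // Illustrative sketch (hypothetical lowering): inside a loop with an
- // ordered clause, each iteration brackets the ordered body so chunks retire
- // in sequential iteration order.
- #if 0
- __kmpc_ordered(&loc, gtid);
- // ... body that must execute in iteration order ...
- __kmpc_end_ordered(&loc, gtid);
- #endif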
- #if KMP_USE_DYNAMIC_LOCK
- static __forceinline void
- __kmp_init_indirect_csptr(kmp_critical_name *crit, ident_t const *loc,
- kmp_int32 gtid, kmp_indirect_locktag_t tag) {
- // Pointer to the allocated indirect lock is written to crit, while indexing
- // is ignored.
- void *idx;
- kmp_indirect_lock_t **lck;
- lck = (kmp_indirect_lock_t **)crit;
- kmp_indirect_lock_t *ilk = __kmp_allocate_indirect_lock(&idx, gtid, tag);
- KMP_I_LOCK_FUNC(ilk, init)(ilk->lock);
- KMP_SET_I_LOCK_LOCATION(ilk, loc);
- KMP_SET_I_LOCK_FLAGS(ilk, kmp_lf_critical_section);
- KA_TRACE(20,
- ("__kmp_init_indirect_csptr: initialized indirect lock #%d\n", tag));
- #if USE_ITT_BUILD
- __kmp_itt_critical_creating(ilk->lock, loc);
- #endif
- int status = KMP_COMPARE_AND_STORE_PTR(lck, nullptr, ilk);
- if (status == 0) {
- #if USE_ITT_BUILD
- __kmp_itt_critical_destroyed(ilk->lock);
- #endif
- // We don't really need to destroy the unclaimed lock here since it will be
- // cleaned up at program exit.
- // KMP_D_LOCK_FUNC(&idx, destroy)((kmp_dyna_lock_t *)&idx);
- }
- KMP_DEBUG_ASSERT(*lck != NULL);
- }
- // Fast-path acquire tas lock
- #define KMP_ACQUIRE_TAS_LOCK(lock, gtid) \
- { \
- kmp_tas_lock_t *l = (kmp_tas_lock_t *)lock; \
- kmp_int32 tas_free = KMP_LOCK_FREE(tas); \
- kmp_int32 tas_busy = KMP_LOCK_BUSY(gtid + 1, tas); \
- if (KMP_ATOMIC_LD_RLX(&l->lk.poll) != tas_free || \
- !__kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy)) { \
- kmp_uint32 spins; \
- KMP_FSYNC_PREPARE(l); \
- KMP_INIT_YIELD(spins); \
- kmp_backoff_t backoff = __kmp_spin_backoff_params; \
- do { \
- if (TCR_4(__kmp_nth) > \
- (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) { \
- KMP_YIELD(TRUE); \
- } else { \
- KMP_YIELD_SPIN(spins); \
- } \
- __kmp_spin_backoff(&backoff); \
- } while ( \
- KMP_ATOMIC_LD_RLX(&l->lk.poll) != tas_free || \
- !__kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy)); \
- } \
- KMP_FSYNC_ACQUIRED(l); \
- }
- // Fast-path test tas lock
- #define KMP_TEST_TAS_LOCK(lock, gtid, rc) \
- { \
- kmp_tas_lock_t *l = (kmp_tas_lock_t *)lock; \
- kmp_int32 tas_free = KMP_LOCK_FREE(tas); \
- kmp_int32 tas_busy = KMP_LOCK_BUSY(gtid + 1, tas); \
- rc = KMP_ATOMIC_LD_RLX(&l->lk.poll) == tas_free && \
- __kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy); \
- }
- // Fast-path release tas lock
- #define KMP_RELEASE_TAS_LOCK(lock, gtid) \
- { KMP_ATOMIC_ST_REL(&((kmp_tas_lock_t *)lock)->lk.poll, KMP_LOCK_FREE(tas)); }
- #if KMP_USE_FUTEX
- #include <sys/syscall.h>
- #include <unistd.h>
- #ifndef FUTEX_WAIT
- #define FUTEX_WAIT 0
- #endif
- #ifndef FUTEX_WAKE
- #define FUTEX_WAKE 1
- #endif
- // Fast-path acquire futex lock
- #define KMP_ACQUIRE_FUTEX_LOCK(lock, gtid) \
- { \
- kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock; \
- kmp_int32 gtid_code = (gtid + 1) << 1; \
- KMP_MB(); \
- KMP_FSYNC_PREPARE(ftx); \
- kmp_int32 poll_val; \
- while ((poll_val = KMP_COMPARE_AND_STORE_RET32( \
- &(ftx->lk.poll), KMP_LOCK_FREE(futex), \
- KMP_LOCK_BUSY(gtid_code, futex))) != KMP_LOCK_FREE(futex)) { \
- kmp_int32 cond = KMP_LOCK_STRIP(poll_val) & 1; \
- if (!cond) { \
- if (!KMP_COMPARE_AND_STORE_RET32(&(ftx->lk.poll), poll_val, \
- poll_val | \
- KMP_LOCK_BUSY(1, futex))) { \
- continue; \
- } \
- poll_val |= KMP_LOCK_BUSY(1, futex); \
- } \
- kmp_int32 rc; \
- if ((rc = syscall(__NR_futex, &(ftx->lk.poll), FUTEX_WAIT, poll_val, \
- NULL, NULL, 0)) != 0) { \
- continue; \
- } \
- gtid_code |= 1; \
- } \
- KMP_FSYNC_ACQUIRED(ftx); \
- }
- // Fast-path test futex lock
- #define KMP_TEST_FUTEX_LOCK(lock, gtid, rc) \
- { \
- kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock; \
- if (KMP_COMPARE_AND_STORE_ACQ32(&(ftx->lk.poll), KMP_LOCK_FREE(futex), \
- KMP_LOCK_BUSY((gtid + 1) << 1, futex))) { \
- KMP_FSYNC_ACQUIRED(ftx); \
- rc = TRUE; \
- } else { \
- rc = FALSE; \
- } \
- }
- // Fast-path release futex lock
- #define KMP_RELEASE_FUTEX_LOCK(lock, gtid) \
- { \
- kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock; \
- KMP_MB(); \
- KMP_FSYNC_RELEASING(ftx); \
- kmp_int32 poll_val = \
- KMP_XCHG_FIXED32(&(ftx->lk.poll), KMP_LOCK_FREE(futex)); \
- if (KMP_LOCK_STRIP(poll_val) & 1) { \
- syscall(__NR_futex, &(ftx->lk.poll), FUTEX_WAKE, \
- KMP_LOCK_BUSY(1, futex), NULL, NULL, 0); \
- } \
- KMP_MB(); \
- KMP_YIELD_OVERSUB(); \
- }
- #endif // KMP_USE_FUTEX
- #else // KMP_USE_DYNAMIC_LOCK
- static kmp_user_lock_p __kmp_get_critical_section_ptr(kmp_critical_name *crit,
- ident_t const *loc,
- kmp_int32 gtid) {
- kmp_user_lock_p *lck_pp = (kmp_user_lock_p *)crit;
- // Because of the double-check, the following load doesn't need to be volatile
- kmp_user_lock_p lck = (kmp_user_lock_p)TCR_PTR(*lck_pp);
- if (lck == NULL) {
- void *idx;
- // Allocate & initialize the lock.
- // Remember alloc'ed locks in table in order to free them in __kmp_cleanup()
- lck = __kmp_user_lock_allocate(&idx, gtid, kmp_lf_critical_section);
- __kmp_init_user_lock_with_checks(lck);
- __kmp_set_user_lock_location(lck, loc);
- #if USE_ITT_BUILD
- __kmp_itt_critical_creating(lck);
- // __kmp_itt_critical_creating() should be called *before* the first usage
- // of the underlying lock. It is the only place where we can guarantee it.
- // There is a chance the lock will be destroyed without ever being used, but
- // that is not a problem: this is not a real event seen by the user, just the
- // setting of a name for an object (the lock). See more details in kmp_itt.h.
- #endif /* USE_ITT_BUILD */
- // Use a cmpxchg instruction to slam the start of the critical section with
- // the lock pointer. If another thread beat us to it, deallocate the lock,
- // and use the lock that the other thread allocated.
- int status = KMP_COMPARE_AND_STORE_PTR(lck_pp, 0, lck);
- if (status == 0) {
- // Deallocate the lock and reload the value.
- #if USE_ITT_BUILD
- __kmp_itt_critical_destroyed(lck);
- // Let ITT know the lock is destroyed and the same memory location may be reused
- // for another purpose.
- #endif /* USE_ITT_BUILD */
- __kmp_destroy_user_lock_with_checks(lck);
- __kmp_user_lock_free(&idx, gtid, lck);
- lck = (kmp_user_lock_p)TCR_PTR(*lck_pp);
- KMP_DEBUG_ASSERT(lck != NULL);
- }
- }
- return lck;
- }
- #endif // KMP_USE_DYNAMIC_LOCK
- /*!
- @ingroup WORK_SHARING
- @param loc source location information.
- @param global_tid global thread number.
- @param crit identity of the critical section. This could be a pointer to a lock
- associated with the critical section, or some other suitably unique value.
- Enter code protected by a `critical` construct.
- This function blocks until the executing thread can enter the critical section.
- */
- void __kmpc_critical(ident_t *loc, kmp_int32 global_tid,
- kmp_critical_name *crit) {
- #if KMP_USE_DYNAMIC_LOCK
- #if OMPT_SUPPORT && OMPT_OPTIONAL
- OMPT_STORE_RETURN_ADDRESS(global_tid);
- #endif // OMPT_SUPPORT && OMPT_OPTIONAL
- __kmpc_critical_with_hint(loc, global_tid, crit, omp_lock_hint_none);
- #else
- KMP_COUNT_BLOCK(OMP_CRITICAL);
- #if OMPT_SUPPORT && OMPT_OPTIONAL
- ompt_state_t prev_state = ompt_state_undefined;
- ompt_thread_info_t ti;
- #endif
- kmp_user_lock_p lck;
- KC_TRACE(10, ("__kmpc_critical: called T#%d\n", global_tid));
- __kmp_assert_valid_gtid(global_tid);
- // TODO: add THR_OVHD_STATE
- KMP_PUSH_PARTITIONED_TIMER(OMP_critical_wait);
- KMP_CHECK_USER_LOCK_INIT();
- if ((__kmp_user_lock_kind == lk_tas) &&
- (sizeof(lck->tas.lk.poll) <= OMP_CRITICAL_SIZE)) {
- lck = (kmp_user_lock_p)crit;
- }
- #if KMP_USE_FUTEX
- else if ((__kmp_user_lock_kind == lk_futex) &&
- (sizeof(lck->futex.lk.poll) <= OMP_CRITICAL_SIZE)) {
- lck = (kmp_user_lock_p)crit;
- }
- #endif
- else { // ticket, queuing or drdpa
- lck = __kmp_get_critical_section_ptr(crit, loc, global_tid);
- }
- if (__kmp_env_consistency_check)
- __kmp_push_sync(global_tid, ct_critical, loc, lck);
- // Since the critical directive binds to all threads, not just the current
- // team, we have to check this even if we are in a serialized team.
- // Also, even if we are the uber thread, we still have to acquire the lock,
- // as we have to contend with sibling threads.
- #if USE_ITT_BUILD
- __kmp_itt_critical_acquiring(lck);
- #endif /* USE_ITT_BUILD */
- #if OMPT_SUPPORT && OMPT_OPTIONAL
- OMPT_STORE_RETURN_ADDRESS(global_tid);
- void *codeptr_ra = NULL;
- if (ompt_enabled.enabled) {
- ti = __kmp_threads[global_tid]->th.ompt_thread_info;
- /* OMPT state update */
- prev_state = ti.state;
- ti.wait_id = (ompt_wait_id_t)(uintptr_t)lck;
- ti.state = ompt_state_wait_critical;
- /* OMPT event callback */
- codeptr_ra = OMPT_LOAD_RETURN_ADDRESS(global_tid);
- if (ompt_enabled.ompt_callback_mutex_acquire) {
- ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
- ompt_mutex_critical, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
- (ompt_wait_id_t)(uintptr_t)lck, codeptr_ra);
- }
- }
- #endif
- // The value of 'crit' should be usable as the critical_id of the critical
- // section directive.
- __kmp_acquire_user_lock_with_checks(lck, global_tid);
- #if USE_ITT_BUILD
- __kmp_itt_critical_acquired(lck);
- #endif /* USE_ITT_BUILD */
- #if OMPT_SUPPORT && OMPT_OPTIONAL
- if (ompt_enabled.enabled) {
- /* OMPT state update */
- ti.state = prev_state;
- ti.wait_id = 0;
- /* OMPT event callback */
- if (ompt_enabled.ompt_callback_mutex_acquired) {
- ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
- ompt_mutex_critical, (ompt_wait_id_t)(uintptr_t)lck, codeptr_ra);
- }
- }
- #endif
- KMP_POP_PARTITIONED_TIMER();
- KMP_PUSH_PARTITIONED_TIMER(OMP_critical);
- KA_TRACE(15, ("__kmpc_critical: done T#%d\n", global_tid));
- #endif // KMP_USE_DYNAMIC_LOCK
- }
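- /* For illustration only: for an unnamed `#pragma omp critical`, a compiler
-    would typically emit a call pair of the following shape (the name crit_name
-    is hypothetical; the runtime only requires a suitably unique,
-    zero-initialized kmp_critical_name):
- @code
-   static kmp_critical_name crit_name; // zero-initialized identity
-   __kmpc_critical(&loc, gtid, &crit_name);
-   // ... protected region ...
-   __kmpc_end_critical(&loc, gtid, &crit_name);
- @endcode
- */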
- #if KMP_USE_DYNAMIC_LOCK
- // Converts the given hint to an internal lock implementation
- static __forceinline kmp_dyna_lockseq_t __kmp_map_hint_to_lock(uintptr_t hint) {
- #if KMP_USE_TSX
- #define KMP_TSX_LOCK(seq) lockseq_##seq
- #else
- #define KMP_TSX_LOCK(seq) __kmp_user_lock_seq
- #endif
- #if KMP_ARCH_X86 || KMP_ARCH_X86_64
- #define KMP_CPUINFO_RTM (__kmp_cpuinfo.flags.rtm)
- #else
- #define KMP_CPUINFO_RTM 0
- #endif
- // Hints that do not require further logic
- if (hint & kmp_lock_hint_hle)
- return KMP_TSX_LOCK(hle);
- if (hint & kmp_lock_hint_rtm)
- return KMP_CPUINFO_RTM ? KMP_TSX_LOCK(rtm_queuing) : __kmp_user_lock_seq;
- if (hint & kmp_lock_hint_adaptive)
- return KMP_CPUINFO_RTM ? KMP_TSX_LOCK(adaptive) : __kmp_user_lock_seq;
- // Rule out conflicting hints first by returning the default lock
- if ((hint & omp_lock_hint_contended) && (hint & omp_lock_hint_uncontended))
- return __kmp_user_lock_seq;
- if ((hint & omp_lock_hint_speculative) &&
- (hint & omp_lock_hint_nonspeculative))
- return __kmp_user_lock_seq;
- // Do not even consider speculation when it appears to be contended
- if (hint & omp_lock_hint_contended)
- return lockseq_queuing;
- // Uncontended lock without speculation
- if ((hint & omp_lock_hint_uncontended) && !(hint & omp_lock_hint_speculative))
- return lockseq_tas;
- // Use RTM lock for speculation
- if (hint & omp_lock_hint_speculative)
- return KMP_CPUINFO_RTM ? KMP_TSX_LOCK(rtm_spin) : __kmp_user_lock_seq;
- return __kmp_user_lock_seq;
- }
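- /* Some example mappings (a sketch; the actual result depends on build-time
-    flags and on KMP_CPUINFO_RTM at run time):
- @code
-   __kmp_map_hint_to_lock(omp_lock_hint_uncontended); // -> lockseq_tas
-   __kmp_map_hint_to_lock(omp_lock_hint_contended);   // -> lockseq_queuing
-   __kmp_map_hint_to_lock(omp_lock_hint_contended |
-                          omp_lock_hint_uncontended); // conflict -> default seq
-   __kmp_map_hint_to_lock(omp_lock_hint_speculative); // -> rtm_spin if RTM is
-                                                      //    available, else default
- @endcode
- */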
- #if OMPT_SUPPORT && OMPT_OPTIONAL
- #if KMP_USE_DYNAMIC_LOCK
- static kmp_mutex_impl_t
- __ompt_get_mutex_impl_type(void *user_lock, kmp_indirect_lock_t *ilock = 0) {
- if (user_lock) {
- switch (KMP_EXTRACT_D_TAG(user_lock)) {
- case 0:
- break;
- #if KMP_USE_FUTEX
- case locktag_futex:
- return kmp_mutex_impl_queuing;
- #endif
- case locktag_tas:
- return kmp_mutex_impl_spin;
- #if KMP_USE_TSX
- case locktag_hle:
- case locktag_rtm_spin:
- return kmp_mutex_impl_speculative;
- #endif
- default:
- return kmp_mutex_impl_none;
- }
- ilock = KMP_LOOKUP_I_LOCK(user_lock);
- }
- KMP_ASSERT(ilock);
- switch (ilock->type) {
- #if KMP_USE_TSX
- case locktag_adaptive:
- case locktag_rtm_queuing:
- return kmp_mutex_impl_speculative;
- #endif
- case locktag_nested_tas:
- return kmp_mutex_impl_spin;
- #if KMP_USE_FUTEX
- case locktag_nested_futex:
- #endif
- case locktag_ticket:
- case locktag_queuing:
- case locktag_drdpa:
- case locktag_nested_ticket:
- case locktag_nested_queuing:
- case locktag_nested_drdpa:
- return kmp_mutex_impl_queuing;
- default:
- return kmp_mutex_impl_none;
- }
- }
- #else
- // For locks without dynamic binding
- static kmp_mutex_impl_t __ompt_get_mutex_impl_type() {
- switch (__kmp_user_lock_kind) {
- case lk_tas:
- return kmp_mutex_impl_spin;
- #if KMP_USE_FUTEX
- case lk_futex:
- #endif
- case lk_ticket:
- case lk_queuing:
- case lk_drdpa:
- return kmp_mutex_impl_queuing;
- #if KMP_USE_TSX
- case lk_hle:
- case lk_rtm_queuing:
- case lk_rtm_spin:
- case lk_adaptive:
- return kmp_mutex_impl_speculative;
- #endif
- default:
- return kmp_mutex_impl_none;
- }
- }
- #endif // KMP_USE_DYNAMIC_LOCK
- #endif // OMPT_SUPPORT && OMPT_OPTIONAL
- /*!
- @ingroup WORK_SHARING
- @param loc source location information.
- @param global_tid global thread number.
- @param crit identity of the critical section. This could be a pointer to a lock
- associated with the critical section, or some other suitably unique value.
- @param hint the lock hint.
- Enter code protected by a `critical` construct with a hint. The hint value is
- used to suggest a lock implementation. This function blocks until the executing
- thread can enter the critical section unless the hint suggests use of
- speculative execution and the hardware supports it.
- */
- void __kmpc_critical_with_hint(ident_t *loc, kmp_int32 global_tid,
- kmp_critical_name *crit, uint32_t hint) {
- KMP_COUNT_BLOCK(OMP_CRITICAL);
- kmp_user_lock_p lck;
- #if OMPT_SUPPORT && OMPT_OPTIONAL
- ompt_state_t prev_state = ompt_state_undefined;
- ompt_thread_info_t ti;
- // This is the case if called from __kmpc_critical:
- void *codeptr = OMPT_LOAD_RETURN_ADDRESS(global_tid);
- if (!codeptr)
- codeptr = OMPT_GET_RETURN_ADDRESS(0);
- #endif
- KC_TRACE(10, ("__kmpc_critical: called T#%d\n", global_tid));
- __kmp_assert_valid_gtid(global_tid);
- kmp_dyna_lock_t *lk = (kmp_dyna_lock_t *)crit;
- // Check if it is initialized.
- KMP_PUSH_PARTITIONED_TIMER(OMP_critical_wait);
- kmp_dyna_lockseq_t lockseq = __kmp_map_hint_to_lock(hint);
- if (*lk == 0) {
- if (KMP_IS_D_LOCK(lockseq)) {
- KMP_COMPARE_AND_STORE_ACQ32((volatile kmp_int32 *)crit, 0,
- KMP_GET_D_TAG(lockseq));
- } else {
- __kmp_init_indirect_csptr(crit, loc, global_tid, KMP_GET_I_TAG(lockseq));
- }
- }
- // Branch to reach the actual lock object and its set operation. This
- // branching is unavoidable, since this lock initialization does not follow
- // the normal dispatch path (the lock table is not used).
- if (KMP_EXTRACT_D_TAG(lk) != 0) {
- lck = (kmp_user_lock_p)lk;
- if (__kmp_env_consistency_check) {
- __kmp_push_sync(global_tid, ct_critical, loc, lck,
- __kmp_map_hint_to_lock(hint));
- }
- #if USE_ITT_BUILD
- __kmp_itt_critical_acquiring(lck);
- #endif
- #if OMPT_SUPPORT && OMPT_OPTIONAL
- if (ompt_enabled.enabled) {
- ti = __kmp_threads[global_tid]->th.ompt_thread_info;
- /* OMPT state update */
- prev_state = ti.state;
- ti.wait_id = (ompt_wait_id_t)(uintptr_t)lck;
- ti.state = ompt_state_wait_critical;
- /* OMPT event callback */
- if (ompt_enabled.ompt_callback_mutex_acquire) {
- ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
- ompt_mutex_critical, (unsigned int)hint,
- __ompt_get_mutex_impl_type(crit), (ompt_wait_id_t)(uintptr_t)lck,
- codeptr);
- }
- }
- #endif
- #if KMP_USE_INLINED_TAS
- if (lockseq == lockseq_tas && !__kmp_env_consistency_check) {
- KMP_ACQUIRE_TAS_LOCK(lck, global_tid);
- } else
- #elif KMP_USE_INLINED_FUTEX
- if (lockseq == lockseq_futex && !__kmp_env_consistency_check) {
- KMP_ACQUIRE_FUTEX_LOCK(lck, global_tid);
- } else
- #endif
- {
- KMP_D_LOCK_FUNC(lk, set)(lk, global_tid);
- }
- } else {
- kmp_indirect_lock_t *ilk = *((kmp_indirect_lock_t **)lk);
- lck = ilk->lock;
- if (__kmp_env_consistency_check) {
- __kmp_push_sync(global_tid, ct_critical, loc, lck,
- __kmp_map_hint_to_lock(hint));
- }
- #if USE_ITT_BUILD
- __kmp_itt_critical_acquiring(lck);
- #endif
- #if OMPT_SUPPORT && OMPT_OPTIONAL
- if (ompt_enabled.enabled) {
- ti = __kmp_threads[global_tid]->th.ompt_thread_info;
- /* OMPT state update */
- prev_state = ti.state;
- ti.wait_id = (ompt_wait_id_t)(uintptr_t)lck;
- ti.state = ompt_state_wait_critical;
- /* OMPT event callback */
- if (ompt_enabled.ompt_callback_mutex_acquire) {
- ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
- ompt_mutex_critical, (unsigned int)hint,
- __ompt_get_mutex_impl_type(0, ilk), (ompt_wait_id_t)(uintptr_t)lck,
- codeptr);
- }
- }
- #endif
- KMP_I_LOCK_FUNC(ilk, set)(lck, global_tid);
- }
- KMP_POP_PARTITIONED_TIMER();
- #if USE_ITT_BUILD
- __kmp_itt_critical_acquired(lck);
- #endif /* USE_ITT_BUILD */
- #if OMPT_SUPPORT && OMPT_OPTIONAL
- if (ompt_enabled.enabled) {
- /* OMPT state update */
- ti.state = prev_state;
- ti.wait_id = 0;
- /* OMPT event callback */
- if (ompt_enabled.ompt_callback_mutex_acquired) {
- ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
- ompt_mutex_critical, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
- }
- }
- #endif
- KMP_PUSH_PARTITIONED_TIMER(OMP_critical);
- KA_TRACE(15, ("__kmpc_critical: done T#%d\n", global_tid));
- } // __kmpc_critical_with_hint
- #endif // KMP_USE_DYNAMIC_LOCK
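- /* For illustration only: a hinted, named critical construct such as
- @code
-   #pragma omp critical (c1) hint(omp_sync_hint_contended)
-   { body(); }
- @endcode
-    would be expected to reach the entry point above as
-    __kmpc_critical_with_hint(&loc, gtid, &crit_c1, omp_sync_hint_contended),
-    where crit_c1 stands for the compiler-generated identity of the section. */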
- /*!
- @ingroup WORK_SHARING
- @param loc source location information.
- @param global_tid global thread number.
- @param crit identity of the critical section. This could be a pointer to a lock
- associated with the critical section, or some other suitably unique value.
- Leave a critical section, releasing any lock that was held during its execution.
- */
- void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid,
- kmp_critical_name *crit) {
- kmp_user_lock_p lck;
- KC_TRACE(10, ("__kmpc_end_critical: called T#%d\n", global_tid));
- #if KMP_USE_DYNAMIC_LOCK
- int locktag = KMP_EXTRACT_D_TAG(crit);
- if (locktag) {
- lck = (kmp_user_lock_p)crit;
- KMP_ASSERT(lck != NULL);
- if (__kmp_env_consistency_check) {
- __kmp_pop_sync(global_tid, ct_critical, loc);
- }
- #if USE_ITT_BUILD
- __kmp_itt_critical_releasing(lck);
- #endif
- #if KMP_USE_INLINED_TAS
- if (locktag == locktag_tas && !__kmp_env_consistency_check) {
- KMP_RELEASE_TAS_LOCK(lck, global_tid);
- } else
- #elif KMP_USE_INLINED_FUTEX
- if (locktag == locktag_futex && !__kmp_env_consistency_check) {
- KMP_RELEASE_FUTEX_LOCK(lck, global_tid);
- } else
- #endif
- {
- KMP_D_LOCK_FUNC(lck, unset)((kmp_dyna_lock_t *)lck, global_tid);
- }
- } else {
- kmp_indirect_lock_t *ilk =
- (kmp_indirect_lock_t *)TCR_PTR(*((kmp_indirect_lock_t **)crit));
- KMP_ASSERT(ilk != NULL);
- lck = ilk->lock;
- if (__kmp_env_consistency_check) {
- __kmp_pop_sync(global_tid, ct_critical, loc);
- }
- #if USE_ITT_BUILD
- __kmp_itt_critical_releasing(lck);
- #endif
- KMP_I_LOCK_FUNC(ilk, unset)(lck, global_tid);
- }
- #else // KMP_USE_DYNAMIC_LOCK
- if ((__kmp_user_lock_kind == lk_tas) &&
- (sizeof(lck->tas.lk.poll) <= OMP_CRITICAL_SIZE)) {
- lck = (kmp_user_lock_p)crit;
- }
- #if KMP_USE_FUTEX
- else if ((__kmp_user_lock_kind == lk_futex) &&
- (sizeof(lck->futex.lk.poll) <= OMP_CRITICAL_SIZE)) {
- lck = (kmp_user_lock_p)crit;
- }
- #endif
- else { // ticket, queuing or drdpa
- lck = (kmp_user_lock_p)TCR_PTR(*((kmp_user_lock_p *)crit));
- }
- KMP_ASSERT(lck != NULL);
- if (__kmp_env_consistency_check)
- __kmp_pop_sync(global_tid, ct_critical, loc);
- #if USE_ITT_BUILD
- __kmp_itt_critical_releasing(lck);
- #endif /* USE_ITT_BUILD */
- // The value of 'crit' should be usable as the critical_id of the critical
- // section directive.
- __kmp_release_user_lock_with_checks(lck, global_tid);
- #endif // KMP_USE_DYNAMIC_LOCK
- #if OMPT_SUPPORT && OMPT_OPTIONAL
- /* OMPT release event triggers after lock is released; place here to trigger
- * for all #if branches */
- OMPT_STORE_RETURN_ADDRESS(global_tid);
- if (ompt_enabled.ompt_callback_mutex_released) {
- ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
- ompt_mutex_critical, (ompt_wait_id_t)(uintptr_t)lck,
- OMPT_LOAD_RETURN_ADDRESS(0));
- }
- #endif
- KMP_POP_PARTITIONED_TIMER();
- KA_TRACE(15, ("__kmpc_end_critical: done T#%d\n", global_tid));
- }
- /*!
- @ingroup SYNCHRONIZATION
- @param loc source location information
- @param global_tid thread id.
- @return one if the thread should execute the master block, zero otherwise
- Start execution of a combined barrier and master. The barrier is executed inside
- this function.
- */
- kmp_int32 __kmpc_barrier_master(ident_t *loc, kmp_int32 global_tid) {
- int status;
- KC_TRACE(10, ("__kmpc_barrier_master: called T#%d\n", global_tid));
- __kmp_assert_valid_gtid(global_tid);
- if (!TCR_4(__kmp_init_parallel))
- __kmp_parallel_initialize();
- __kmp_resume_if_soft_paused();
- if (__kmp_env_consistency_check)
- __kmp_check_barrier(global_tid, ct_barrier, loc);
- #if OMPT_SUPPORT
- ompt_frame_t *ompt_frame;
- if (ompt_enabled.enabled) {
- __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
- if (ompt_frame->enter_frame.ptr == NULL)
- ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
- }
- OMPT_STORE_RETURN_ADDRESS(global_tid);
- #endif
- #if USE_ITT_NOTIFY
- __kmp_threads[global_tid]->th.th_ident = loc;
- #endif
- status = __kmp_barrier(bs_plain_barrier, global_tid, TRUE, 0, NULL, NULL);
- #if OMPT_SUPPORT && OMPT_OPTIONAL
- if (ompt_enabled.enabled) {
- ompt_frame->enter_frame = ompt_data_none;
- }
- #endif
- return (status != 0) ? 0 : 1;
- }
- /*!
- @ingroup SYNCHRONIZATION
- @param loc source location information
- @param global_tid thread id.
- Complete the execution of a combined barrier and master. This function should
- only be called at the completion of the <tt>master</tt> code. Other threads will
- still be waiting at the barrier and this call releases them.
- */
- void __kmpc_end_barrier_master(ident_t *loc, kmp_int32 global_tid) {
- KC_TRACE(10, ("__kmpc_end_barrier_master: called T#%d\n", global_tid));
- __kmp_assert_valid_gtid(global_tid);
- __kmp_end_split_barrier(bs_plain_barrier, global_tid);
- }
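- /* For illustration only: the expected compiler-generated call sequence around
-    these two entry points (names are schematic):
- @code
-   if (__kmpc_barrier_master(&loc, gtid)) {
-     // master-only code
-     __kmpc_end_barrier_master(&loc, gtid); // releases the waiting threads
-   }
- @endcode
- */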
- /*!
- @ingroup SYNCHRONIZATION
- @param loc source location information
- @param global_tid thread id.
- @return one if the thread should execute the master block, zero otherwise
- Start execution of a combined barrier and master(nowait) construct.
- The barrier is executed inside this function.
- There is no equivalent "end" function, since the other threads do not wait
- for the master block to finish and there is nothing to release at its end.
- */
- kmp_int32 __kmpc_barrier_master_nowait(ident_t *loc, kmp_int32 global_tid) {
- kmp_int32 ret;
- KC_TRACE(10, ("__kmpc_barrier_master_nowait: called T#%d\n", global_tid));
- __kmp_assert_valid_gtid(global_tid);
- if (!TCR_4(__kmp_init_parallel))
- __kmp_parallel_initialize();
- __kmp_resume_if_soft_paused();
- if (__kmp_env_consistency_check) {
- if (loc == 0) {
- KMP_WARNING(ConstructIdentInvalid); // ??? What does it mean for the user?
- }
- __kmp_check_barrier(global_tid, ct_barrier, loc);
- }
- #if OMPT_SUPPORT
- ompt_frame_t *ompt_frame;
- if (ompt_enabled.enabled) {
- __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
- if (ompt_frame->enter_frame.ptr == NULL)
- ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
- }
- OMPT_STORE_RETURN_ADDRESS(global_tid);
- #endif
- #if USE_ITT_NOTIFY
- __kmp_threads[global_tid]->th.th_ident = loc;
- #endif
- __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
- #if OMPT_SUPPORT && OMPT_OPTIONAL
- if (ompt_enabled.enabled) {
- ompt_frame->enter_frame = ompt_data_none;
- }
- #endif
- ret = __kmpc_master(loc, global_tid);
- if (__kmp_env_consistency_check) {
- /* there is no __kmpc_end_master call, so the (stats) */
- /* actions of __kmpc_end_master are done here */
- if (ret) {
- /* only one thread should do the pop since only */
- /* one did the push (see __kmpc_master()) */
- __kmp_pop_sync(global_tid, ct_master, loc);
- }
- }
- return (ret);
- }
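- /* For illustration only: the nowait variant needs no matching "end" call,
-    because the barrier is executed on entry and nobody waits afterwards:
- @code
-   if (__kmpc_barrier_master_nowait(&loc, gtid)) {
-     // master-only code; the other threads have already moved on
-   }
- @endcode
- */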
- /* The BARRIER for a SINGLE process section is always explicit */
- /*!
- @ingroup WORK_SHARING
- @param loc source location information
- @param global_tid global thread number
- @return One if this thread should execute the single construct, zero otherwise.
- Test whether to execute a <tt>single</tt> construct.
- There are no implicit barriers in the two "single" calls; rather, the compiler
- should introduce an explicit barrier if one is required.
- */
- kmp_int32 __kmpc_single(ident_t *loc, kmp_int32 global_tid) {
- __kmp_assert_valid_gtid(global_tid);
- kmp_int32 rc = __kmp_enter_single(global_tid, loc, TRUE);
- if (rc) {
- // We are going to execute the single statement, so we should count it.
- KMP_COUNT_BLOCK(OMP_SINGLE);
- KMP_PUSH_PARTITIONED_TIMER(OMP_single);
- }
- #if OMPT_SUPPORT && OMPT_OPTIONAL
- kmp_info_t *this_thr = __kmp_threads[global_tid];
- kmp_team_t *team = this_thr->th.th_team;
- int tid = __kmp_tid_from_gtid(global_tid);
- if (ompt_enabled.enabled) {
- if (rc) {
- if (ompt_enabled.ompt_callback_work) {
- ompt_callbacks.ompt_callback(ompt_callback_work)(
- ompt_work_single_executor, ompt_scope_begin,
- &(team->t.ompt_team_info.parallel_data),
- &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
- 1, OMPT_GET_RETURN_ADDRESS(0));
- }
- } else {
- if (ompt_enabled.ompt_callback_work) {
- ompt_callbacks.ompt_callback(ompt_callback_work)(
- ompt_work_single_other, ompt_scope_begin,
- &(team->t.ompt_team_info.parallel_data),
- &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
- 1, OMPT_GET_RETURN_ADDRESS(0));
- ompt_callbacks.ompt_callback(ompt_callback_work)(
- ompt_work_single_other, ompt_scope_end,
- &(team->t.ompt_team_info.parallel_data),
- &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
- 1, OMPT_GET_RETURN_ADDRESS(0));
- }
- }
- }
- #endif
- return rc;
- }
- /*!
- @ingroup WORK_SHARING
- @param loc source location information
- @param global_tid global thread number
- Mark the end of a <tt>single</tt> construct. This function should
- only be called by the thread that executed the block of code protected
- by the `single` construct.
- */
- void __kmpc_end_single(ident_t *loc, kmp_int32 global_tid) {
- __kmp_assert_valid_gtid(global_tid);
- __kmp_exit_single(global_tid);
- KMP_POP_PARTITIONED_TIMER();
- #if OMPT_SUPPORT && OMPT_OPTIONAL
- kmp_info_t *this_thr = __kmp_threads[global_tid];
- kmp_team_t *team = this_thr->th.th_team;
- int tid = __kmp_tid_from_gtid(global_tid);
- if (ompt_enabled.ompt_callback_work) {
- ompt_callbacks.ompt_callback(ompt_callback_work)(
- ompt_work_single_executor, ompt_scope_end,
- &(team->t.ompt_team_info.parallel_data),
- &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 1,
- OMPT_GET_RETURN_ADDRESS(0));
- }
- #endif
- }
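- /* For illustration only: typical codegen for `#pragma omp single` without a
-    nowait clause; as noted above, the barrier is the compiler's responsibility:
- @code
-   if (__kmpc_single(&loc, gtid)) {
-     // block executed by exactly one thread
-     __kmpc_end_single(&loc, gtid);
-   }
-   __kmpc_barrier(&loc, gtid); // omitted when the construct has nowait
- @endcode
- */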
- /*!
- @ingroup WORK_SHARING
- @param loc Source location
- @param global_tid Global thread id
- Mark the end of a statically scheduled loop.
- */
- void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid) {
- KMP_POP_PARTITIONED_TIMER();
- KE_TRACE(10, ("__kmpc_for_static_fini called T#%d\n", global_tid));
- #if OMPT_SUPPORT && OMPT_OPTIONAL
- if (ompt_enabled.ompt_callback_work) {
- ompt_work_t ompt_work_type = ompt_work_loop;
- ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL);
- ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
- // Determine workshare type
- if (loc != NULL) {
- if ((loc->flags & KMP_IDENT_WORK_LOOP) != 0) {
- ompt_work_type = ompt_work_loop;
- } else if ((loc->flags & KMP_IDENT_WORK_SECTIONS) != 0) {
- ompt_work_type = ompt_work_sections;
- } else if ((loc->flags & KMP_IDENT_WORK_DISTRIBUTE) != 0) {
- ompt_work_type = ompt_work_distribute;
- } else {
- // use default set above.
- // a warning about this case is provided in __kmpc_for_static_init
- }
- KMP_DEBUG_ASSERT(ompt_work_type);
- }
- ompt_callbacks.ompt_callback(ompt_callback_work)(
- ompt_work_type, ompt_scope_end, &(team_info->parallel_data),
- &(task_info->task_data), 0, OMPT_GET_RETURN_ADDRESS(0));
- }
- #endif
- if (__kmp_env_consistency_check)
- __kmp_pop_workshare(global_tid, ct_pdo, loc);
- }
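- /* For illustration only: a statically scheduled loop pairs this call with
-    __kmpc_for_static_init_*; the sketch below assumes the usual signature of
-    the 32-bit signed variant:
- @code
-   kmp_int32 last = 0, lower = 0, upper = n - 1, stride = 1;
-   __kmpc_for_static_init_4(&loc, gtid, kmp_sch_static, &last, &lower, &upper,
-                            &stride, 1, 0);
-   for (kmp_int32 i = lower; i <= upper; ++i)
-     body(i);
-   __kmpc_for_static_fini(&loc, gtid);
- @endcode
- */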
- // User routines which take C-style arguments (call by value)
- // different from the Fortran equivalent routines
- void ompc_set_num_threads(int arg) {
- // !!!!! TODO: check the per-task binding
- __kmp_set_num_threads(arg, __kmp_entry_gtid());
- }
- void ompc_set_dynamic(int flag) {
- kmp_info_t *thread;
- /* For the thread-private implementation of the internal controls */
- thread = __kmp_entry_thread();
- __kmp_save_internal_controls(thread);
- set__dynamic(thread, flag ? true : false);
- }
- void ompc_set_nested(int flag) {
- kmp_info_t *thread;
- /* For the thread-private internal controls implementation */
- thread = __kmp_entry_thread();
- __kmp_save_internal_controls(thread);
- set__max_active_levels(thread, flag ? __kmp_dflt_max_active_levels : 1);
- }
- void ompc_set_max_active_levels(int max_active_levels) {
- /* TO DO */
- /* we want per-task implementation of this internal control */
- /* For the per-thread internal controls implementation */
- __kmp_set_max_active_levels(__kmp_entry_gtid(), max_active_levels);
- }
- void ompc_set_schedule(omp_sched_t kind, int modifier) {
- // !!!!! TODO: check the per-task binding
- __kmp_set_schedule(__kmp_entry_gtid(), (kmp_sched_t)kind, modifier);
- }
- int ompc_get_ancestor_thread_num(int level) {
- return __kmp_get_ancestor_thread_num(__kmp_entry_gtid(), level);
- }
- int ompc_get_team_size(int level) {
- return __kmp_get_team_size(__kmp_entry_gtid(), level);
- }
- /* OpenMP 5.0 Affinity Format API */
- void KMP_EXPAND_NAME(ompc_set_affinity_format)(char const *format) {
- if (!__kmp_init_serial) {
- __kmp_serial_initialize();
- }
- __kmp_strncpy_truncate(__kmp_affinity_format, KMP_AFFINITY_FORMAT_SIZE,
- format, KMP_STRLEN(format) + 1);
- }
- size_t KMP_EXPAND_NAME(ompc_get_affinity_format)(char *buffer, size_t size) {
- size_t format_size;
- if (!__kmp_init_serial) {
- __kmp_serial_initialize();
- }
- format_size = KMP_STRLEN(__kmp_affinity_format);
- if (buffer && size) {
- __kmp_strncpy_truncate(buffer, size, __kmp_affinity_format,
- format_size + 1);
- }
- return format_size;
- }
- void KMP_EXPAND_NAME(ompc_display_affinity)(char const *format) {
- int gtid;
- if (!TCR_4(__kmp_init_middle)) {
- __kmp_middle_initialize();
- }
- __kmp_assign_root_init_mask();
- gtid = __kmp_get_gtid();
- #if KMP_AFFINITY_SUPPORTED
- if (__kmp_threads[gtid]->th.th_team->t.t_level == 0 && __kmp_affin_reset) {
- __kmp_reset_root_init_mask(gtid);
- }
- #endif
- __kmp_aux_display_affinity(gtid, format);
- }
- size_t KMP_EXPAND_NAME(ompc_capture_affinity)(char *buffer, size_t buf_size,
- char const *format) {
- int gtid;
- size_t num_required;
- kmp_str_buf_t capture_buf;
- if (!TCR_4(__kmp_init_middle)) {
- __kmp_middle_initialize();
- }
- __kmp_assign_root_init_mask();
- gtid = __kmp_get_gtid();
- #if KMP_AFFINITY_SUPPORTED
- if (__kmp_threads[gtid]->th.th_team->t.t_level == 0 && __kmp_affin_reset) {
- __kmp_reset_root_init_mask(gtid);
- }
- #endif
- __kmp_str_buf_init(&capture_buf);
- num_required = __kmp_aux_capture_affinity(gtid, format, &capture_buf);
- if (buffer && buf_size) {
- __kmp_strncpy_truncate(buffer, buf_size, capture_buf.str,
- capture_buf.used + 1);
- }
- __kmp_str_buf_free(&capture_buf);
- return num_required;
- }
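- /* For illustration only: the usual retry pattern when the caller's buffer may
-    be too small (the format string is just an example using OpenMP 5.0 field
-    names):
- @code
-   char buf[64];
-   size_t needed = ompc_capture_affinity(buf, sizeof(buf),
-                                         "%{thread_num}: %{thread_affinity}");
-   if (needed >= sizeof(buf)) {
-     // buf was truncated; allocate needed + 1 bytes and call again
-   }
- @endcode
- */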
- void kmpc_set_stacksize(int arg) {
- // __kmp_aux_set_stacksize initializes the library if needed
- __kmp_aux_set_stacksize(arg);
- }
- void kmpc_set_stacksize_s(size_t arg) {
- // __kmp_aux_set_stacksize initializes the library if needed
- __kmp_aux_set_stacksize(arg);
- }
- void kmpc_set_blocktime(int arg) {
- int gtid, tid;
- kmp_info_t *thread;
- gtid = __kmp_entry_gtid();
- tid = __kmp_tid_from_gtid(gtid);
- thread = __kmp_thread_from_gtid(gtid);
- __kmp_aux_set_blocktime(arg, thread, tid);
- }
- void kmpc_set_library(int arg) {
- // __kmp_user_set_library initializes the library if needed
- __kmp_user_set_library((enum library_type)arg);
- }
- void kmpc_set_defaults(char const *str) {
- // __kmp_aux_set_defaults initializes the library if needed
- __kmp_aux_set_defaults(str, KMP_STRLEN(str));
- }
- void kmpc_set_disp_num_buffers(int arg) {
- // ignore after initialization because some teams have already
- // allocated dispatch buffers
- if (__kmp_init_serial == FALSE && arg >= KMP_MIN_DISP_NUM_BUFF &&
- arg <= KMP_MAX_DISP_NUM_BUFF) {
- __kmp_dispatch_num_buffers = arg;
- }
- }
- int kmpc_set_affinity_mask_proc(int proc, void **mask) {
- #if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED
- return -1;
- #else
- if (!TCR_4(__kmp_init_middle)) {
- __kmp_middle_initialize();
- }
- __kmp_assign_root_init_mask();
- return __kmp_aux_set_affinity_mask_proc(proc, mask);
- #endif
- }
- int kmpc_unset_affinity_mask_proc(int proc, void **mask) {
- #if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED
- return -1;
- #else
- if (!TCR_4(__kmp_init_middle)) {
- __kmp_middle_initialize();
- }
- __kmp_assign_root_init_mask();
- return __kmp_aux_unset_affinity_mask_proc(proc, mask);
- #endif
- }
- int kmpc_get_affinity_mask_proc(int proc, void **mask) {
- #if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED
- return -1;
- #else
- if (!TCR_4(__kmp_init_middle)) {
- __kmp_middle_initialize();
- }
- __kmp_assign_root_init_mask();
- return __kmp_aux_get_affinity_mask_proc(proc, mask);
- #endif
- }
- /* -------------------------------------------------------------------------- */
- /*!
- @ingroup THREADPRIVATE
- @param loc source location information
- @param gtid global thread number
- @param cpy_size size of the cpy_data buffer
- @param cpy_data pointer to data to be copied
- @param cpy_func helper function to call for copying data
- @param didit flag variable: 1=single thread; 0=not single thread
- __kmpc_copyprivate implements the interface for the private data broadcast
- needed for the copyprivate clause associated with a single region in an
- OpenMP<sup>*</sup> program (both C and Fortran).
- All threads participating in the parallel region call this routine.
- One of the threads (called the single thread) should have the <tt>didit</tt>
- variable set to 1 and all other threads should have that variable set to 0.
- All threads pass a pointer to a data buffer (cpy_data) that they have built.
- The OpenMP specification forbids the use of nowait on the single region when a
- copyprivate clause is present. However, @ref __kmpc_copyprivate implements a
- barrier internally to avoid race conditions, so the code generation for the
- single region should avoid generating a barrier after the call to @ref
- __kmpc_copyprivate.
- The <tt>gtid</tt> parameter is the global thread id for the current thread.
- The <tt>loc</tt> parameter is a pointer to source location information.
- Internal implementation: The single thread will first copy its descriptor
- address (cpy_data) to a team-private location, then the other threads will each
- call the function pointed to by the parameter cpy_func, which carries out the
- copy using the cpy_data buffer.
- The cpy_func routine used for the copy and the contents of the data area defined
- by cpy_data and cpy_size may be built in any fashion that will allow the copy
- to be done. For instance, the cpy_data buffer can hold the actual data to be
- copied or it may hold a list of pointers to the data. The cpy_func routine must
- interpret the cpy_data buffer appropriately.
- The interface to cpy_func is as follows:
- @code
- void cpy_func( void *destination, void *source )
- @endcode
- where void *destination is the cpy_data pointer for the thread being copied to
- and void *source is the cpy_data pointer for the thread being copied from.
- */
- void __kmpc_copyprivate(ident_t *loc, kmp_int32 gtid, size_t cpy_size,
- void *cpy_data, void (*cpy_func)(void *, void *),
- kmp_int32 didit) {
- void **data_ptr;
- KC_TRACE(10, ("__kmpc_copyprivate: called T#%d\n", gtid));
- __kmp_assert_valid_gtid(gtid);
- KMP_MB();
- data_ptr = &__kmp_team_from_gtid(gtid)->t.t_copypriv_data;
- if (__kmp_env_consistency_check) {
- if (loc == 0) {
- KMP_WARNING(ConstructIdentInvalid);
- }
- }
- // ToDo: Optimize the following two barriers into some kind of split barrier
- if (didit)
- *data_ptr = cpy_data;
- #if OMPT_SUPPORT
- ompt_frame_t *ompt_frame;
- if (ompt_enabled.enabled) {
- __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
- if (ompt_frame->enter_frame.ptr == NULL)
- ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
- }
- OMPT_STORE_RETURN_ADDRESS(gtid);
- #endif
- /* This barrier is not a barrier region boundary */
- #if USE_ITT_NOTIFY
- __kmp_threads[gtid]->th.th_ident = loc;
- #endif
- __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL);
- if (!didit)
- (*cpy_func)(cpy_data, *data_ptr);
- // Consider next barrier a user-visible barrier for barrier region boundaries
- // Nesting checks are already handled by the single construct checks
- {
- #if OMPT_SUPPORT
- OMPT_STORE_RETURN_ADDRESS(gtid);
- #endif
- #if USE_ITT_NOTIFY
- __kmp_threads[gtid]->th.th_ident = loc; // TODO: check if it is needed (e.g.
- // tasks can overwrite the location)
- #endif
- __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL);
- #if OMPT_SUPPORT && OMPT_OPTIONAL
- if (ompt_enabled.enabled) {
- ompt_frame->enter_frame = ompt_data_none;
- }
- #endif
- }
- }
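- /* For illustration only: one way a compiler might lower
-    `#pragma omp single copyprivate(x)` onto this interface; copy_x and the
-    surrounding names are hypothetical:
- @code
-   static void copy_x(void *dst, void *src) {
-     *(int *)dst = *(int *)src; // broadcast the value of x
-   }
-   ...
-   kmp_int32 didit = __kmpc_single(&loc, gtid);
-   if (didit) {
-     x = compute(); // the single thread produces the value
-     __kmpc_end_single(&loc, gtid);
-   }
-   __kmpc_copyprivate(&loc, gtid, sizeof(int), &x, copy_x, didit);
- @endcode
- */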
- /* --------------------------------------------------------------------------*/
- /*!
- @ingroup THREADPRIVATE
- @param loc source location information
- @param gtid global thread number
- @param cpy_data pointer to the data to be saved/copied or 0
- @return the saved pointer to the data
- __kmpc_copyprivate_light is a lighter version of __kmpc_copyprivate: it only
- saves the pointer it is given (if the pointer is not 0, i.e. it comes from the
- thread that executed the single region) and returns that pointer from all
- calls (the single thread itself does not need it). This version does not do
- any actual data copying; the copying has to be done elsewhere, e.g. inline in
- the generated code. Consequently, unlike __kmpc_copyprivate, this function has
- no barrier at the end, so the generated code must insert a barrier after all
- of the data has been copied.
- */
- void *__kmpc_copyprivate_light(ident_t *loc, kmp_int32 gtid, void *cpy_data) {
- void **data_ptr;
- KC_TRACE(10, ("__kmpc_copyprivate_light: called T#%d\n", gtid));
- KMP_MB();
- data_ptr = &__kmp_team_from_gtid(gtid)->t.t_copypriv_data;
- if (__kmp_env_consistency_check) {
- if (loc == 0) {
- KMP_WARNING(ConstructIdentInvalid);
- }
- }
- // ToDo: Optimize the following barrier
- if (cpy_data)
- *data_ptr = cpy_data;
- #if OMPT_SUPPORT
- ompt_frame_t *ompt_frame;
- if (ompt_enabled.enabled) {
- __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
- if (ompt_frame->enter_frame.ptr == NULL)
- ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
- OMPT_STORE_RETURN_ADDRESS(gtid);
- }
- #endif
- /* This barrier is not a barrier region boundary */
- #if USE_ITT_NOTIFY
- __kmp_threads[gtid]->th.th_ident = loc;
- #endif
- __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL);
- return *data_ptr;
- }
- /* -------------------------------------------------------------------------- */
- #define INIT_LOCK __kmp_init_user_lock_with_checks
- #define INIT_NESTED_LOCK __kmp_init_nested_user_lock_with_checks
- #define ACQUIRE_LOCK __kmp_acquire_user_lock_with_checks
- #define ACQUIRE_LOCK_TIMED __kmp_acquire_user_lock_with_checks_timed
- #define ACQUIRE_NESTED_LOCK __kmp_acquire_nested_user_lock_with_checks
- #define ACQUIRE_NESTED_LOCK_TIMED \
- __kmp_acquire_nested_user_lock_with_checks_timed
- #define RELEASE_LOCK __kmp_release_user_lock_with_checks
- #define RELEASE_NESTED_LOCK __kmp_release_nested_user_lock_with_checks
- #define TEST_LOCK __kmp_test_user_lock_with_checks
- #define TEST_NESTED_LOCK __kmp_test_nested_user_lock_with_checks
- #define DESTROY_LOCK __kmp_destroy_user_lock_with_checks
- #define DESTROY_NESTED_LOCK __kmp_destroy_nested_user_lock_with_checks
- // TODO: Make check abort messages use location info & pass it into
- // with_checks routines
- #if KMP_USE_DYNAMIC_LOCK
- // internal lock initializer
- static __forceinline void __kmp_init_lock_with_hint(ident_t *loc, void **lock,
- kmp_dyna_lockseq_t seq) {
- if (KMP_IS_D_LOCK(seq)) {
- KMP_INIT_D_LOCK(lock, seq);
- #if USE_ITT_BUILD
- __kmp_itt_lock_creating((kmp_user_lock_p)lock, NULL);
- #endif
- } else {
- KMP_INIT_I_LOCK(lock, seq);
- #if USE_ITT_BUILD
- kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock);
- __kmp_itt_lock_creating(ilk->lock, loc);
- #endif
- }
- }
- // internal nest lock initializer
- static __forceinline void
- __kmp_init_nest_lock_with_hint(ident_t *loc, void **lock,
- kmp_dyna_lockseq_t seq) {
- #if KMP_USE_TSX
- // There is no nested-lock implementation for the speculative locks
- if (seq == lockseq_hle || seq == lockseq_rtm_queuing ||
- seq == lockseq_rtm_spin || seq == lockseq_adaptive)
- seq = __kmp_user_lock_seq;
- #endif
- switch (seq) {
- case lockseq_tas:
- seq = lockseq_nested_tas;
- break;
- #if KMP_USE_FUTEX
- case lockseq_futex:
- seq = lockseq_nested_futex;
- break;
- #endif
- case lockseq_ticket:
- seq = lockseq_nested_ticket;
- break;
- case lockseq_queuing:
- seq = lockseq_nested_queuing;
- break;
- case lockseq_drdpa:
- seq = lockseq_nested_drdpa;
- break;
- default:
- seq = lockseq_nested_queuing;
- }
- KMP_INIT_I_LOCK(lock, seq);
- #if USE_ITT_BUILD
- kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock);
- __kmp_itt_lock_creating(ilk->lock, loc);
- #endif
- }
- /* initialize the lock with a hint */
- void __kmpc_init_lock_with_hint(ident_t *loc, kmp_int32 gtid, void **user_lock,
- uintptr_t hint) {
- KMP_DEBUG_ASSERT(__kmp_init_serial);
- if (__kmp_env_consistency_check && user_lock == NULL) {
- KMP_FATAL(LockIsUninitialized, "omp_init_lock_with_hint");
- }
- __kmp_init_lock_with_hint(loc, user_lock, __kmp_map_hint_to_lock(hint));
- #if OMPT_SUPPORT && OMPT_OPTIONAL
- // This is the case if called from omp_init_lock_with_hint:
- void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
- if (!codeptr)
- codeptr = OMPT_GET_RETURN_ADDRESS(0);
- if (ompt_enabled.ompt_callback_lock_init) {
- ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
- ompt_mutex_lock, (omp_lock_hint_t)hint,
- __ompt_get_mutex_impl_type(user_lock),
- (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
- }
- #endif
- }
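- /* For illustration only: the entry point above backs the user-level call
- @code
-   omp_lock_t l;
-   omp_init_lock_with_hint(&l, omp_lock_hint_speculative);
- @endcode
-    with the hint routed through __kmp_map_hint_to_lock() as shown earlier. */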
- /* initialize the nested lock with a hint */
- void __kmpc_init_nest_lock_with_hint(ident_t *loc, kmp_int32 gtid,
- void **user_lock, uintptr_t hint) {
- KMP_DEBUG_ASSERT(__kmp_init_serial);
- if (__kmp_env_consistency_check && user_lock == NULL) {
- KMP_FATAL(LockIsUninitialized, "omp_init_nest_lock_with_hint");
- }
- __kmp_init_nest_lock_with_hint(loc, user_lock, __kmp_map_hint_to_lock(hint));
- #if OMPT_SUPPORT && OMPT_OPTIONAL
- // This is the case if called from omp_init_nest_lock_with_hint:
- void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
- if (!codeptr)
- codeptr = OMPT_GET_RETURN_ADDRESS(0);
- if (ompt_enabled.ompt_callback_lock_init) {
- ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
- ompt_mutex_nest_lock, (omp_lock_hint_t)hint,
- __ompt_get_mutex_impl_type(user_lock),
- (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
- }
- #endif
- }
- #endif // KMP_USE_DYNAMIC_LOCK
- /* initialize the lock */
- void __kmpc_init_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
- #if KMP_USE_DYNAMIC_LOCK
- KMP_DEBUG_ASSERT(__kmp_init_serial);
- if (__kmp_env_consistency_check && user_lock == NULL) {
- KMP_FATAL(LockIsUninitialized, "omp_init_lock");
- }
- __kmp_init_lock_with_hint(loc, user_lock, __kmp_user_lock_seq);
- #if OMPT_SUPPORT && OMPT_OPTIONAL
- // This is the case if called from omp_init_lock_with_hint:
- void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
- if (!codeptr)
- codeptr = OMPT_GET_RETURN_ADDRESS(0);
- if (ompt_enabled.ompt_callback_lock_init) {
- ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
- ompt_mutex_lock, omp_lock_hint_none,
- __ompt_get_mutex_impl_type(user_lock),
- (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
- }
- #endif
- #else // KMP_USE_DYNAMIC_LOCK
- static char const *const func = "omp_init_lock";
- kmp_user_lock_p lck;
- KMP_DEBUG_ASSERT(__kmp_init_serial);
- if (__kmp_env_consistency_check) {
- if (user_lock == NULL) {
- KMP_FATAL(LockIsUninitialized, func);
- }
- }
- KMP_CHECK_USER_LOCK_INIT();
- if ((__kmp_user_lock_kind == lk_tas) &&
- (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
- lck = (kmp_user_lock_p)user_lock;
- }
- #if KMP_USE_FUTEX
- else if ((__kmp_user_lock_kind == lk_futex) &&
- (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
- lck = (kmp_user_lock_p)user_lock;
- }
- #endif
- else {
- lck = __kmp_user_lock_allocate(user_lock, gtid, 0);
- }
- INIT_LOCK(lck);
- __kmp_set_user_lock_location(lck, loc);
- #if OMPT_SUPPORT && OMPT_OPTIONAL
- // This is the case if called from omp_init_lock_with_hint:
- void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
- if (!codeptr)
- codeptr = OMPT_GET_RETURN_ADDRESS(0);
- if (ompt_enabled.ompt_callback_lock_init) {
- ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
- ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
- (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
- }
- #endif
- #if USE_ITT_BUILD
- __kmp_itt_lock_creating(lck);
- #endif /* USE_ITT_BUILD */
- #endif // KMP_USE_DYNAMIC_LOCK
- } // __kmpc_init_lock
- /* initialize the nested lock */
- void __kmpc_init_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
- #if KMP_USE_DYNAMIC_LOCK
- KMP_DEBUG_ASSERT(__kmp_init_serial);
- if (__kmp_env_consistency_check && user_lock == NULL) {
- KMP_FATAL(LockIsUninitialized, "omp_init_nest_lock");
- }
- __kmp_init_nest_lock_with_hint(loc, user_lock, __kmp_user_lock_seq);
- #if OMPT_SUPPORT && OMPT_OPTIONAL
- // This is the case if called from omp_init_lock_with_hint:
- void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
- if (!codeptr)
- codeptr = OMPT_GET_RETURN_ADDRESS(0);
- if (ompt_enabled.ompt_callback_lock_init) {
- ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
- ompt_mutex_nest_lock, omp_lock_hint_none,
- __ompt_get_mutex_impl_type(user_lock),
- (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
- }
- #endif
- #else // KMP_USE_DYNAMIC_LOCK
- static char const *const func = "omp_init_nest_lock";
- kmp_user_lock_p lck;
- KMP_DEBUG_ASSERT(__kmp_init_serial);
- if (__kmp_env_consistency_check) {
- if (user_lock == NULL) {
- KMP_FATAL(LockIsUninitialized, func);
- }
- }
- KMP_CHECK_USER_LOCK_INIT();
- if ((__kmp_user_lock_kind == lk_tas) &&
- (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
- OMP_NEST_LOCK_T_SIZE)) {
- lck = (kmp_user_lock_p)user_lock;
- }
- #if KMP_USE_FUTEX
- else if ((__kmp_user_lock_kind == lk_futex) &&
- (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
- OMP_NEST_LOCK_T_SIZE)) {
- lck = (kmp_user_lock_p)user_lock;
- }
- #endif
- else {
- lck = __kmp_user_lock_allocate(user_lock, gtid, 0);
- }
- INIT_NESTED_LOCK(lck);
- __kmp_set_user_lock_location(lck, loc);
- #if OMPT_SUPPORT && OMPT_OPTIONAL
- // This is the case if called from omp_init_lock_with_hint:
- void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
- if (!codeptr)
- codeptr = OMPT_GET_RETURN_ADDRESS(0);
- if (ompt_enabled.ompt_callback_lock_init) {
- ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
- ompt_mutex_nest_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
- (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
- }
- #endif
- #if USE_ITT_BUILD
- __kmp_itt_lock_creating(lck);
- #endif /* USE_ITT_BUILD */
- #endif // KMP_USE_DYNAMIC_LOCK
- } // __kmpc_init_nest_lock
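- /* For illustration only: the standard user-level semantics these nested-lock
-    entry points implement:
- @code
-   omp_nest_lock_t nl;
-   omp_init_nest_lock(&nl);
-   omp_set_nest_lock(&nl);   // acquired, nesting depth 1
-   omp_set_nest_lock(&nl);   // same owner, depth 2 (does not deadlock)
-   omp_unset_nest_lock(&nl); // depth back to 1, still held
-   omp_unset_nest_lock(&nl); // fully released
-   omp_destroy_nest_lock(&nl);
- @endcode
- */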
- void __kmpc_destroy_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
- #if KMP_USE_DYNAMIC_LOCK
- #if USE_ITT_BUILD
- kmp_user_lock_p lck;
- if (KMP_EXTRACT_D_TAG(user_lock) == 0) {
- lck = ((kmp_indirect_lock_t *)KMP_LOOKUP_I_LOCK(user_lock))->lock;
- } else {
- lck = (kmp_user_lock_p)user_lock;
- }
- __kmp_itt_lock_destroyed(lck);
- #endif
- #if OMPT_SUPPORT && OMPT_OPTIONAL
- // This is the case if called from omp_init_lock_with_hint:
- void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
- if (!codeptr)
- codeptr = OMPT_GET_RETURN_ADDRESS(0);
- if (ompt_enabled.ompt_callback_lock_destroy) {
- ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
- ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
- }
- #endif
- KMP_D_LOCK_FUNC(user_lock, destroy)((kmp_dyna_lock_t *)user_lock);
- #else
- kmp_user_lock_p lck;
- if ((__kmp_user_lock_kind == lk_tas) &&
- (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
- lck = (kmp_user_lock_p)user_lock;
- }
- #if KMP_USE_FUTEX
- else if ((__kmp_user_lock_kind == lk_futex) &&
- (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
- lck = (kmp_user_lock_p)user_lock;
- }
- #endif
- else {
- lck = __kmp_lookup_user_lock(user_lock, "omp_destroy_lock");
- }
- #if OMPT_SUPPORT && OMPT_OPTIONAL
- // This is the case if called from omp_init_lock_with_hint:
- void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
- if (!codeptr)
- codeptr = OMPT_GET_RETURN_ADDRESS(0);
- if (ompt_enabled.ompt_callback_lock_destroy) {
- ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
- ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
- }
- #endif
- #if USE_ITT_BUILD
- __kmp_itt_lock_destroyed(lck);
- #endif /* USE_ITT_BUILD */
- DESTROY_LOCK(lck);
- if ((__kmp_user_lock_kind == lk_tas) &&
- (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
- ;
- }
- #if KMP_USE_FUTEX
- else if ((__kmp_user_lock_kind == lk_futex) &&
- (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
- ;
- }
- #endif
- else {
- __kmp_user_lock_free(user_lock, gtid, lck);
- }
- #endif // KMP_USE_DYNAMIC_LOCK
- } // __kmpc_destroy_lock
- /* destroy the lock */
- void __kmpc_destroy_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
- #if KMP_USE_DYNAMIC_LOCK
- #if USE_ITT_BUILD
- kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(user_lock);
- __kmp_itt_lock_destroyed(ilk->lock);
- #endif
- #if OMPT_SUPPORT && OMPT_OPTIONAL
- // This is the case if called from omp_init_lock_with_hint:
- void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
- if (!codeptr)
- codeptr = OMPT_GET_RETURN_ADDRESS(0);
- if (ompt_enabled.ompt_callback_lock_destroy) {
- ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
- ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
- }
- #endif
- KMP_D_LOCK_FUNC(user_lock, destroy)((kmp_dyna_lock_t *)user_lock);
- #else // KMP_USE_DYNAMIC_LOCK
- kmp_user_lock_p lck;
- if ((__kmp_user_lock_kind == lk_tas) &&
- (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
- OMP_NEST_LOCK_T_SIZE)) {
- lck = (kmp_user_lock_p)user_lock;
- }
- #if KMP_USE_FUTEX
- else if ((__kmp_user_lock_kind == lk_futex) &&
- (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
- OMP_NEST_LOCK_T_SIZE)) {
- lck = (kmp_user_lock_p)user_lock;
- }
- #endif
- else {
- lck = __kmp_lookup_user_lock(user_lock, "omp_destroy_nest_lock");
- }
- #if OMPT_SUPPORT && OMPT_OPTIONAL
- // This is the case if called from omp_init_lock_with_hint:
- void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
- if (!codeptr)
- codeptr = OMPT_GET_RETURN_ADDRESS(0);
- if (ompt_enabled.ompt_callback_lock_destroy) {
- ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
- ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
- }
- #endif
- #if USE_ITT_BUILD
- __kmp_itt_lock_destroyed(lck);
- #endif /* USE_ITT_BUILD */
- DESTROY_NESTED_LOCK(lck);
- if ((__kmp_user_lock_kind == lk_tas) &&
- (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
- OMP_NEST_LOCK_T_SIZE)) {
- ;
- }
- #if KMP_USE_FUTEX
- else if ((__kmp_user_lock_kind == lk_futex) &&
- (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
- OMP_NEST_LOCK_T_SIZE)) {
- ;
- }
- #endif
- else {
- __kmp_user_lock_free(user_lock, gtid, lck);
- }
- #endif // KMP_USE_DYNAMIC_LOCK
- } // __kmpc_destroy_nest_lock
- void __kmpc_set_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
- KMP_COUNT_BLOCK(OMP_set_lock);
- #if KMP_USE_DYNAMIC_LOCK
- int tag = KMP_EXTRACT_D_TAG(user_lock);
- #if USE_ITT_BUILD
- __kmp_itt_lock_acquiring(
- (kmp_user_lock_p)user_lock); // the itt function will get to the right lock object
- #endif
- #if OMPT_SUPPORT && OMPT_OPTIONAL
- // This is the case if called from omp_init_lock_with_hint:
- void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
- if (!codeptr)
- codeptr = OMPT_GET_RETURN_ADDRESS(0);
- if (ompt_enabled.ompt_callback_mutex_acquire) {
- ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
- ompt_mutex_lock, omp_lock_hint_none,
- __ompt_get_mutex_impl_type(user_lock),
- (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
- }
- #endif
- #if KMP_USE_INLINED_TAS
- if (tag == locktag_tas && !__kmp_env_consistency_check) {
- KMP_ACQUIRE_TAS_LOCK(user_lock, gtid);
- } else
- #elif KMP_USE_INLINED_FUTEX
- if (tag == locktag_futex && !__kmp_env_consistency_check) {
- KMP_ACQUIRE_FUTEX_LOCK(user_lock, gtid);
- } else
- #endif
- {
- __kmp_direct_set[tag]((kmp_dyna_lock_t *)user_lock, gtid);
- }
- #if USE_ITT_BUILD
- __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
- #endif
- #if OMPT_SUPPORT && OMPT_OPTIONAL
- if (ompt_enabled.ompt_callback_mutex_acquired) {
- ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
- ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
- }
- #endif
- #else // KMP_USE_DYNAMIC_LOCK
- kmp_user_lock_p lck;
- if ((__kmp_user_lock_kind == lk_tas) &&
- (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
- lck = (kmp_user_lock_p)user_lock;
- }
- #if KMP_USE_FUTEX
- else if ((__kmp_user_lock_kind == lk_futex) &&
- (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
- lck = (kmp_user_lock_p)user_lock;
- }
- #endif
- else {
- lck = __kmp_lookup_user_lock(user_lock, "omp_set_lock");
- }
- #if USE_ITT_BUILD
- __kmp_itt_lock_acquiring(lck);
- #endif /* USE_ITT_BUILD */
- #if OMPT_SUPPORT && OMPT_OPTIONAL
- // This is the case if called from omp_init_lock_with_hint:
- void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
- if (!codeptr)
- codeptr = OMPT_GET_RETURN_ADDRESS(0);
- if (ompt_enabled.ompt_callback_mutex_acquire) {
- ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
- ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
- (ompt_wait_id_t)(uintptr_t)lck, codeptr);
- }
- #endif
- ACQUIRE_LOCK(lck, gtid);
- #if USE_ITT_BUILD
- __kmp_itt_lock_acquired(lck);
- #endif /* USE_ITT_BUILD */
- #if OMPT_SUPPORT && OMPT_OPTIONAL
- if (ompt_enabled.ompt_callback_mutex_acquired) {
- ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
- ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
- }
- #endif
- #endif // KMP_USE_DYNAMIC_LOCK
- }
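- /* For illustration only: the user-level pattern served by __kmpc_set_lock,
-    __kmpc_unset_lock and __kmpc_test_lock below:
- @code
-   omp_lock_t l;
-   omp_init_lock(&l);
-   omp_set_lock(&l); // blocks until acquired
-   // ... critical work ...
-   omp_unset_lock(&l);
-   if (omp_test_lock(&l)) { // non-blocking attempt
-     omp_unset_lock(&l);
-   }
-   omp_destroy_lock(&l);
- @endcode
- */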
- void __kmpc_set_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
- #if KMP_USE_DYNAMIC_LOCK
- #if USE_ITT_BUILD
- __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);
- #endif
- #if OMPT_SUPPORT && OMPT_OPTIONAL
- // This is the case if called from omp_init_lock_with_hint:
- void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
- if (!codeptr)
- codeptr = OMPT_GET_RETURN_ADDRESS(0);
- if (ompt_enabled.enabled) {
- if (ompt_enabled.ompt_callback_mutex_acquire) {
- ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
- ompt_mutex_nest_lock, omp_lock_hint_none,
- __ompt_get_mutex_impl_type(user_lock),
- (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
- }
- }
- #endif
- int acquire_status =
- KMP_D_LOCK_FUNC(user_lock, set)((kmp_dyna_lock_t *)user_lock, gtid);
- (void)acquire_status;
- #if USE_ITT_BUILD
- __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
- #endif
- #if OMPT_SUPPORT && OMPT_OPTIONAL
- if (ompt_enabled.enabled) {
- if (acquire_status == KMP_LOCK_ACQUIRED_FIRST) {
- if (ompt_enabled.ompt_callback_mutex_acquired) {
- // lock_first
- ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
- ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock,
- codeptr);
- }
- } else {
- if (ompt_enabled.ompt_callback_nest_lock) {
- // lock_next
- ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
- ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
- }
- }
- }
- #endif
- #else // KMP_USE_DYNAMIC_LOCK
- int acquire_status;
- kmp_user_lock_p lck;
- if ((__kmp_user_lock_kind == lk_tas) &&
- (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
- OMP_NEST_LOCK_T_SIZE)) {
- lck = (kmp_user_lock_p)user_lock;
- }
- #if KMP_USE_FUTEX
- else if ((__kmp_user_lock_kind == lk_futex) &&
- (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
- OMP_NEST_LOCK_T_SIZE)) {
- lck = (kmp_user_lock_p)user_lock;
- }
- #endif
- else {
- lck = __kmp_lookup_user_lock(user_lock, "omp_set_nest_lock");
- }
- #if USE_ITT_BUILD
- __kmp_itt_lock_acquiring(lck);
- #endif /* USE_ITT_BUILD */
- #if OMPT_SUPPORT && OMPT_OPTIONAL
- // This is the case if called from omp_init_lock_with_hint:
- void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
- if (!codeptr)
- codeptr = OMPT_GET_RETURN_ADDRESS(0);
- if (ompt_enabled.enabled) {
- if (ompt_enabled.ompt_callback_mutex_acquire) {
- ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
- ompt_mutex_nest_lock, omp_lock_hint_none,
- __ompt_get_mutex_impl_type(), (ompt_wait_id_t)(uintptr_t)lck,
- codeptr);
- }
- }
- #endif
- ACQUIRE_NESTED_LOCK(lck, gtid, &acquire_status);
- #if USE_ITT_BUILD
- __kmp_itt_lock_acquired(lck);
- #endif /* USE_ITT_BUILD */
- #if OMPT_SUPPORT && OMPT_OPTIONAL
- if (ompt_enabled.enabled) {
- if (acquire_status == KMP_LOCK_ACQUIRED_FIRST) {
- if (ompt_enabled.ompt_callback_mutex_acquired) {
- // lock_first
- ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
- ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
- }
- } else {
- if (ompt_enabled.ompt_callback_nest_lock) {
- // lock_next
- ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
- ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
- }
- }
- }
- #endif
- #endif // KMP_USE_DYNAMIC_LOCK
- }
- void __kmpc_unset_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
- #if KMP_USE_DYNAMIC_LOCK
- int tag = KMP_EXTRACT_D_TAG(user_lock);
- #if USE_ITT_BUILD
- __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
- #endif
- #if KMP_USE_INLINED_TAS
- if (tag == locktag_tas && !__kmp_env_consistency_check) {
- KMP_RELEASE_TAS_LOCK(user_lock, gtid);
- } else
- #elif KMP_USE_INLINED_FUTEX
- if (tag == locktag_futex && !__kmp_env_consistency_check) {
- KMP_RELEASE_FUTEX_LOCK(user_lock, gtid);
- } else
- #endif
- {
- __kmp_direct_unset[tag]((kmp_dyna_lock_t *)user_lock, gtid);
- }
- #if OMPT_SUPPORT && OMPT_OPTIONAL
- // This is the case if called from omp_init_lock_with_hint:
- void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
- if (!codeptr)
- codeptr = OMPT_GET_RETURN_ADDRESS(0);
- if (ompt_enabled.ompt_callback_mutex_released) {
- ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
- ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
- }
- #endif
- #else // KMP_USE_DYNAMIC_LOCK
- kmp_user_lock_p lck;
- /* Can't use serial interval since not block structured */
- /* release the lock */
- if ((__kmp_user_lock_kind == lk_tas) &&
- (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
- #if KMP_OS_LINUX && \
- (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
- // "fast" path implemented to fix customer performance issue
- #if USE_ITT_BUILD
- __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
- #endif /* USE_ITT_BUILD */
- TCW_4(((kmp_user_lock_p)user_lock)->tas.lk.poll, 0);
- KMP_MB();
- #if OMPT_SUPPORT && OMPT_OPTIONAL
- // This is the case if called from omp_init_lock_with_hint:
- void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
- if (!codeptr)
- codeptr = OMPT_GET_RETURN_ADDRESS(0);
- if (ompt_enabled.ompt_callback_mutex_released) {
- ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
- ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
- }
- #endif
- return;
- #else
- lck = (kmp_user_lock_p)user_lock;
- #endif
- }
- #if KMP_USE_FUTEX
- else if ((__kmp_user_lock_kind == lk_futex) &&
- (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
- lck = (kmp_user_lock_p)user_lock;
- }
- #endif
- else {
- lck = __kmp_lookup_user_lock(user_lock, "omp_unset_lock");
- }
- #if USE_ITT_BUILD
- __kmp_itt_lock_releasing(lck);
- #endif /* USE_ITT_BUILD */
- RELEASE_LOCK(lck, gtid);
- #if OMPT_SUPPORT && OMPT_OPTIONAL
- // This is the case if called from omp_init_lock_with_hint:
- void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
- if (!codeptr)
- codeptr = OMPT_GET_RETURN_ADDRESS(0);
- if (ompt_enabled.ompt_callback_mutex_released) {
- ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
- ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
- }
- #endif
- #endif // KMP_USE_DYNAMIC_LOCK
- }
- /* release the lock */
- void __kmpc_unset_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
- #if KMP_USE_DYNAMIC_LOCK
- #if USE_ITT_BUILD
- __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
- #endif
- int release_status =
- KMP_D_LOCK_FUNC(user_lock, unset)((kmp_dyna_lock_t *)user_lock, gtid);
- (void)release_status;
- #if OMPT_SUPPORT && OMPT_OPTIONAL
- // This is the case if called from omp_init_lock_with_hint:
- void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
- if (!codeptr)
- codeptr = OMPT_GET_RETURN_ADDRESS(0);
- if (ompt_enabled.enabled) {
- if (release_status == KMP_LOCK_RELEASED) {
- if (ompt_enabled.ompt_callback_mutex_released) {
- // release_lock_last
- ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
- ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock,
- codeptr);
- }
- } else if (ompt_enabled.ompt_callback_nest_lock) {
- // release_lock_prev
- ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
- ompt_scope_end, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
- }
- }
- #endif
- #else // KMP_USE_DYNAMIC_LOCK
- kmp_user_lock_p lck;
- /* Can't use serial interval since not block structured */
- if ((__kmp_user_lock_kind == lk_tas) &&
- (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
- OMP_NEST_LOCK_T_SIZE)) {
- #if KMP_OS_LINUX && \
- (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
- // "fast" path implemented to fix customer performance issue
- kmp_tas_lock_t *tl = (kmp_tas_lock_t *)user_lock;
- #if USE_ITT_BUILD
- __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
- #endif /* USE_ITT_BUILD */
- #if OMPT_SUPPORT && OMPT_OPTIONAL
- int release_status = KMP_LOCK_STILL_HELD;
- #endif
- if (--(tl->lk.depth_locked) == 0) {
- TCW_4(tl->lk.poll, 0);
- #if OMPT_SUPPORT && OMPT_OPTIONAL
- release_status = KMP_LOCK_RELEASED;
- #endif
- }
- KMP_MB();
- #if OMPT_SUPPORT && OMPT_OPTIONAL
- // This is the case if called from omp_init_lock_with_hint:
- void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
- if (!codeptr)
- codeptr = OMPT_GET_RETURN_ADDRESS(0);
- if (ompt_enabled.enabled) {
- if (release_status == KMP_LOCK_RELEASED) {
- if (ompt_enabled.ompt_callback_mutex_released) {
- // release_lock_last
- ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
- ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
- }
- } else if (ompt_enabled.ompt_callback_nest_lock) {
- // release_lock_previous
- ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
- ompt_scope_end, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
- }
- }
- #endif
- return;
- #else
- lck = (kmp_user_lock_p)user_lock;
- #endif
- }
- #if KMP_USE_FUTEX
- else if ((__kmp_user_lock_kind == lk_futex) &&
- (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
- OMP_NEST_LOCK_T_SIZE)) {
- lck = (kmp_user_lock_p)user_lock;
- }
- #endif
- else {
- lck = __kmp_lookup_user_lock(user_lock, "omp_unset_nest_lock");
- }
- #if USE_ITT_BUILD
- __kmp_itt_lock_releasing(lck);
- #endif /* USE_ITT_BUILD */
- int release_status = RELEASE_NESTED_LOCK(lck, gtid);
- #if OMPT_SUPPORT && OMPT_OPTIONAL
- // This is the case if called from omp_init_lock_with_hint:
- void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
- if (!codeptr)
- codeptr = OMPT_GET_RETURN_ADDRESS(0);
- if (ompt_enabled.enabled) {
- if (release_status == KMP_LOCK_RELEASED) {
- if (ompt_enabled.ompt_callback_mutex_released) {
- // release_lock_last
- ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
- ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
- }
- } else if (ompt_enabled.ompt_callback_nest_lock) {
- // release_lock_previous
- ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
- ompt_scope_end, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
- }
- }
- #endif
- #endif // KMP_USE_DYNAMIC_LOCK
- }
- /* try to acquire the lock */
- int __kmpc_test_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
- KMP_COUNT_BLOCK(OMP_test_lock);
- #if KMP_USE_DYNAMIC_LOCK
- int rc;
- int tag = KMP_EXTRACT_D_TAG(user_lock);
- #if USE_ITT_BUILD
- __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);
- #endif
- #if OMPT_SUPPORT && OMPT_OPTIONAL
- // This is the case if called from omp_init_lock_with_hint:
- void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
- if (!codeptr)
- codeptr = OMPT_GET_RETURN_ADDRESS(0);
- if (ompt_enabled.ompt_callback_mutex_acquire) {
- ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
- ompt_mutex_lock, omp_lock_hint_none,
- __ompt_get_mutex_impl_type(user_lock),
- (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
- }
- #endif
- #if KMP_USE_INLINED_TAS
- if (tag == locktag_tas && !__kmp_env_consistency_check) {
- KMP_TEST_TAS_LOCK(user_lock, gtid, rc);
- } else
- #elif KMP_USE_INLINED_FUTEX
- if (tag == locktag_futex && !__kmp_env_consistency_check) {
- KMP_TEST_FUTEX_LOCK(user_lock, gtid, rc);
- } else
- #endif
- {
- rc = __kmp_direct_test[tag]((kmp_dyna_lock_t *)user_lock, gtid);
- }
- if (rc) {
- #if USE_ITT_BUILD
- __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
- #endif
- #if OMPT_SUPPORT && OMPT_OPTIONAL
- if (ompt_enabled.ompt_callback_mutex_acquired) {
- ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
- ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
- }
- #endif
- return FTN_TRUE;
- } else {
- #if USE_ITT_BUILD
- __kmp_itt_lock_cancelled((kmp_user_lock_p)user_lock);
- #endif
- return FTN_FALSE;
- }
- #else // KMP_USE_DYNAMIC_LOCK
- kmp_user_lock_p lck;
- int rc;
- if ((__kmp_user_lock_kind == lk_tas) &&
- (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
- lck = (kmp_user_lock_p)user_lock;
- }
- #if KMP_USE_FUTEX
- else if ((__kmp_user_lock_kind == lk_futex) &&
- (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
- lck = (kmp_user_lock_p)user_lock;
- }
- #endif
- else {
- lck = __kmp_lookup_user_lock(user_lock, "omp_test_lock");
- }
- #if USE_ITT_BUILD
- __kmp_itt_lock_acquiring(lck);
- #endif /* USE_ITT_BUILD */
- #if OMPT_SUPPORT && OMPT_OPTIONAL
- // This is the case if called from omp_init_lock_with_hint:
- void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
- if (!codeptr)
- codeptr = OMPT_GET_RETURN_ADDRESS(0);
- if (ompt_enabled.ompt_callback_mutex_acquire) {
- ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
- ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
- (ompt_wait_id_t)(uintptr_t)lck, codeptr);
- }
- #endif
- rc = TEST_LOCK(lck, gtid);
- #if USE_ITT_BUILD
- if (rc) {
- __kmp_itt_lock_acquired(lck);
- } else {
- __kmp_itt_lock_cancelled(lck);
- }
- #endif /* USE_ITT_BUILD */
- #if OMPT_SUPPORT && OMPT_OPTIONAL
- if (rc && ompt_enabled.ompt_callback_mutex_acquired) {
- ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
- ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
- }
- #endif
- return (rc ? FTN_TRUE : FTN_FALSE);
- /* Can't use serial interval since not block structured */
- #endif // KMP_USE_DYNAMIC_LOCK
- }
- /* try to acquire the nestable lock */
- int __kmpc_test_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
- #if KMP_USE_DYNAMIC_LOCK
- int rc;
- #if USE_ITT_BUILD
- __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);
- #endif
- #if OMPT_SUPPORT && OMPT_OPTIONAL
- // This is the case if called from omp_init_lock_with_hint:
- void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
- if (!codeptr)
- codeptr = OMPT_GET_RETURN_ADDRESS(0);
- if (ompt_enabled.ompt_callback_mutex_acquire) {
- ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
- ompt_mutex_nest_lock, omp_lock_hint_none,
- __ompt_get_mutex_impl_type(user_lock),
- (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
- }
- #endif
- rc = KMP_D_LOCK_FUNC(user_lock, test)((kmp_dyna_lock_t *)user_lock, gtid);
- #if USE_ITT_BUILD
- if (rc) {
- __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
- } else {
- __kmp_itt_lock_cancelled((kmp_user_lock_p)user_lock);
- }
- #endif
- #if OMPT_SUPPORT && OMPT_OPTIONAL
- if (ompt_enabled.enabled && rc) {
- if (rc == 1) {
- if (ompt_enabled.ompt_callback_mutex_acquired) {
- // lock_first
- ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
- ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock,
- codeptr);
- }
- } else {
- if (ompt_enabled.ompt_callback_nest_lock) {
- // lock_next
- ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
- ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
- }
- }
- }
- #endif
- return rc;
- #else // KMP_USE_DYNAMIC_LOCK
- kmp_user_lock_p lck;
- int rc;
- if ((__kmp_user_lock_kind == lk_tas) &&
- (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
- OMP_NEST_LOCK_T_SIZE)) {
- lck = (kmp_user_lock_p)user_lock;
- }
- #if KMP_USE_FUTEX
- else if ((__kmp_user_lock_kind == lk_futex) &&
- (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
- OMP_NEST_LOCK_T_SIZE)) {
- lck = (kmp_user_lock_p)user_lock;
- }
- #endif
- else {
- lck = __kmp_lookup_user_lock(user_lock, "omp_test_nest_lock");
- }
- #if USE_ITT_BUILD
- __kmp_itt_lock_acquiring(lck);
- #endif /* USE_ITT_BUILD */
- #if OMPT_SUPPORT && OMPT_OPTIONAL
- // This is the case if called from omp_init_lock_with_hint:
- void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
- if (!codeptr)
- codeptr = OMPT_GET_RETURN_ADDRESS(0);
- if (ompt_enabled.enabled &&
- ompt_enabled.ompt_callback_mutex_acquire) {
- ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
- ompt_mutex_nest_lock, omp_lock_hint_none,
- __ompt_get_mutex_impl_type(), (ompt_wait_id_t)(uintptr_t)lck,
- codeptr);
- }
- #endif
- rc = TEST_NESTED_LOCK(lck, gtid);
- #if USE_ITT_BUILD
- if (rc) {
- __kmp_itt_lock_acquired(lck);
- } else {
- __kmp_itt_lock_cancelled(lck);
- }
- #endif /* USE_ITT_BUILD */
- #if OMPT_SUPPORT && OMPT_OPTIONAL
- if (ompt_enabled.enabled && rc) {
- if (rc == 1) {
- if (ompt_enabled.ompt_callback_mutex_acquired) {
- // lock_first
- ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
- ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
- }
- } else {
- if (ompt_enabled.ompt_callback_nest_lock) {
- // lock_next
- ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
- ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
- }
- }
- }
- #endif
- return rc;
- /* Can't use serial interval since not block structured */
- #endif // KMP_USE_DYNAMIC_LOCK
- }
- // Interface to fast scalable reduce methods routines
- // Keep the selected method in a thread-local structure for cross-function
- // use: it will be consumed by the __kmpc_end_reduce* functions.
- // An alternative would be to re-determine the method one more time in the
- // __kmpc_end_reduce* functions (a new prototype would be required then).
- // AT: which solution is better?
- #define __KMP_SET_REDUCTION_METHOD(gtid, rmethod) \
- ((__kmp_threads[(gtid)]->th.th_local.packed_reduction_method) = (rmethod))
- #define __KMP_GET_REDUCTION_METHOD(gtid) \
- (__kmp_threads[(gtid)]->th.th_local.packed_reduction_method)
- // description of the packed_reduction_method variable: look at the macros in
- // kmp.h
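- //
- // An illustrative sketch (not part of the runtime) of how the two macros pair
- // up across the begin/end entry points defined below, with the method cached
- // per thread between the calls:
- //
- //   kmp_int32 __kmpc_reduce_nowait(...) {
- //     PACKED_REDUCTION_METHOD_T m = __kmp_determine_reduction_method(...);
- //     __KMP_SET_REDUCTION_METHOD(gtid, m); // stash for the matching end call
- //     ...
- //   }
- //   void __kmpc_end_reduce_nowait(...) {
- //     PACKED_REDUCTION_METHOD_T m = __KMP_GET_REDUCTION_METHOD(gtid);
- //     ... // dispatch on the method chosen at the start
- //   }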
- // used in a critical section reduce block
- static __forceinline void
- __kmp_enter_critical_section_reduce_block(ident_t *loc, kmp_int32 global_tid,
- kmp_critical_name *crit) {
- // this lock was visible to a customer and to the threading profile tool as a
- // serial overhead span (although it's used for an internal purpose only)
- // why was it visible in the previous implementation?
- // should we keep it visible in the new reduce block?
- kmp_user_lock_p lck;
- #if KMP_USE_DYNAMIC_LOCK
- kmp_dyna_lock_t *lk = (kmp_dyna_lock_t *)crit;
- // Check if it is initialized.
- if (*lk == 0) {
- if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) {
- KMP_COMPARE_AND_STORE_ACQ32((volatile kmp_int32 *)crit, 0,
- KMP_GET_D_TAG(__kmp_user_lock_seq));
- } else {
- __kmp_init_indirect_csptr(crit, loc, global_tid,
- KMP_GET_I_TAG(__kmp_user_lock_seq));
- }
- }
- // Branch for accessing the actual lock object and set operation. This
- // branching is inevitable since this lock initialization does not follow the
- // normal dispatch path (lock table is not used).
- if (KMP_EXTRACT_D_TAG(lk) != 0) {
- lck = (kmp_user_lock_p)lk;
- KMP_DEBUG_ASSERT(lck != NULL);
- if (__kmp_env_consistency_check) {
- __kmp_push_sync(global_tid, ct_critical, loc, lck, __kmp_user_lock_seq);
- }
- KMP_D_LOCK_FUNC(lk, set)(lk, global_tid);
- } else {
- kmp_indirect_lock_t *ilk = *((kmp_indirect_lock_t **)lk);
- lck = ilk->lock;
- KMP_DEBUG_ASSERT(lck != NULL);
- if (__kmp_env_consistency_check) {
- __kmp_push_sync(global_tid, ct_critical, loc, lck, __kmp_user_lock_seq);
- }
- KMP_I_LOCK_FUNC(ilk, set)(lck, global_tid);
- }
- #else // KMP_USE_DYNAMIC_LOCK
- // We know that the fast reduction code is only emitted by Intel compilers
- // with 32 byte critical sections. If there isn't enough space, then we
- // have to use a pointer.
- if (__kmp_base_user_lock_size <= INTEL_CRITICAL_SIZE) {
- lck = (kmp_user_lock_p)crit;
- } else {
- lck = __kmp_get_critical_section_ptr(crit, loc, global_tid);
- }
- KMP_DEBUG_ASSERT(lck != NULL);
- if (__kmp_env_consistency_check)
- __kmp_push_sync(global_tid, ct_critical, loc, lck);
- __kmp_acquire_user_lock_with_checks(lck, global_tid);
- #endif // KMP_USE_DYNAMIC_LOCK
- }
- // used in a critical section reduce block
- static __forceinline void
- __kmp_end_critical_section_reduce_block(ident_t *loc, kmp_int32 global_tid,
- kmp_critical_name *crit) {
- kmp_user_lock_p lck;
- #if KMP_USE_DYNAMIC_LOCK
- if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) {
- lck = (kmp_user_lock_p)crit;
- if (__kmp_env_consistency_check)
- __kmp_pop_sync(global_tid, ct_critical, loc);
- KMP_D_LOCK_FUNC(lck, unset)((kmp_dyna_lock_t *)lck, global_tid);
- } else {
- kmp_indirect_lock_t *ilk =
- (kmp_indirect_lock_t *)TCR_PTR(*((kmp_indirect_lock_t **)crit));
- if (__kmp_env_consistency_check)
- __kmp_pop_sync(global_tid, ct_critical, loc);
- KMP_I_LOCK_FUNC(ilk, unset)(ilk->lock, global_tid);
- }
- #else // KMP_USE_DYNAMIC_LOCK
- // We know that the fast reduction code is only emitted by Intel compilers
- // with 32 byte critical sections. If there isn't enough space, then we have
- // to use a pointer.
- if (__kmp_base_user_lock_size > 32) {
- lck = *((kmp_user_lock_p *)crit);
- KMP_ASSERT(lck != NULL);
- } else {
- lck = (kmp_user_lock_p)crit;
- }
- if (__kmp_env_consistency_check)
- __kmp_pop_sync(global_tid, ct_critical, loc);
- __kmp_release_user_lock_with_checks(lck, global_tid);
- #endif // KMP_USE_DYNAMIC_LOCK
- } // __kmp_end_critical_section_reduce_block
- static __forceinline int
- __kmp_swap_teams_for_teams_reduction(kmp_info_t *th, kmp_team_t **team_p,
- int *task_state) {
- kmp_team_t *team;
- // Check if we are inside a teams construct.
- if (th->th.th_teams_microtask) {
- *team_p = team = th->th.th_team;
- if (team->t.t_level == th->th.th_teams_level) {
- // This is a reduction at the teams construct.
- KMP_DEBUG_ASSERT(!th->th.th_info.ds.ds_tid); // AC: check that tid == 0
- // Let's swap teams temporarily for the reduction.
- th->th.th_info.ds.ds_tid = team->t.t_master_tid;
- th->th.th_team = team->t.t_parent;
- th->th.th_team_nproc = th->th.th_team->t.t_nproc;
- th->th.th_task_team = th->th.th_team->t.t_task_team[0];
- *task_state = th->th.th_task_state;
- th->th.th_task_state = 0;
- return 1;
- }
- }
- return 0;
- }
- static __forceinline void
- __kmp_restore_swapped_teams(kmp_info_t *th, kmp_team_t *team, int task_state) {
- // Restore thread structure swapped in __kmp_swap_teams_for_teams_reduction.
- th->th.th_info.ds.ds_tid = 0;
- th->th.th_team = team;
- th->th.th_team_nproc = team->t.t_nproc;
- th->th.th_task_team = team->t.t_task_team[task_state];
- __kmp_type_convert(task_state, &(th->th.th_task_state));
- }
- /* 2.a.i. Reduce Block without a terminating barrier */
- /*!
- @ingroup SYNCHRONIZATION
- @param loc source location information
- @param global_tid global thread number
- @param num_vars number of items (variables) to be reduced
- @param reduce_size size of data in bytes to be reduced
- @param reduce_data pointer to data to be reduced
- @param reduce_func callback function providing reduction operation on two
- operands and returning result of reduction in lhs_data
- @param lck pointer to the unique lock data structure
- @result 1 for the primary thread, 0 for all other team threads, 2 for all team
- threads if atomic reduction needed
- The nowait version is used for a reduce clause with the nowait argument.
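- 
- A sketch of how a compiler might drive this entry point (illustrative only;
- reduce_fn, sum, and crit are placeholder names):
- @code
- kmp_int32 r = __kmpc_reduce_nowait(loc, gtid, 1, sizeof(sum), &sum,
-                                    reduce_fn, &crit);
- if (r == 1) {        // combine partial results, then close the region
-   // ... combine ...
-   __kmpc_end_reduce_nowait(loc, gtid, &crit);
- } else if (r == 2) { // atomic method: combine atomically; no end call emitted
-   // ... atomic update ...
- }                    // r == 0: other workers, nothing left to do
- @endcode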
- */
- kmp_int32
- __kmpc_reduce_nowait(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars,
- size_t reduce_size, void *reduce_data,
- void (*reduce_func)(void *lhs_data, void *rhs_data),
- kmp_critical_name *lck) {
- KMP_COUNT_BLOCK(REDUCE_nowait);
- int retval = 0;
- PACKED_REDUCTION_METHOD_T packed_reduction_method;
- kmp_info_t *th;
- kmp_team_t *team;
- int teams_swapped = 0, task_state;
- KA_TRACE(10, ("__kmpc_reduce_nowait() enter: called T#%d\n", global_tid));
- __kmp_assert_valid_gtid(global_tid);
- // why do we need this initialization here at all?
- // Reduction clause cannot be used as a stand-alone directive.
- // do not call __kmp_serial_initialize(), it will be called by
- // __kmp_parallel_initialize() if needed
- // possible detection of false-positive race by the threadchecker ???
- if (!TCR_4(__kmp_init_parallel))
- __kmp_parallel_initialize();
- __kmp_resume_if_soft_paused();
- // check correctness of reduce block nesting
- #if KMP_USE_DYNAMIC_LOCK
- if (__kmp_env_consistency_check)
- __kmp_push_sync(global_tid, ct_reduce, loc, NULL, 0);
- #else
- if (__kmp_env_consistency_check)
- __kmp_push_sync(global_tid, ct_reduce, loc, NULL);
- #endif
- th = __kmp_thread_from_gtid(global_tid);
- teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state);
- // packed_reduction_method value will be reused by __kmp_end_reduce* function,
- // the value should be kept in a variable
- // the variable should be either a construct-specific or thread-specific
- // property, not a team specific property
- // (a thread can reach the next reduce block on the next construct, reduce
- // method may differ on the next construct)
- // an ident_t "loc" parameter could be used as a construct-specific property
- // (what if loc == 0?)
- // (if both construct-specific and team-specific variables were shared,
- // then unnecessary extra syncs would be needed)
- // a thread-specific variable is better regarding the two issues above (next
- // construct and extra syncs)
- // a thread-specific "th_local.reduction_method" variable is used currently
- // each thread executes the 'determine' and 'set' lines (no need to execute
- // them on a single thread only, which would require unnecessary extra syncs)
- packed_reduction_method = __kmp_determine_reduction_method(
- loc, global_tid, num_vars, reduce_size, reduce_data, reduce_func, lck);
- __KMP_SET_REDUCTION_METHOD(global_tid, packed_reduction_method);
- OMPT_REDUCTION_DECL(th, global_tid);
- if (packed_reduction_method == critical_reduce_block) {
- OMPT_REDUCTION_BEGIN;
- __kmp_enter_critical_section_reduce_block(loc, global_tid, lck);
- retval = 1;
- } else if (packed_reduction_method == empty_reduce_block) {
- OMPT_REDUCTION_BEGIN;
- // usage: if team size == 1, no synchronization is required ( Intel
- // platforms only )
- retval = 1;
- } else if (packed_reduction_method == atomic_reduce_block) {
- retval = 2;
- // all threads should do this pop here (because __kmpc_end_reduce_nowait()
- // won't be called by the code gen)
- // (not ideal: the checking block has already been closed by this 'pop',
- // but the atomic operation has not executed yet; it will execute
- // slightly later, literally on the next instruction)
- if (__kmp_env_consistency_check)
- __kmp_pop_sync(global_tid, ct_reduce, loc);
- } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
- tree_reduce_block)) {
- // AT: performance issue: a real barrier here
- // AT: (if the primary thread is slow, other threads are blocked here waiting
- // for the primary thread to come and release them)
- // AT: (this is not what a customer might expect when specifying the NOWAIT
- // clause)
- // AT: (specifying NOWAIT won't result in a performance improvement; it'll
- // be confusing to a customer)
- // AT: another implementation of *barrier_gather*nowait() (or some other
- // design) might go faster and be more in line with the sense of NOWAIT
- // AT: TO DO: do epcc test and compare times
- // this barrier should be invisible to a customer and to the threading profile
- // tool (it's neither a terminating barrier nor customer's code, it's
- // used for an internal purpose)
- #if OMPT_SUPPORT
- // JP: can this barrier potentially lead to task scheduling?
- // JP: as long as there is a barrier in the implementation, OMPT should and
- // will provide the barrier events,
- // so we set up the necessary frame/return addresses.
- ompt_frame_t *ompt_frame;
- if (ompt_enabled.enabled) {
- __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
- if (ompt_frame->enter_frame.ptr == NULL)
- ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
- }
- OMPT_STORE_RETURN_ADDRESS(global_tid);
- #endif
- #if USE_ITT_NOTIFY
- __kmp_threads[global_tid]->th.th_ident = loc;
- #endif
- retval =
- __kmp_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method),
- global_tid, FALSE, reduce_size, reduce_data, reduce_func);
- retval = (retval != 0) ? (0) : (1);
- #if OMPT_SUPPORT && OMPT_OPTIONAL
- if (ompt_enabled.enabled) {
- ompt_frame->enter_frame = ompt_data_none;
- }
- #endif
- // all other workers except primary thread should do this pop here
- // (none of the other workers will get to __kmpc_end_reduce_nowait())
- if (__kmp_env_consistency_check) {
- if (retval == 0) {
- __kmp_pop_sync(global_tid, ct_reduce, loc);
- }
- }
- } else {
- // should never reach this block
- KMP_ASSERT(0); // "unexpected method"
- }
- if (teams_swapped) {
- __kmp_restore_swapped_teams(th, team, task_state);
- }
- KA_TRACE(
- 10,
- ("__kmpc_reduce_nowait() exit: called T#%d: method %08x, returns %08x\n",
- global_tid, packed_reduction_method, retval));
- return retval;
- }
- /*!
- @ingroup SYNCHRONIZATION
- @param loc source location information
- @param global_tid global thread id.
- @param lck pointer to the unique lock data structure
- Finish the execution of a reduce nowait.
- */
- void __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid,
- kmp_critical_name *lck) {
- PACKED_REDUCTION_METHOD_T packed_reduction_method;
- KA_TRACE(10, ("__kmpc_end_reduce_nowait() enter: called T#%d\n", global_tid));
- __kmp_assert_valid_gtid(global_tid);
- packed_reduction_method = __KMP_GET_REDUCTION_METHOD(global_tid);
- OMPT_REDUCTION_DECL(__kmp_thread_from_gtid(global_tid), global_tid);
- if (packed_reduction_method == critical_reduce_block) {
- __kmp_end_critical_section_reduce_block(loc, global_tid, lck);
- OMPT_REDUCTION_END;
- } else if (packed_reduction_method == empty_reduce_block) {
- // usage: if team size == 1, no synchronization is required ( on Intel
- // platforms only )
- OMPT_REDUCTION_END;
- } else if (packed_reduction_method == atomic_reduce_block) {
- // neither primary thread nor other workers should get here
- // (code gen does not generate this call in case 2: atomic reduce block)
- // actually it would be better to remove this else-if entirely;
- // after removal this value would be caught by the 'else' and would assert
- } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
- tree_reduce_block)) {
- // only primary thread gets here
- // OMPT: tree reduction is annotated in the barrier code
- } else {
- // should never reach this block
- KMP_ASSERT(0); // "unexpected method"
- }
- if (__kmp_env_consistency_check)
- __kmp_pop_sync(global_tid, ct_reduce, loc);
- KA_TRACE(10, ("__kmpc_end_reduce_nowait() exit: called T#%d: method %08x\n",
- global_tid, packed_reduction_method));
- return;
- }
- /* 2.a.ii. Reduce Block with a terminating barrier */
- /*!
- @ingroup SYNCHRONIZATION
- @param loc source location information
- @param global_tid global thread number
- @param num_vars number of items (variables) to be reduced
- @param reduce_size size of data in bytes to be reduced
- @param reduce_data pointer to data to be reduced
- @param reduce_func callback function providing reduction operation on two
- operands and returning result of reduction in lhs_data
- @param lck pointer to the unique lock data structure
- @result 1 for the primary thread, 0 for all other team threads, 2 for all team
- threads if atomic reduction needed
- A blocking reduce that includes an implicit barrier.
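- 
- A sketch of a possible lowering (illustrative only; mirrors the nowait
- example above, but here the end call also provides the terminating barrier):
- @code
- kmp_int32 r = __kmpc_reduce(loc, gtid, 1, sizeof(sum), &sum, reduce_fn, &crit);
- if (r == 1) {        // combine partial results, then end (barrier inside)
-   // ... combine ...
-   __kmpc_end_reduce(loc, gtid, &crit);
- } else if (r == 2) { // atomic combine on every thread, then end
-   // ... atomic update ...
-   __kmpc_end_reduce(loc, gtid, &crit);
- }                    // r == 0: tree-method workers are released by the barrier
- @endcode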
- */
- kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars,
- size_t reduce_size, void *reduce_data,
- void (*reduce_func)(void *lhs_data, void *rhs_data),
- kmp_critical_name *lck) {
- KMP_COUNT_BLOCK(REDUCE_wait);
- int retval = 0;
- PACKED_REDUCTION_METHOD_T packed_reduction_method;
- kmp_info_t *th;
- kmp_team_t *team;
- int teams_swapped = 0, task_state;
- KA_TRACE(10, ("__kmpc_reduce() enter: called T#%d\n", global_tid));
- __kmp_assert_valid_gtid(global_tid);
- // why do we need this initialization here at all?
- // Reduction clause cannot be a stand-alone directive.
- // do not call __kmp_serial_initialize(), it will be called by
- // __kmp_parallel_initialize() if needed
- // possible detection of false-positive race by the threadchecker ???
- if (!TCR_4(__kmp_init_parallel))
- __kmp_parallel_initialize();
- __kmp_resume_if_soft_paused();
- // check correctness of reduce block nesting
- #if KMP_USE_DYNAMIC_LOCK
- if (__kmp_env_consistency_check)
- __kmp_push_sync(global_tid, ct_reduce, loc, NULL, 0);
- #else
- if (__kmp_env_consistency_check)
- __kmp_push_sync(global_tid, ct_reduce, loc, NULL);
- #endif
- th = __kmp_thread_from_gtid(global_tid);
- teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state);
- packed_reduction_method = __kmp_determine_reduction_method(
- loc, global_tid, num_vars, reduce_size, reduce_data, reduce_func, lck);
- __KMP_SET_REDUCTION_METHOD(global_tid, packed_reduction_method);
- OMPT_REDUCTION_DECL(th, global_tid);
- if (packed_reduction_method == critical_reduce_block) {
- OMPT_REDUCTION_BEGIN;
- __kmp_enter_critical_section_reduce_block(loc, global_tid, lck);
- retval = 1;
- } else if (packed_reduction_method == empty_reduce_block) {
- OMPT_REDUCTION_BEGIN;
- // usage: if team size == 1, no synchronization is required ( Intel
- // platforms only )
- retval = 1;
- } else if (packed_reduction_method == atomic_reduce_block) {
- retval = 2;
- } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
- tree_reduce_block)) {
- // case tree_reduce_block:
- // this barrier should be visible to a customer and to the threading profile
- // tool (it's a terminating barrier on constructs if NOWAIT not specified)
- #if OMPT_SUPPORT
- ompt_frame_t *ompt_frame;
- if (ompt_enabled.enabled) {
- __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
- if (ompt_frame->enter_frame.ptr == NULL)
- ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
- }
- OMPT_STORE_RETURN_ADDRESS(global_tid);
- #endif
- #if USE_ITT_NOTIFY
- __kmp_threads[global_tid]->th.th_ident =
- loc; // needed for correct notification of frames
- #endif
- retval =
- __kmp_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method),
- global_tid, TRUE, reduce_size, reduce_data, reduce_func);
- retval = (retval != 0) ? (0) : (1);
- #if OMPT_SUPPORT && OMPT_OPTIONAL
- if (ompt_enabled.enabled) {
- ompt_frame->enter_frame = ompt_data_none;
- }
- #endif
- // all other workers except primary thread should do this pop here
- // (no worker other than the primary will enter __kmpc_end_reduce())
- if (__kmp_env_consistency_check) {
- if (retval == 0) { // 0: all other workers; 1: primary thread
- __kmp_pop_sync(global_tid, ct_reduce, loc);
- }
- }
- } else {
- // should never reach this block
- KMP_ASSERT(0); // "unexpected method"
- }
- if (teams_swapped) {
- __kmp_restore_swapped_teams(th, team, task_state);
- }
- KA_TRACE(10,
- ("__kmpc_reduce() exit: called T#%d: method %08x, returns %08x\n",
- global_tid, packed_reduction_method, retval));
- return retval;
- }
- /*!
- @ingroup SYNCHRONIZATION
- @param loc source location information
- @param global_tid global thread id.
- @param lck pointer to the unique lock data structure
- Finish the execution of a blocking reduce.
- The <tt>lck</tt> pointer must be the same as that used in the corresponding
- start function.
- */
- void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid,
- kmp_critical_name *lck) {
- PACKED_REDUCTION_METHOD_T packed_reduction_method;
- kmp_info_t *th;
- kmp_team_t *team;
- int teams_swapped = 0, task_state;
- KA_TRACE(10, ("__kmpc_end_reduce() enter: called T#%d\n", global_tid));
- __kmp_assert_valid_gtid(global_tid);
- th = __kmp_thread_from_gtid(global_tid);
- teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state);
- packed_reduction_method = __KMP_GET_REDUCTION_METHOD(global_tid);
- // this barrier should be visible to a customer and to the threading profile
- // tool (it's a terminating barrier on constructs if NOWAIT not specified)
- OMPT_REDUCTION_DECL(th, global_tid);
- if (packed_reduction_method == critical_reduce_block) {
- __kmp_end_critical_section_reduce_block(loc, global_tid, lck);
- OMPT_REDUCTION_END;
- // TODO: implicit barrier: should be exposed
- #if OMPT_SUPPORT
- ompt_frame_t *ompt_frame;
- if (ompt_enabled.enabled) {
- __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
- if (ompt_frame->enter_frame.ptr == NULL)
- ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
- }
- OMPT_STORE_RETURN_ADDRESS(global_tid);
- #endif
- #if USE_ITT_NOTIFY
- __kmp_threads[global_tid]->th.th_ident = loc;
- #endif
- __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
- #if OMPT_SUPPORT && OMPT_OPTIONAL
- if (ompt_enabled.enabled) {
- ompt_frame->enter_frame = ompt_data_none;
- }
- #endif
- } else if (packed_reduction_method == empty_reduce_block) {
- OMPT_REDUCTION_END;
- // usage: if team size==1, no synchronization is required (Intel platforms only)
- // TODO: implicit barrier: should be exposed
- #if OMPT_SUPPORT
- ompt_frame_t *ompt_frame;
- if (ompt_enabled.enabled) {
- __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
- if (ompt_frame->enter_frame.ptr == NULL)
- ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
- }
- OMPT_STORE_RETURN_ADDRESS(global_tid);
- #endif
- #if USE_ITT_NOTIFY
- __kmp_threads[global_tid]->th.th_ident = loc;
- #endif
- __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
- #if OMPT_SUPPORT && OMPT_OPTIONAL
- if (ompt_enabled.enabled) {
- ompt_frame->enter_frame = ompt_data_none;
- }
- #endif
- } else if (packed_reduction_method == atomic_reduce_block) {
- #if OMPT_SUPPORT
- ompt_frame_t *ompt_frame;
- if (ompt_enabled.enabled) {
- __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
- if (ompt_frame->enter_frame.ptr == NULL)
- ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
- }
- OMPT_STORE_RETURN_ADDRESS(global_tid);
- #endif
- // TODO: implicit barrier: should be exposed
- #if USE_ITT_NOTIFY
- __kmp_threads[global_tid]->th.th_ident = loc;
- #endif
- __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
- #if OMPT_SUPPORT && OMPT_OPTIONAL
- if (ompt_enabled.enabled) {
- ompt_frame->enter_frame = ompt_data_none;
- }
- #endif
- } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
- tree_reduce_block)) {
- // only primary thread executes here (primary releases all other workers)
- __kmp_end_split_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method),
- global_tid);
- } else {
- // should never reach this block
- KMP_ASSERT(0); // "unexpected method"
- }
- if (teams_swapped) {
- __kmp_restore_swapped_teams(th, team, task_state);
- }
- if (__kmp_env_consistency_check)
- __kmp_pop_sync(global_tid, ct_reduce, loc);
- KA_TRACE(10, ("__kmpc_end_reduce() exit: called T#%d: method %08x\n",
- global_tid, packed_reduction_method));
- return;
- }
- #undef __KMP_GET_REDUCTION_METHOD
- #undef __KMP_SET_REDUCTION_METHOD
- /* end of interface to fast scalable reduce routines */
- kmp_uint64 __kmpc_get_taskid() {
- kmp_int32 gtid;
- kmp_info_t *thread;
- gtid = __kmp_get_gtid();
- if (gtid < 0) {
- return 0;
- }
- thread = __kmp_thread_from_gtid(gtid);
- return thread->th.th_current_task->td_task_id;
- } // __kmpc_get_taskid
- kmp_uint64 __kmpc_get_parent_taskid() {
- kmp_int32 gtid;
- kmp_info_t *thread;
- kmp_taskdata_t *parent_task;
- gtid = __kmp_get_gtid();
- if (gtid < 0) {
- return 0;
- }
- thread = __kmp_thread_from_gtid(gtid);
- parent_task = thread->th.th_current_task->td_parent;
- return (parent_task == NULL ? 0 : parent_task->td_task_id);
- } // __kmpc_get_parent_taskid
- /*!
- @ingroup WORK_SHARING
- @param loc source location information.
- @param gtid global thread number.
- @param num_dims number of associated doacross loops.
- @param dims info on loops bounds.
- Initialize doacross loop information.
- Expect the compiler to send us inclusive bounds,
- e.g. for(i=2;i<9;i+=2) lo=2, up=8, st=2.
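- For instance (a sketch; the kmp_dim field names follow the usage in the
- implementation below):
- @code
- struct kmp_dim d = {2, 8, 2}; // lo, up, st for the loop above
- __kmpc_doacross_init(loc, gtid, 1, &d);
- @endcode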
- */
- void __kmpc_doacross_init(ident_t *loc, int gtid, int num_dims,
- const struct kmp_dim *dims) {
- __kmp_assert_valid_gtid(gtid);
- int j, idx;
- kmp_int64 last, trace_count;
- kmp_info_t *th = __kmp_threads[gtid];
- kmp_team_t *team = th->th.th_team;
- kmp_uint32 *flags;
- kmp_disp_t *pr_buf = th->th.th_dispatch;
- dispatch_shared_info_t *sh_buf;
- KA_TRACE(
- 20,
- ("__kmpc_doacross_init() enter: called T#%d, num dims %d, active %d\n",
- gtid, num_dims, !team->t.t_serialized));
- KMP_DEBUG_ASSERT(dims != NULL);
- KMP_DEBUG_ASSERT(num_dims > 0);
- if (team->t.t_serialized) {
- KA_TRACE(20, ("__kmpc_doacross_init() exit: serialized team\n"));
- return; // no dependencies if team is serialized
- }
- KMP_DEBUG_ASSERT(team->t.t_nproc > 1);
- idx = pr_buf->th_doacross_buf_idx++; // Increment index of shared buffer for
- // the next loop
- sh_buf = &team->t.t_disp_buffer[idx % __kmp_dispatch_num_buffers];
- // Save bounds info into allocated private buffer
- KMP_DEBUG_ASSERT(pr_buf->th_doacross_info == NULL);
- pr_buf->th_doacross_info = (kmp_int64 *)__kmp_thread_malloc(
- th, sizeof(kmp_int64) * (4 * num_dims + 1));
- KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL);
- pr_buf->th_doacross_info[0] =
- (kmp_int64)num_dims; // first element is number of dimensions
- // Also save the address of num_done so it can be accessed later without
- // knowing the buffer index
- pr_buf->th_doacross_info[1] = (kmp_int64)&sh_buf->doacross_num_done;
- pr_buf->th_doacross_info[2] = dims[0].lo;
- pr_buf->th_doacross_info[3] = dims[0].up;
- pr_buf->th_doacross_info[4] = dims[0].st;
- last = 5;
- for (j = 1; j < num_dims; ++j) {
- kmp_int64
- range_length; // To keep ranges of all dimensions but the first dims[0]
- if (dims[j].st == 1) { // most common case
- // AC: should we care about ranges bigger than LLONG_MAX? (not for now)
- range_length = dims[j].up - dims[j].lo + 1;
- } else {
- if (dims[j].st > 0) {
- KMP_DEBUG_ASSERT(dims[j].up > dims[j].lo);
- range_length = (kmp_uint64)(dims[j].up - dims[j].lo) / dims[j].st + 1;
- } else { // negative increment
- KMP_DEBUG_ASSERT(dims[j].lo > dims[j].up);
- range_length =
- (kmp_uint64)(dims[j].lo - dims[j].up) / (-dims[j].st) + 1;
- }
- }
- pr_buf->th_doacross_info[last++] = range_length;
- pr_buf->th_doacross_info[last++] = dims[j].lo;
- pr_buf->th_doacross_info[last++] = dims[j].up;
- pr_buf->th_doacross_info[last++] = dims[j].st;
- }
- // Compute total trip count.
- // Start with range of dims[0] which we don't need to keep in the buffer.
- if (dims[0].st == 1) { // most common case
- trace_count = dims[0].up - dims[0].lo + 1;
- } else if (dims[0].st > 0) {
- KMP_DEBUG_ASSERT(dims[0].up > dims[0].lo);
- trace_count = (kmp_uint64)(dims[0].up - dims[0].lo) / dims[0].st + 1;
- } else { // negative increment
- KMP_DEBUG_ASSERT(dims[0].lo > dims[0].up);
- trace_count = (kmp_uint64)(dims[0].lo - dims[0].up) / (-dims[0].st) + 1;
- }
- for (j = 1; j < num_dims; ++j) {
- trace_count *= pr_buf->th_doacross_info[4 * j + 1]; // use kept ranges
- }
- KMP_DEBUG_ASSERT(trace_count > 0);
- // Check whether the shared buffer is still occupied by a previous loop
- // (one with index idx - __kmp_dispatch_num_buffers)
- if (idx != sh_buf->doacross_buf_idx) {
- // Shared buffer is occupied, wait for it to be free
- __kmp_wait_4((volatile kmp_uint32 *)&sh_buf->doacross_buf_idx, idx,
- __kmp_eq_4, NULL);
- }
- #if KMP_32_BIT_ARCH
- // Check if we are the first thread. After the CAS the first thread gets 0,
- // others get 1 if initialization is in progress, allocated pointer otherwise.
- // Treat pointer as volatile integer (value 0 or 1) until memory is allocated.
- flags = (kmp_uint32 *)KMP_COMPARE_AND_STORE_RET32(
- (volatile kmp_int32 *)&sh_buf->doacross_flags, NULL, 1);
- #else
- flags = (kmp_uint32 *)KMP_COMPARE_AND_STORE_RET64(
- (volatile kmp_int64 *)&sh_buf->doacross_flags, NULL, 1LL);
- #endif
- if (flags == NULL) {
- // we are the first thread, allocate the array of flags
- size_t size =
- (size_t)trace_count / 8 + 8; // in bytes, use single bit per iteration
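- // (e.g., trace_count = 1000 iterations -> 1000 / 8 + 8 = 133 bytes)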
- flags = (kmp_uint32 *)__kmp_thread_calloc(th, size, 1);
- KMP_MB();
- sh_buf->doacross_flags = flags;
- } else if (flags == (kmp_uint32 *)1) {
- #if KMP_32_BIT_ARCH
- // initialization is still in progress, need to wait
- while (*(volatile kmp_int32 *)&sh_buf->doacross_flags == 1)
- #else
- while (*(volatile kmp_int64 *)&sh_buf->doacross_flags == 1LL)
- #endif
- KMP_YIELD(TRUE);
- KMP_MB();
- } else {
- KMP_MB();
- }
- KMP_DEBUG_ASSERT(sh_buf->doacross_flags > (kmp_uint32 *)1); // check ptr value
- pr_buf->th_doacross_flags =
- sh_buf->doacross_flags; // save private copy in order to not
- // touch shared buffer on each iteration
- KA_TRACE(20, ("__kmpc_doacross_init() exit: T#%d\n", gtid));
- }
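- // A sketch (illustrative, not the required codegen) of how a compiler might
- // drive the wait/post/fini entry points below for a doacross loop with
- // "#pragma omp ordered depend(sink: i-1)" and "... depend(source)":
- //   kmp_int64 vec[1];
- //   vec[0] = i - 1; __kmpc_doacross_wait(loc, gtid, vec); // sink: block
- //   vec[0] = i;     __kmpc_doacross_post(loc, gtid, vec); // source: signal
- //   ... // and once per thread after the loop:
- //   __kmpc_doacross_fini(loc, gtid);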
- void __kmpc_doacross_wait(ident_t *loc, int gtid, const kmp_int64 *vec) {
- __kmp_assert_valid_gtid(gtid);
- kmp_int64 shft;
- size_t num_dims, i;
- kmp_uint32 flag;
- kmp_int64 iter_number; // iteration number of "collapsed" loop nest
- kmp_info_t *th = __kmp_threads[gtid];
- kmp_team_t *team = th->th.th_team;
- kmp_disp_t *pr_buf;
- kmp_int64 lo, up, st;
- KA_TRACE(20, ("__kmpc_doacross_wait() enter: called T#%d\n", gtid));
- if (team->t.t_serialized) {
- KA_TRACE(20, ("__kmpc_doacross_wait() exit: serialized team\n"));
- return; // no dependencies if team is serialized
- }
- // calculate sequential iteration number and check out-of-bounds condition
- pr_buf = th->th.th_dispatch;
- KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL);
- num_dims = (size_t)pr_buf->th_doacross_info[0];
- lo = pr_buf->th_doacross_info[2];
- up = pr_buf->th_doacross_info[3];
- st = pr_buf->th_doacross_info[4];
- #if OMPT_SUPPORT && OMPT_OPTIONAL
- ompt_dependence_t deps[num_dims];
- #endif
- if (st == 1) { // most common case
- if (vec[0] < lo || vec[0] > up) {
- KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
- "bounds [%lld,%lld]\n",
- gtid, vec[0], lo, up));
- return;
- }
- iter_number = vec[0] - lo;
- } else if (st > 0) {
- if (vec[0] < lo || vec[0] > up) {
- KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
- "bounds [%lld,%lld]\n",
- gtid, vec[0], lo, up));
- return;
- }
- iter_number = (kmp_uint64)(vec[0] - lo) / st;
- } else { // negative increment
- if (vec[0] > lo || vec[0] < up) {
- KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
- "bounds [%lld,%lld]\n",
- gtid, vec[0], lo, up));
- return;
- }
- iter_number = (kmp_uint64)(lo - vec[0]) / (-st);
- }
- #if OMPT_SUPPORT && OMPT_OPTIONAL
- deps[0].variable.value = iter_number;
- deps[0].dependence_type = ompt_dependence_type_sink;
- #endif
- for (i = 1; i < num_dims; ++i) {
- kmp_int64 iter, ln;
- size_t j = i * 4;
- ln = pr_buf->th_doacross_info[j + 1];
- lo = pr_buf->th_doacross_info[j + 2];
- up = pr_buf->th_doacross_info[j + 3];
- st = pr_buf->th_doacross_info[j + 4];
- if (st == 1) {
- if (vec[i] < lo || vec[i] > up) {
- KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
- "bounds [%lld,%lld]\n",
- gtid, vec[i], lo, up));
- return;
- }
- iter = vec[i] - lo;
- } else if (st > 0) {
- if (vec[i] < lo || vec[i] > up) {
- KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
- "bounds [%lld,%lld]\n",
- gtid, vec[i], lo, up));
- return;
- }
- iter = (kmp_uint64)(vec[i] - lo) / st;
- } else { // st < 0
- if (vec[i] > lo || vec[i] < up) {
- KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
- "bounds [%lld,%lld]\n",
- gtid, vec[i], lo, up));
- return;
- }
- iter = (kmp_uint64)(lo - vec[i]) / (-st);
- }
- iter_number = iter + ln * iter_number;
- #if OMPT_SUPPORT && OMPT_OPTIONAL
- deps[i].variable.value = iter;
- deps[i].dependence_type = ompt_dependence_type_sink;
- #endif
- }
- shft = iter_number % 32; // use 32-bit granularity
- iter_number >>= 5; // divided by 32
- flag = 1 << shft;
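- // e.g., linearized iteration 70 gives shft = 70 % 32 = 6 and word index
- // 70 >> 5 = 2, i.e. we spin on bit 6 of th_doacross_flags[2]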
- while ((flag & pr_buf->th_doacross_flags[iter_number]) == 0) {
- KMP_YIELD(TRUE);
- }
- KMP_MB();
- #if OMPT_SUPPORT && OMPT_OPTIONAL
- if (ompt_enabled.ompt_callback_dependences) {
- ompt_callbacks.ompt_callback(ompt_callback_dependences)(
- &(OMPT_CUR_TASK_INFO(th)->task_data), deps, (kmp_uint32)num_dims);
- }
- #endif
- KA_TRACE(20,
- ("__kmpc_doacross_wait() exit: T#%d wait for iter %lld completed\n",
- gtid, (iter_number << 5) + shft));
- }
- void __kmpc_doacross_post(ident_t *loc, int gtid, const kmp_int64 *vec) {
- __kmp_assert_valid_gtid(gtid);
- kmp_int64 shft;
- size_t num_dims, i;
- kmp_uint32 flag;
- kmp_int64 iter_number; // iteration number of "collapsed" loop nest
- kmp_info_t *th = __kmp_threads[gtid];
- kmp_team_t *team = th->th.th_team;
- kmp_disp_t *pr_buf;
- kmp_int64 lo, st;
- KA_TRACE(20, ("__kmpc_doacross_post() enter: called T#%d\n", gtid));
- if (team->t.t_serialized) {
- KA_TRACE(20, ("__kmpc_doacross_post() exit: serialized team\n"));
- return; // no dependencies if team is serialized
- }
- // calculate sequential iteration number (same as in "wait" but no
- // out-of-bounds checks)
- pr_buf = th->th.th_dispatch;
- KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL);
- num_dims = (size_t)pr_buf->th_doacross_info[0];
- lo = pr_buf->th_doacross_info[2];
- st = pr_buf->th_doacross_info[4];
- #if OMPT_SUPPORT && OMPT_OPTIONAL
- ompt_dependence_t deps[num_dims];
- #endif
- if (st == 1) { // most common case
- iter_number = vec[0] - lo;
- } else if (st > 0) {
- iter_number = (kmp_uint64)(vec[0] - lo) / st;
- } else { // negative increment
- iter_number = (kmp_uint64)(lo - vec[0]) / (-st);
- }
- #if OMPT_SUPPORT && OMPT_OPTIONAL
- deps[0].variable.value = iter_number;
- deps[0].dependence_type = ompt_dependence_type_source;
- #endif
- for (i = 1; i < num_dims; ++i) {
- kmp_int64 iter, ln;
- size_t j = i * 4;
- ln = pr_buf->th_doacross_info[j + 1];
- lo = pr_buf->th_doacross_info[j + 2];
- st = pr_buf->th_doacross_info[j + 4];
- if (st == 1) {
- iter = vec[i] - lo;
- } else if (st > 0) {
- iter = (kmp_uint64)(vec[i] - lo) / st;
- } else { // st < 0
- iter = (kmp_uint64)(lo - vec[i]) / (-st);
- }
- iter_number = iter + ln * iter_number;
- #if OMPT_SUPPORT && OMPT_OPTIONAL
- deps[i].variable.value = iter;
- deps[i].dependence_type = ompt_dependence_type_source;
- #endif
- }
- #if OMPT_SUPPORT && OMPT_OPTIONAL
- if (ompt_enabled.ompt_callback_dependences) {
- ompt_callbacks.ompt_callback(ompt_callback_dependences)(
- &(OMPT_CUR_TASK_INFO(th)->task_data), deps, (kmp_uint32)num_dims);
- }
- #endif
- shft = iter_number % 32; // use 32-bit granularity
- iter_number >>= 5; // divided by 32
- flag = 1 << shft;
- KMP_MB();
- if ((flag & pr_buf->th_doacross_flags[iter_number]) == 0)
- KMP_TEST_THEN_OR32(&pr_buf->th_doacross_flags[iter_number], flag);
- KA_TRACE(20, ("__kmpc_doacross_post() exit: T#%d iter %lld posted\n", gtid,
- (iter_number << 5) + shft));
- }
- void __kmpc_doacross_fini(ident_t *loc, int gtid) {
- __kmp_assert_valid_gtid(gtid);
- kmp_int32 num_done;
- kmp_info_t *th = __kmp_threads[gtid];
- kmp_team_t *team = th->th.th_team;
- kmp_disp_t *pr_buf = th->th.th_dispatch;
- KA_TRACE(20, ("__kmpc_doacross_fini() enter: called T#%d\n", gtid));
- if (team->t.t_serialized) {
- KA_TRACE(20, ("__kmpc_doacross_fini() exit: serialized team %p\n", team));
- return; // nothing to do
- }
- num_done =
- KMP_TEST_THEN_INC32((kmp_uintptr_t)(pr_buf->th_doacross_info[1])) + 1;
- if (num_done == th->th.th_team_nproc) {
- // we are the last thread, need to free shared resources
- int idx = pr_buf->th_doacross_buf_idx - 1;
- dispatch_shared_info_t *sh_buf =
- &team->t.t_disp_buffer[idx % __kmp_dispatch_num_buffers];
- KMP_DEBUG_ASSERT(pr_buf->th_doacross_info[1] ==
- (kmp_int64)&sh_buf->doacross_num_done);
- KMP_DEBUG_ASSERT(num_done == sh_buf->doacross_num_done);
- KMP_DEBUG_ASSERT(idx == sh_buf->doacross_buf_idx);
- __kmp_thread_free(th, CCAST(kmp_uint32 *, sh_buf->doacross_flags));
- sh_buf->doacross_flags = NULL;
- sh_buf->doacross_num_done = 0;
- sh_buf->doacross_buf_idx +=
- __kmp_dispatch_num_buffers; // free buffer for future re-use
- }
- // free private resources (need to keep buffer index forever)
- pr_buf->th_doacross_flags = NULL;
- __kmp_thread_free(th, (void *)pr_buf->th_doacross_info);
- pr_buf->th_doacross_info = NULL;
- KA_TRACE(20, ("__kmpc_doacross_fini() exit: T#%d\n", gtid));
- }
- /* OpenMP 5.1 Memory Management routines */
- void *omp_alloc(size_t size, omp_allocator_handle_t allocator) {
- return __kmp_alloc(__kmp_entry_gtid(), 0, size, allocator);
- }
- void *omp_aligned_alloc(size_t align, size_t size,
- omp_allocator_handle_t allocator) {
- return __kmp_alloc(__kmp_entry_gtid(), align, size, allocator);
- }
- void *omp_calloc(size_t nmemb, size_t size, omp_allocator_handle_t allocator) {
- return __kmp_calloc(__kmp_entry_gtid(), 0, nmemb, size, allocator);
- }
- void *omp_aligned_calloc(size_t align, size_t nmemb, size_t size,
- omp_allocator_handle_t allocator) {
- return __kmp_calloc(__kmp_entry_gtid(), align, nmemb, size, allocator);
- }
- void *omp_realloc(void *ptr, size_t size, omp_allocator_handle_t allocator,
- omp_allocator_handle_t free_allocator) {
- return __kmp_realloc(__kmp_entry_gtid(), ptr, size, allocator,
- free_allocator);
- }
- void omp_free(void *ptr, omp_allocator_handle_t allocator) {
- ___kmpc_free(__kmp_entry_gtid(), ptr, allocator);
- }
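- // A minimal usage sketch for the wrappers above (omp_default_mem_alloc is a
- // predefined OpenMP allocator handle):
- //   double *p = (double *)omp_alloc(n * sizeof(double), omp_default_mem_alloc);
- //   p = (double *)omp_realloc(p, 2 * n * sizeof(double),
- //                             omp_default_mem_alloc, omp_default_mem_alloc);
- //   omp_free(p, omp_default_mem_alloc);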
- /* end of OpenMP 5.1 Memory Management routines */
- int __kmpc_get_target_offload(void) {
- if (!__kmp_init_serial) {
- __kmp_serial_initialize();
- }
- return __kmp_target_offload;
- }
- int __kmpc_pause_resource(kmp_pause_status_t level) {
- if (!__kmp_init_serial) {
- return 1; // Can't pause if runtime is not initialized
- }
- return __kmp_pause_resource(level);
- }
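- // __kmpc_error implements the OpenMP error directive; for example,
- //   #pragma omp error severity(warning) message("check input")
- // is expected to lower to a call like (sketch):
- //   __kmpc_error(loc, severity_warning, "check input");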
- void __kmpc_error(ident_t *loc, int severity, const char *message) {
- if (!__kmp_init_serial)
- __kmp_serial_initialize();
- KMP_ASSERT(severity == severity_warning || severity == severity_fatal);
- #if OMPT_SUPPORT
- if (ompt_enabled.enabled && ompt_enabled.ompt_callback_error) {
- ompt_callbacks.ompt_callback(ompt_callback_error)(
- (ompt_severity_t)severity, message, KMP_STRLEN(message),
- OMPT_GET_RETURN_ADDRESS(0));
- }
- #endif // OMPT_SUPPORT
- char *src_loc;
- if (loc && loc->psource) {
- kmp_str_loc_t str_loc = __kmp_str_loc_init(loc->psource, false);
- src_loc =
- __kmp_str_format("%s:%s:%s", str_loc.file, str_loc.line, str_loc.col);
- __kmp_str_loc_free(&str_loc);
- } else {
- src_loc = __kmp_str_format("unknown");
- }
- if (severity == severity_warning)
- KMP_WARNING(UserDirectedWarning, src_loc, message);
- else
- KMP_FATAL(UserDirectedError, src_loc, message);
- __kmp_str_free(&src_loc);
- }
- // Mark begin of scope directive.
- void __kmpc_scope(ident_t *loc, kmp_int32 gtid, void *reserved) {
- // reserved is for extension of scope directive and not used.
- #if OMPT_SUPPORT && OMPT_OPTIONAL
- if (ompt_enabled.enabled && ompt_enabled.ompt_callback_work) {
- kmp_team_t *team = __kmp_threads[gtid]->th.th_team;
- int tid = __kmp_tid_from_gtid(gtid);
- ompt_callbacks.ompt_callback(ompt_callback_work)(
- ompt_work_scope, ompt_scope_begin,
- &(team->t.ompt_team_info.parallel_data),
- &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 1,
- OMPT_GET_RETURN_ADDRESS(0));
- }
- #endif // OMPT_SUPPORT && OMPT_OPTIONAL
- }
- // Mark end of scope directive
- void __kmpc_end_scope(ident_t *loc, kmp_int32 gtid, void *reserved) {
- // reserved is for extension of scope directive and not used.
- #if OMPT_SUPPORT && OMPT_OPTIONAL
- if (ompt_enabled.enabled && ompt_enabled.ompt_callback_work) {
- kmp_team_t *team = __kmp_threads[gtid]->th.th_team;
- int tid = __kmp_tid_from_gtid(gtid);
- ompt_callbacks.ompt_callback(ompt_callback_work)(
- ompt_work_scope, ompt_scope_end,
- &(team->t.ompt_team_info.parallel_data),
- &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 1,
- OMPT_GET_RETURN_ADDRESS(0));
- }
- #endif // OMPT_SUPPORT && OMPT_OPTIONAL
- }
- #ifdef KMP_USE_VERSION_SYMBOLS
- // For GOMP compatibility there are two versions of each omp_* API.
- // One is the plain C symbol and one is the Fortran symbol with an appended
- // underscore. When we implement a specific ompc_* version of an omp_*
- // function, we want the plain GOMP versioned symbol to alias the ompc_* version
- // instead of the Fortran versions in kmp_ftn_entry.h
- extern "C" {
- // Have to undef these from omp.h so they aren't translated into
- // their ompc counterparts in the KMP_VERSION_OMPC_SYMBOL macros below
- #ifdef omp_set_affinity_format
- #undef omp_set_affinity_format
- #endif
- #ifdef omp_get_affinity_format
- #undef omp_get_affinity_format
- #endif
- #ifdef omp_display_affinity
- #undef omp_display_affinity
- #endif
- #ifdef omp_capture_affinity
- #undef omp_capture_affinity
- #endif
- KMP_VERSION_OMPC_SYMBOL(ompc_set_affinity_format, omp_set_affinity_format, 50,
- "OMP_5.0");
- KMP_VERSION_OMPC_SYMBOL(ompc_get_affinity_format, omp_get_affinity_format, 50,
- "OMP_5.0");
- KMP_VERSION_OMPC_SYMBOL(ompc_display_affinity, omp_display_affinity, 50,
- "OMP_5.0");
- KMP_VERSION_OMPC_SYMBOL(ompc_capture_affinity, omp_capture_affinity, 50,
- "OMP_5.0");
- } // extern "C"
- #endif
|