/*
 * kmp_tasking.cpp -- OpenMP 3.0 tasking support.
 */

//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "kmp.h"
#include "kmp_i18n.h"
#include "kmp_itt.h"
#include "kmp_stats.h"
#include "kmp_wait_release.h"
#include "kmp_taskdeps.h"
#if OMPT_SUPPORT
#include "ompt-specific.h"
#endif
/* forward declaration */
static void __kmp_enable_tasking(kmp_task_team_t *task_team,
                                 kmp_info_t *this_thr);
static void __kmp_alloc_task_deque(kmp_info_t *thread,
                                   kmp_thread_data_t *thread_data);
static int __kmp_realloc_task_threads_data(kmp_info_t *thread,
                                           kmp_task_team_t *task_team);
static void __kmp_bottom_half_finish_proxy(kmp_int32 gtid, kmp_task_t *ptask);

#ifdef BUILD_TIED_TASK_STACK

// __kmp_trace_task_stack: print the tied tasks from the task stack in order
// from top to bottom
//
// gtid: global thread identifier for thread containing stack
// thread_data: thread data for task team thread containing stack
// threshold: value above which the trace statement triggers
// location: string identifying call site of this function (for trace)
static void __kmp_trace_task_stack(kmp_int32 gtid,
                                   kmp_thread_data_t *thread_data,
                                   int threshold, char *location) {
  kmp_task_stack_t *task_stack = &thread_data->td.td_susp_tied_tasks;
  kmp_taskdata_t **stack_top = task_stack->ts_top;
  kmp_int32 entries = task_stack->ts_entries;
  kmp_taskdata_t *tied_task;

  KA_TRACE(
      threshold,
      ("__kmp_trace_task_stack(start): location = %s, gtid = %d, entries = %d, "
       "first_block = %p, stack_top = %p \n",
       location, gtid, entries, task_stack->ts_first_block, stack_top));

  KMP_DEBUG_ASSERT(stack_top != NULL);
  KMP_DEBUG_ASSERT(entries > 0);

  while (entries != 0) {
    KMP_DEBUG_ASSERT(stack_top != &task_stack->ts_first_block.sb_block[0]);
    // fix up ts_top if we need to pop from previous block
    if ((entries & TASK_STACK_INDEX_MASK) == 0) {
      kmp_stack_block_t *stack_block = (kmp_stack_block_t *)(stack_top);
      stack_block = stack_block->sb_prev;
      stack_top = &stack_block->sb_block[TASK_STACK_BLOCK_SIZE];
    }
    // finish bookkeeping
    stack_top--;
    entries--;
    tied_task = *stack_top;
    KMP_DEBUG_ASSERT(tied_task != NULL);
    KMP_DEBUG_ASSERT(tied_task->td_flags.tasktype == TASK_TIED);
    KA_TRACE(threshold,
             ("__kmp_trace_task_stack(%s): gtid=%d, entry=%d, "
              "stack_top=%p, tied_task=%p\n",
              location, gtid, entries, stack_top, tied_task));
  }
  KMP_DEBUG_ASSERT(stack_top == &task_stack->ts_first_block.sb_block[0]);

  KA_TRACE(threshold,
           ("__kmp_trace_task_stack(exit): location = %s, gtid = %d\n",
            location, gtid));
}
// __kmp_init_task_stack: initialize the task stack for the first time
// after a thread_data structure is created.
// It should not be necessary to do this again (assuming the stack works).
//
// gtid: global thread identifier of calling thread
// thread_data: thread data for task team thread containing stack
static void __kmp_init_task_stack(kmp_int32 gtid,
                                  kmp_thread_data_t *thread_data) {
  kmp_task_stack_t *task_stack = &thread_data->td.td_susp_tied_tasks;
  kmp_stack_block_t *first_block;

  // set up the first block of the stack
  first_block = &task_stack->ts_first_block;
  task_stack->ts_top = (kmp_taskdata_t **)first_block;
  memset((void *)first_block, '\0',
         TASK_STACK_BLOCK_SIZE * sizeof(kmp_taskdata_t *));

  // initialize the stack to be empty
  task_stack->ts_entries = TASK_STACK_EMPTY;
  first_block->sb_next = NULL;
  first_block->sb_prev = NULL;
}
// __kmp_free_task_stack: free the task stack when thread_data is destroyed.
//
// gtid: global thread identifier for calling thread
// thread_data: thread info for thread containing stack
static void __kmp_free_task_stack(kmp_int32 gtid,
                                  kmp_thread_data_t *thread_data) {
  kmp_task_stack_t *task_stack = &thread_data->td.td_susp_tied_tasks;
  kmp_stack_block_t *stack_block = &task_stack->ts_first_block;
  kmp_info_t *thread = __kmp_threads[gtid]; // calling thread, for the frees below

  KMP_DEBUG_ASSERT(task_stack->ts_entries == TASK_STACK_EMPTY);
  // free from the second block of the stack
  while (stack_block != NULL) {
    kmp_stack_block_t *next_block = (stack_block) ? stack_block->sb_next : NULL;

    stack_block->sb_next = NULL;
    stack_block->sb_prev = NULL;
    if (stack_block != &task_stack->ts_first_block) {
      __kmp_thread_free(thread,
                        stack_block); // free the block, if not the first
    }
    stack_block = next_block;
  }
  // initialize the stack to be empty
  task_stack->ts_entries = 0;
  task_stack->ts_top = NULL;
}
// __kmp_push_task_stack: Push the tied task onto the task stack.
// Grow the stack if necessary by allocating another block.
//
// gtid: global thread identifier for calling thread
// thread: thread info for thread containing stack
// tied_task: the task to push on the stack
static void __kmp_push_task_stack(kmp_int32 gtid, kmp_info_t *thread,
                                  kmp_taskdata_t *tied_task) {
  // GEH - need to consider what to do if tt_threads_data not allocated yet
  kmp_thread_data_t *thread_data =
      &thread->th.th_task_team->tt.tt_threads_data[__kmp_tid_from_gtid(gtid)];
  kmp_task_stack_t *task_stack = &thread_data->td.td_susp_tied_tasks;

  if (tied_task->td_flags.team_serial || tied_task->td_flags.tasking_ser) {
    return; // Don't push anything on stack if team or team tasks are serialized
  }

  KMP_DEBUG_ASSERT(tied_task->td_flags.tasktype == TASK_TIED);
  KMP_DEBUG_ASSERT(task_stack->ts_top != NULL);

  KA_TRACE(20,
           ("__kmp_push_task_stack(enter): GTID: %d; THREAD: %p; TASK: %p\n",
            gtid, thread, tied_task));

  // Store entry
  *(task_stack->ts_top) = tied_task;

  // Do bookkeeping for next push
  task_stack->ts_top++;
  task_stack->ts_entries++;

  if ((task_stack->ts_entries & TASK_STACK_INDEX_MASK) == 0) {
    // Find beginning of this task block
    kmp_stack_block_t *stack_block =
        (kmp_stack_block_t *)(task_stack->ts_top - TASK_STACK_BLOCK_SIZE);

    // Check if we already have a block
    if (stack_block->sb_next !=
        NULL) { // reset ts_top to beginning of next block
      task_stack->ts_top = &stack_block->sb_next->sb_block[0];
    } else { // Alloc new block and link it up
      kmp_stack_block_t *new_block = (kmp_stack_block_t *)__kmp_thread_calloc(
          thread, sizeof(kmp_stack_block_t));

      task_stack->ts_top = &new_block->sb_block[0];
      stack_block->sb_next = new_block;
      new_block->sb_prev = stack_block;
      new_block->sb_next = NULL;

      KA_TRACE(
          30,
          ("__kmp_push_task_stack(): GTID: %d; TASK: %p; Alloc new block: %p\n",
           gtid, tied_task, new_block));
    }
  }
  KA_TRACE(20, ("__kmp_push_task_stack(exit): GTID: %d; TASK: %p\n", gtid,
                tied_task));
}
// __kmp_pop_task_stack: Pop the tied task from the task stack. Don't return
// the task, just check to make sure it matches the ending task passed in.
//
// gtid: global thread identifier for the calling thread
// thread: thread info structure containing stack
// tied_task: the task popped off the stack
// ending_task: the task that is ending (should match popped task)
static void __kmp_pop_task_stack(kmp_int32 gtid, kmp_info_t *thread,
                                 kmp_taskdata_t *ending_task) {
  // GEH - need to consider what to do if tt_threads_data not allocated yet
  kmp_thread_data_t *thread_data =
      &thread->th.th_task_team->tt.tt_threads_data[__kmp_tid_from_gtid(gtid)];
  kmp_task_stack_t *task_stack = &thread_data->td.td_susp_tied_tasks;
  kmp_taskdata_t *tied_task;

  if (ending_task->td_flags.team_serial || ending_task->td_flags.tasking_ser) {
    // Don't pop anything from stack if team or team tasks are serialized
    return;
  }

  KMP_DEBUG_ASSERT(task_stack->ts_top != NULL);
  KMP_DEBUG_ASSERT(task_stack->ts_entries > 0);

  KA_TRACE(20, ("__kmp_pop_task_stack(enter): GTID: %d; THREAD: %p\n", gtid,
                thread));

  // fix up ts_top if we need to pop from previous block
  if ((task_stack->ts_entries & TASK_STACK_INDEX_MASK) == 0) {
    kmp_stack_block_t *stack_block = (kmp_stack_block_t *)(task_stack->ts_top);

    stack_block = stack_block->sb_prev;
    task_stack->ts_top = &stack_block->sb_block[TASK_STACK_BLOCK_SIZE];
  }

  // finish bookkeeping
  task_stack->ts_top--;
  task_stack->ts_entries--;

  tied_task = *(task_stack->ts_top);

  KMP_DEBUG_ASSERT(tied_task != NULL);
  KMP_DEBUG_ASSERT(tied_task->td_flags.tasktype == TASK_TIED);
  KMP_DEBUG_ASSERT(tied_task == ending_task); // If we built the stack correctly

  KA_TRACE(20, ("__kmp_pop_task_stack(exit): GTID: %d; TASK: %p\n", gtid,
                tied_task));
  return;
}
#endif /* BUILD_TIED_TASK_STACK */
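
// A minimal stand-alone sketch of the grow-by-block scheme used by the tied
// task stack above: fixed-size blocks chained through sb_prev/sb_next, with
// ts_top always pointing at the next free slot. The names below are
// hypothetical and the block is kept under "#if 0" so it is never built.
#if 0
struct sketch_block {
  void *slots[32]; // one block worth of entries (cf. TASK_STACK_BLOCK_SIZE)
  sketch_block *prev, *next; // doubly linked chain of blocks
};
struct sketch_stack {
  sketch_block first; // first block lives inline, like ts_first_block
  void **top; // next free slot, like ts_top
  int entries; // like ts_entries
};
static void sketch_push(sketch_stack *s, void *task) {
  *s->top++ = task;
  if (++s->entries % 32 == 0) { // just crossed a block boundary
    sketch_block *blk = (sketch_block *)(s->top - 32); // start of current block
    if (!blk->next) { // lazily allocate and link the next block
      blk->next = new sketch_block();
      blk->next->prev = blk;
    }
    s->top = &blk->next->slots[0];
  }
}
#endif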
// returns 1 if new task is allowed to execute, 0 otherwise
// checks Task Scheduling constraint (if requested) and
// mutexinoutset dependencies if any
static bool __kmp_task_is_allowed(int gtid, const kmp_int32 is_constrained,
                                  const kmp_taskdata_t *tasknew,
                                  const kmp_taskdata_t *taskcurr) {
  if (is_constrained && (tasknew->td_flags.tiedness == TASK_TIED)) {
    // Check if the candidate obeys the Task Scheduling Constraints (TSC):
    // only a descendant of all deferred tied tasks can be scheduled; checking
    // the last one is enough, as it in turn is a descendant of all the others
    kmp_taskdata_t *current = taskcurr->td_last_tied;
    KMP_DEBUG_ASSERT(current != NULL);
    // check if the task is not suspended on barrier
    if (current->td_flags.tasktype == TASK_EXPLICIT ||
        current->td_taskwait_thread > 0) { // <= 0 on barrier
      kmp_int32 level = current->td_level;
      kmp_taskdata_t *parent = tasknew->td_parent;
      while (parent != current && parent->td_level > level) {
        // check generation up to the level of the current task
        parent = parent->td_parent;
        KMP_DEBUG_ASSERT(parent != NULL);
      }
      if (parent != current)
        return false;
    }
  }
  // Check mutexinoutset dependencies, acquire locks
  kmp_depnode_t *node = tasknew->td_depnode;
  if (UNLIKELY(node && (node->dn.mtx_num_locks > 0))) {
    for (int i = 0; i < node->dn.mtx_num_locks; ++i) {
      KMP_DEBUG_ASSERT(node->dn.mtx_locks[i] != NULL);
      if (__kmp_test_lock(node->dn.mtx_locks[i], gtid))
        continue;
      // could not get the lock, release previous locks
      for (int j = i - 1; j >= 0; --j)
        __kmp_release_lock(node->dn.mtx_locks[j], gtid);
      return false;
    }
    // negative num_locks means all locks acquired successfully
    node->dn.mtx_num_locks = -node->dn.mtx_num_locks;
  }
  return true;
}
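
// The Task Scheduling Constraint check above walks the new task's ancestor
// chain, but only down to the level of the last deferred tied task; comparing
// td_level values bounds the walk instead of climbing to the root. A condensed
// sketch of that walk with hypothetical types, kept under "#if 0":
#if 0
struct sketch_task {
  sketch_task *parent;
  int level; // depth in the task tree, as td_level
};
// Returns true iff `anc` appears on t's parent chain.
static bool sketch_is_descendant(const sketch_task *t, const sketch_task *anc) {
  const sketch_task *p = t->parent;
  while (p != anc && p->level > anc->level)
    p = p->parent; // climb at most (t->level - anc->level) steps
  return p == anc;
}
#endif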
// __kmp_realloc_task_deque:
// Re-allocates a task deque for a particular thread, copies the content from
// the old deque and adjusts the necessary data structures relating to the
// deque. This operation must be done with the deque_lock being held.
static void __kmp_realloc_task_deque(kmp_info_t *thread,
                                     kmp_thread_data_t *thread_data) {
  kmp_int32 size = TASK_DEQUE_SIZE(thread_data->td);
  KMP_DEBUG_ASSERT(TCR_4(thread_data->td.td_deque_ntasks) == size);
  kmp_int32 new_size = 2 * size;

  KE_TRACE(10, ("__kmp_realloc_task_deque: T#%d reallocating deque[from %d to "
                "%d] for thread_data %p\n",
                __kmp_gtid_from_thread(thread), size, new_size, thread_data));

  kmp_taskdata_t **new_deque =
      (kmp_taskdata_t **)__kmp_allocate(new_size * sizeof(kmp_taskdata_t *));

  int i, j;
  for (i = thread_data->td.td_deque_head, j = 0; j < size;
       i = (i + 1) & TASK_DEQUE_MASK(thread_data->td), j++)
    new_deque[j] = thread_data->td.td_deque[i];

  __kmp_free(thread_data->td.td_deque);

  thread_data->td.td_deque_head = 0;
  thread_data->td.td_deque_tail = size;
  thread_data->td.td_deque = new_deque;
  thread_data->td.td_deque_size = new_size;
}
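
// Deque sizes are powers of two, so TASK_DEQUE_MASK(td) is size-1 and indices
// wrap with a bitwise AND rather than a modulo; on resize the live entries are
// copied out in ring order starting at the old head, leaving the new deque
// compacted with head == 0 and tail == old size. A minimal model of that
// wrap-and-copy, with hypothetical names and kept under "#if 0":
#if 0
#include <vector>
static std::vector<void *> sketch_grow_ring(const std::vector<void *> &old_ring,
                                            unsigned head, unsigned ntasks) {
  const unsigned mask = (unsigned)old_ring.size() - 1; // size is a power of two
  std::vector<void *> grown(old_ring.size() * 2, nullptr);
  for (unsigned j = 0; j < ntasks; ++j)
    grown[j] = old_ring[(head + j) & mask]; // same wrap as TASK_DEQUE_MASK
  // the caller would now set head = 0 and tail = ntasks
  return grown;
}
#endif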
static kmp_task_pri_t *__kmp_alloc_task_pri_list() {
  kmp_task_pri_t *l = (kmp_task_pri_t *)__kmp_allocate(sizeof(kmp_task_pri_t));
  kmp_thread_data_t *thread_data = &l->td;
  __kmp_init_bootstrap_lock(&thread_data->td.td_deque_lock);
  thread_data->td.td_deque_last_stolen = -1;
  KE_TRACE(20, ("__kmp_alloc_task_pri_list: T#%d allocating deque[%d] "
                "for thread_data %p\n",
                __kmp_get_gtid(), INITIAL_TASK_DEQUE_SIZE, thread_data));
  thread_data->td.td_deque = (kmp_taskdata_t **)__kmp_allocate(
      INITIAL_TASK_DEQUE_SIZE * sizeof(kmp_taskdata_t *));
  thread_data->td.td_deque_size = INITIAL_TASK_DEQUE_SIZE;
  return l;
}
// The function finds the deque of priority tasks with the given priority, or
// allocates a new deque and puts it into the sorted (high -> low) list of
// deques. Deques of non-default priority tasks are shared between all threads
// in the team, as opposed to per-thread deques of tasks with default priority.
// The function is called under the lock task_team->tt.tt_task_pri_lock.
static kmp_thread_data_t *
__kmp_get_priority_deque_data(kmp_task_team_t *task_team, kmp_int32 pri) {
  kmp_thread_data_t *thread_data;
  kmp_task_pri_t *lst = task_team->tt.tt_task_pri_list;
  if (lst->priority == pri) {
    // Found queue of tasks with given priority.
    thread_data = &lst->td;
  } else if (lst->priority < pri) {
    // All current priority queues contain tasks with lower priority.
    // Allocate new one for given priority tasks.
    kmp_task_pri_t *list = __kmp_alloc_task_pri_list();
    thread_data = &list->td;
    list->priority = pri;
    list->next = lst;
    task_team->tt.tt_task_pri_list = list;
  } else { // task_team->tt.tt_task_pri_list->priority > pri
    kmp_task_pri_t *next_queue = lst->next;
    while (next_queue && next_queue->priority > pri) {
      lst = next_queue;
      next_queue = lst->next;
    }
    // lst->priority > pri && (next == NULL || pri >= next->priority)
    if (next_queue == NULL) {
      // No queue with pri priority, need to allocate new one.
      kmp_task_pri_t *list = __kmp_alloc_task_pri_list();
      thread_data = &list->td;
      list->priority = pri;
      list->next = NULL;
      lst->next = list;
    } else if (next_queue->priority == pri) {
      // Found queue of tasks with given priority.
      thread_data = &next_queue->td;
    } else { // lst->priority > pri > next->priority
      // insert the newly allocated queue between existing queues
      kmp_task_pri_t *list = __kmp_alloc_task_pri_list();
      thread_data = &list->td;
      list->priority = pri;
      list->next = next_queue;
      lst->next = list;
    }
  }
  return thread_data;
}
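
// tt_task_pri_list is a singly linked list of per-priority deques kept sorted
// from highest to lowest priority; lookup either finds an existing node or
// splices a new one into place. A condensed pointer-to-pointer sketch of the
// same ordering, with hypothetical types and kept under "#if 0":
#if 0
struct sketch_pri_node {
  int priority;
  sketch_pri_node *next;
};
static sketch_pri_node *sketch_find_or_insert(sketch_pri_node **head, int pri) {
  sketch_pri_node **link = head;
  while (*link && (*link)->priority > pri)
    link = &(*link)->next; // walk past higher-priority deques
  if (*link && (*link)->priority == pri)
    return *link; // found an existing deque for this priority
  sketch_pri_node *n = new sketch_pri_node{pri, *link};
  *link = n; // splice in, preserving the high -> low order
  return n;
}
#endif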
// __kmp_push_priority_task: Add a task to the team's priority task deque
static kmp_int32 __kmp_push_priority_task(kmp_int32 gtid, kmp_info_t *thread,
                                          kmp_taskdata_t *taskdata,
                                          kmp_task_team_t *task_team,
                                          kmp_int32 pri) {
  kmp_thread_data_t *thread_data = NULL;
  KA_TRACE(20,
           ("__kmp_push_priority_task: T#%d trying to push task %p, pri %d.\n",
            gtid, taskdata, pri));

  // Find task queue specific to priority value
  kmp_task_pri_t *lst = task_team->tt.tt_task_pri_list;
  if (UNLIKELY(lst == NULL)) {
    __kmp_acquire_bootstrap_lock(&task_team->tt.tt_task_pri_lock);
    if (task_team->tt.tt_task_pri_list == NULL) {
      // List of queues is still empty, allocate one.
      kmp_task_pri_t *list = __kmp_alloc_task_pri_list();
      thread_data = &list->td;
      list->priority = pri;
      list->next = NULL;
      task_team->tt.tt_task_pri_list = list;
    } else {
      // Other thread initialized a queue. Check if it fits and get thread_data.
      thread_data = __kmp_get_priority_deque_data(task_team, pri);
    }
    __kmp_release_bootstrap_lock(&task_team->tt.tt_task_pri_lock);
  } else {
    if (lst->priority == pri) {
      // Found queue of tasks with given priority.
      thread_data = &lst->td;
    } else {
      __kmp_acquire_bootstrap_lock(&task_team->tt.tt_task_pri_lock);
      thread_data = __kmp_get_priority_deque_data(task_team, pri);
      __kmp_release_bootstrap_lock(&task_team->tt.tt_task_pri_lock);
    }
  }
  KMP_DEBUG_ASSERT(thread_data);

  __kmp_acquire_bootstrap_lock(&thread_data->td.td_deque_lock);
  // Check if deque is full
  if (TCR_4(thread_data->td.td_deque_ntasks) >=
      TASK_DEQUE_SIZE(thread_data->td)) {
    if (__kmp_enable_task_throttling &&
        __kmp_task_is_allowed(gtid, __kmp_task_stealing_constraint, taskdata,
                              thread->th.th_current_task)) {
      __kmp_release_bootstrap_lock(&thread_data->td.td_deque_lock);
      KA_TRACE(20, ("__kmp_push_priority_task: T#%d deque is full; returning "
                    "TASK_NOT_PUSHED for task %p\n",
                    gtid, taskdata));
      return TASK_NOT_PUSHED;
    } else {
      // expand deque to push the task which is not allowed to execute
      __kmp_realloc_task_deque(thread, thread_data);
    }
  }
  KMP_DEBUG_ASSERT(TCR_4(thread_data->td.td_deque_ntasks) <
                   TASK_DEQUE_SIZE(thread_data->td));
  // Push taskdata.
  thread_data->td.td_deque[thread_data->td.td_deque_tail] = taskdata;
  // Wrap index.
  thread_data->td.td_deque_tail =
      (thread_data->td.td_deque_tail + 1) & TASK_DEQUE_MASK(thread_data->td);
  TCW_4(thread_data->td.td_deque_ntasks,
        TCR_4(thread_data->td.td_deque_ntasks) + 1); // Adjust task count
  KMP_FSYNC_RELEASING(thread->th.th_current_task); // releasing self
  KMP_FSYNC_RELEASING(taskdata); // releasing child
  KA_TRACE(20, ("__kmp_push_priority_task: T#%d returning "
                "TASK_SUCCESSFULLY_PUSHED: task=%p ntasks=%d head=%u tail=%u\n",
                gtid, taskdata, thread_data->td.td_deque_ntasks,
                thread_data->td.td_deque_head, thread_data->td.td_deque_tail));
  __kmp_release_bootstrap_lock(&thread_data->td.td_deque_lock);
  task_team->tt.tt_num_task_pri++; // atomic inc
  return TASK_SUCCESSFULLY_PUSHED;
}
// __kmp_push_task: Add a task to the thread's deque
static kmp_int32 __kmp_push_task(kmp_int32 gtid, kmp_task_t *task) {
  kmp_info_t *thread = __kmp_threads[gtid];
  kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task);

  // If we encounter a hidden helper task, and the current thread is not a
  // hidden helper thread, we have to give the task to any hidden helper thread
  // starting from its shadow one.
  if (UNLIKELY(taskdata->td_flags.hidden_helper &&
               !KMP_HIDDEN_HELPER_THREAD(gtid))) {
    kmp_int32 shadow_gtid = KMP_GTID_TO_SHADOW_GTID(gtid);
    __kmpc_give_task(task, __kmp_tid_from_gtid(shadow_gtid));
    // Signal the hidden helper threads.
    __kmp_hidden_helper_worker_thread_signal();
    return TASK_SUCCESSFULLY_PUSHED;
  }

  kmp_task_team_t *task_team = thread->th.th_task_team;
  kmp_int32 tid = __kmp_tid_from_gtid(gtid);
  kmp_thread_data_t *thread_data;

  KA_TRACE(20,
           ("__kmp_push_task: T#%d trying to push task %p.\n", gtid, taskdata));

  if (UNLIKELY(taskdata->td_flags.tiedness == TASK_UNTIED)) {
    // untied task needs to increment counter so that the task structure is not
    // freed prematurely
    kmp_int32 counter = 1 + KMP_ATOMIC_INC(&taskdata->td_untied_count);
    KMP_DEBUG_USE_VAR(counter);
    KA_TRACE(
        20,
        ("__kmp_push_task: T#%d untied_count (%d) incremented for task %p\n",
         gtid, counter, taskdata));
  }

  // The first check avoids building task_team thread data if serialized
  if (UNLIKELY(taskdata->td_flags.task_serial)) {
    KA_TRACE(20, ("__kmp_push_task: T#%d team serialized; returning "
                  "TASK_NOT_PUSHED for task %p\n",
                  gtid, taskdata));
    return TASK_NOT_PUSHED;
  }

  // Now that serialized tasks have returned, we can assume that we are not in
  // immediate exec mode
  KMP_DEBUG_ASSERT(__kmp_tasking_mode != tskm_immediate_exec);
  if (UNLIKELY(!KMP_TASKING_ENABLED(task_team))) {
    __kmp_enable_tasking(task_team, thread);
  }
  KMP_DEBUG_ASSERT(TCR_4(task_team->tt.tt_found_tasks) == TRUE);
  KMP_DEBUG_ASSERT(TCR_PTR(task_team->tt.tt_threads_data) != NULL);

  if (taskdata->td_flags.priority_specified && task->data2.priority > 0 &&
      __kmp_max_task_priority > 0) {
    int pri = KMP_MIN(task->data2.priority, __kmp_max_task_priority);
    return __kmp_push_priority_task(gtid, thread, taskdata, task_team, pri);
  }

  // Find tasking deque specific to encountering thread
  thread_data = &task_team->tt.tt_threads_data[tid];

  // No lock needed since only owner can allocate. If the task is hidden_helper,
  // we don't need it either because we have initialized the deque for hidden
  // helper thread data.
  if (UNLIKELY(thread_data->td.td_deque == NULL)) {
    __kmp_alloc_task_deque(thread, thread_data);
  }

  int locked = 0;
  // Check if deque is full
  if (TCR_4(thread_data->td.td_deque_ntasks) >=
      TASK_DEQUE_SIZE(thread_data->td)) {
    if (__kmp_enable_task_throttling &&
        __kmp_task_is_allowed(gtid, __kmp_task_stealing_constraint, taskdata,
                              thread->th.th_current_task)) {
      KA_TRACE(20, ("__kmp_push_task: T#%d deque is full; returning "
                    "TASK_NOT_PUSHED for task %p\n",
                    gtid, taskdata));
      return TASK_NOT_PUSHED;
    } else {
      __kmp_acquire_bootstrap_lock(&thread_data->td.td_deque_lock);
      locked = 1;
      if (TCR_4(thread_data->td.td_deque_ntasks) >=
          TASK_DEQUE_SIZE(thread_data->td)) {
        // expand deque to push the task which is not allowed to execute
        __kmp_realloc_task_deque(thread, thread_data);
      }
    }
  }
  // Lock the deque for the task push operation
  if (!locked) {
    __kmp_acquire_bootstrap_lock(&thread_data->td.td_deque_lock);
    // Need to recheck as we can get a proxy task from thread outside of OpenMP
    if (TCR_4(thread_data->td.td_deque_ntasks) >=
        TASK_DEQUE_SIZE(thread_data->td)) {
      if (__kmp_enable_task_throttling &&
          __kmp_task_is_allowed(gtid, __kmp_task_stealing_constraint, taskdata,
                                thread->th.th_current_task)) {
        __kmp_release_bootstrap_lock(&thread_data->td.td_deque_lock);
        KA_TRACE(20, ("__kmp_push_task: T#%d deque is full on 2nd check; "
                      "returning TASK_NOT_PUSHED for task %p\n",
                      gtid, taskdata));
        return TASK_NOT_PUSHED;
      } else {
        // expand deque to push the task which is not allowed to execute
        __kmp_realloc_task_deque(thread, thread_data);
      }
    }
  }
  // Must have room since no thread can add tasks but calling thread
  KMP_DEBUG_ASSERT(TCR_4(thread_data->td.td_deque_ntasks) <
                   TASK_DEQUE_SIZE(thread_data->td));

  thread_data->td.td_deque[thread_data->td.td_deque_tail] =
      taskdata; // Push taskdata
  // Wrap index.
  thread_data->td.td_deque_tail =
      (thread_data->td.td_deque_tail + 1) & TASK_DEQUE_MASK(thread_data->td);
  TCW_4(thread_data->td.td_deque_ntasks,
        TCR_4(thread_data->td.td_deque_ntasks) + 1); // Adjust task count
  KMP_FSYNC_RELEASING(thread->th.th_current_task); // releasing self
  KMP_FSYNC_RELEASING(taskdata); // releasing child
  KA_TRACE(20, ("__kmp_push_task: T#%d returning TASK_SUCCESSFULLY_PUSHED: "
                "task=%p ntasks=%d head=%u tail=%u\n",
                gtid, taskdata, thread_data->td.td_deque_ntasks,
                thread_data->td.td_deque_head, thread_data->td.td_deque_tail));

  __kmp_release_bootstrap_lock(&thread_data->td.td_deque_lock);

  return TASK_SUCCESSFULLY_PUSHED;
}
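
// __kmp_push_task checks for a full deque before taking the lock and then
// re-checks after acquiring it, because a proxy task completed by a thread
// outside of OpenMP may have been pushed in between. A stripped-down sketch of
// that check / lock / re-check shape, with hypothetical names and kept under
// "#if 0":
#if 0
#include <mutex>
static bool sketch_try_push(std::mutex &lock, unsigned &ntasks,
                            unsigned &capacity, bool may_throttle) {
  if (ntasks >= capacity && may_throttle)
    return false; // cheap unlocked check: caller executes the task itself
  std::lock_guard<std::mutex> guard(lock);
  if (ntasks >= capacity) {
    if (may_throttle)
      return false; // still full under the lock: a racing push got there first
    capacity *= 2; // otherwise grow, as __kmp_realloc_task_deque does
  }
  ++ntasks; // slot reserved; the real code stores the taskdata here
  return true;
}
#endif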
// __kmp_pop_current_task_from_thread: set up current task from called thread
// when team ends
//
// this_thr: thread structure to set current_task in.
void __kmp_pop_current_task_from_thread(kmp_info_t *this_thr) {
  KF_TRACE(10, ("__kmp_pop_current_task_from_thread(enter): T#%d "
                "this_thread=%p, curtask=%p, "
                "curtask_parent=%p\n",
                0, this_thr, this_thr->th.th_current_task,
                this_thr->th.th_current_task->td_parent));

  this_thr->th.th_current_task = this_thr->th.th_current_task->td_parent;

  KF_TRACE(10, ("__kmp_pop_current_task_from_thread(exit): T#%d "
                "this_thread=%p, curtask=%p, "
                "curtask_parent=%p\n",
                0, this_thr, this_thr->th.th_current_task,
                this_thr->th.th_current_task->td_parent));
}

// __kmp_push_current_task_to_thread: set up current task in called thread for a
// new team
//
// this_thr: thread structure to set up
// team: team for implicit task data
// tid: thread within team to set up
void __kmp_push_current_task_to_thread(kmp_info_t *this_thr, kmp_team_t *team,
                                       int tid) {
  // The current task of the thread is the parent of the newly created implicit
  // tasks of the new team.
  KF_TRACE(10, ("__kmp_push_current_task_to_thread(enter): T#%d this_thread=%p "
                "curtask=%p "
                "parent_task=%p\n",
                tid, this_thr, this_thr->th.th_current_task,
                team->t.t_implicit_task_taskdata[tid].td_parent));

  KMP_DEBUG_ASSERT(this_thr != NULL);

  if (tid == 0) {
    if (this_thr->th.th_current_task != &team->t.t_implicit_task_taskdata[0]) {
      team->t.t_implicit_task_taskdata[0].td_parent =
          this_thr->th.th_current_task;
      this_thr->th.th_current_task = &team->t.t_implicit_task_taskdata[0];
    }
  } else {
    team->t.t_implicit_task_taskdata[tid].td_parent =
        team->t.t_implicit_task_taskdata[0].td_parent;
    this_thr->th.th_current_task = &team->t.t_implicit_task_taskdata[tid];
  }

  KF_TRACE(10, ("__kmp_push_current_task_to_thread(exit): T#%d this_thread=%p "
                "curtask=%p "
                "parent_task=%p\n",
                tid, this_thr, this_thr->th.th_current_task,
                team->t.t_implicit_task_taskdata[tid].td_parent));
}
// __kmp_task_start: bookkeeping for a task starting execution
//
// GTID: global thread id of calling thread
// task: task starting execution
// current_task: task suspending
static void __kmp_task_start(kmp_int32 gtid, kmp_task_t *task,
                             kmp_taskdata_t *current_task) {
  kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task);
  kmp_info_t *thread = __kmp_threads[gtid];

  KA_TRACE(10,
           ("__kmp_task_start(enter): T#%d starting task %p: current_task=%p\n",
            gtid, taskdata, current_task));

  KMP_DEBUG_ASSERT(taskdata->td_flags.tasktype == TASK_EXPLICIT);

  // mark currently executing task as suspended
  // TODO: GEH - make sure root team implicit task is initialized properly.
  // KMP_DEBUG_ASSERT( current_task -> td_flags.executing == 1 );
  current_task->td_flags.executing = 0;

// Add task to stack if tied
#ifdef BUILD_TIED_TASK_STACK
  if (taskdata->td_flags.tiedness == TASK_TIED) {
    __kmp_push_task_stack(gtid, thread, taskdata);
  }
#endif /* BUILD_TIED_TASK_STACK */

  // mark starting task as executing and as current task
  thread->th.th_current_task = taskdata;

  KMP_DEBUG_ASSERT(taskdata->td_flags.started == 0 ||
                   taskdata->td_flags.tiedness == TASK_UNTIED);
  KMP_DEBUG_ASSERT(taskdata->td_flags.executing == 0 ||
                   taskdata->td_flags.tiedness == TASK_UNTIED);
  taskdata->td_flags.started = 1;
  taskdata->td_flags.executing = 1;
  KMP_DEBUG_ASSERT(taskdata->td_flags.complete == 0);
  KMP_DEBUG_ASSERT(taskdata->td_flags.freed == 0);

  // GEH TODO: shouldn't we pass some sort of location identifier here?
  // APT: yes, we will pass location here.
  // need to store current thread state (in a thread or taskdata structure)
  // before setting work_state, otherwise wrong state is set after end of task

  KA_TRACE(10, ("__kmp_task_start(exit): T#%d task=%p\n", gtid, taskdata));

  return;
}
#if OMPT_SUPPORT
//------------------------------------------------------------------------------
// __ompt_task_init:
// Initialize OMPT fields maintained by a task. This will only be called after
// ompt_start_tool, so we already know whether ompt is enabled or not.

static inline void __ompt_task_init(kmp_taskdata_t *task, int tid) {
  // The calls to __ompt_task_init already have the ompt_enabled condition.
  task->ompt_task_info.task_data.value = 0;
  task->ompt_task_info.frame.exit_frame = ompt_data_none;
  task->ompt_task_info.frame.enter_frame = ompt_data_none;
  task->ompt_task_info.frame.exit_frame_flags =
      ompt_frame_runtime | ompt_frame_framepointer;
  task->ompt_task_info.frame.enter_frame_flags =
      ompt_frame_runtime | ompt_frame_framepointer;
  task->ompt_task_info.dispatch_chunk.start = 0;
  task->ompt_task_info.dispatch_chunk.iterations = 0;
}

// __ompt_task_start:
// Build and trigger task-begin event
static inline void __ompt_task_start(kmp_task_t *task,
                                     kmp_taskdata_t *current_task,
                                     kmp_int32 gtid) {
  kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task);
  ompt_task_status_t status = ompt_task_switch;
  if (__kmp_threads[gtid]->th.ompt_thread_info.ompt_task_yielded) {
    status = ompt_task_yield;
    __kmp_threads[gtid]->th.ompt_thread_info.ompt_task_yielded = 0;
  }
  /* let OMPT know that we're about to run this task */
  if (ompt_enabled.ompt_callback_task_schedule) {
    ompt_callbacks.ompt_callback(ompt_callback_task_schedule)(
        &(current_task->ompt_task_info.task_data), status,
        &(taskdata->ompt_task_info.task_data));
  }
  taskdata->ompt_task_info.scheduling_parent = current_task;
}

// __ompt_task_finish:
// Build and trigger final task-schedule event
static inline void __ompt_task_finish(kmp_task_t *task,
                                      kmp_taskdata_t *resumed_task,
                                      ompt_task_status_t status) {
  if (ompt_enabled.ompt_callback_task_schedule) {
    kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task);
    if (__kmp_omp_cancellation && taskdata->td_taskgroup &&
        taskdata->td_taskgroup->cancel_request == cancel_taskgroup) {
      status = ompt_task_cancel;
    }
    /* let OMPT know that we're returning to the callee task */
    ompt_callbacks.ompt_callback(ompt_callback_task_schedule)(
        &(taskdata->ompt_task_info.task_data), status,
        (resumed_task ? &(resumed_task->ompt_task_info.task_data) : NULL));
  }
}
#endif
template <bool ompt>
static void __kmpc_omp_task_begin_if0_template(ident_t *loc_ref, kmp_int32 gtid,
                                               kmp_task_t *task,
                                               void *frame_address,
                                               void *return_address) {
  kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task);
  kmp_taskdata_t *current_task = __kmp_threads[gtid]->th.th_current_task;

  KA_TRACE(10, ("__kmpc_omp_task_begin_if0(enter): T#%d loc=%p task=%p "
                "current_task=%p\n",
                gtid, loc_ref, taskdata, current_task));

  if (UNLIKELY(taskdata->td_flags.tiedness == TASK_UNTIED)) {
    // untied task needs to increment counter so that the task structure is not
    // freed prematurely
    kmp_int32 counter = 1 + KMP_ATOMIC_INC(&taskdata->td_untied_count);
    KMP_DEBUG_USE_VAR(counter);
    KA_TRACE(20, ("__kmpc_omp_task_begin_if0: T#%d untied_count (%d) "
                  "incremented for task %p\n",
                  gtid, counter, taskdata));
  }

  taskdata->td_flags.task_serial =
      1; // Execute this task immediately, not deferred.
  __kmp_task_start(gtid, task, current_task);

#if OMPT_SUPPORT
  if (ompt) {
    if (current_task->ompt_task_info.frame.enter_frame.ptr == NULL) {
      current_task->ompt_task_info.frame.enter_frame.ptr =
          taskdata->ompt_task_info.frame.exit_frame.ptr = frame_address;
      current_task->ompt_task_info.frame.enter_frame_flags =
          taskdata->ompt_task_info.frame.exit_frame_flags =
              ompt_frame_application | ompt_frame_framepointer;
    }
    if (ompt_enabled.ompt_callback_task_create) {
      ompt_task_info_t *parent_info = &(current_task->ompt_task_info);
      ompt_callbacks.ompt_callback(ompt_callback_task_create)(
          &(parent_info->task_data), &(parent_info->frame),
          &(taskdata->ompt_task_info.task_data),
          ompt_task_explicit | TASK_TYPE_DETAILS_FORMAT(taskdata), 0,
          return_address);
    }
    __ompt_task_start(task, current_task, gtid);
  }
#endif // OMPT_SUPPORT

  KA_TRACE(10, ("__kmpc_omp_task_begin_if0(exit): T#%d loc=%p task=%p,\n", gtid,
                loc_ref, taskdata));
}

#if OMPT_SUPPORT
OMPT_NOINLINE
static void __kmpc_omp_task_begin_if0_ompt(ident_t *loc_ref, kmp_int32 gtid,
                                           kmp_task_t *task,
                                           void *frame_address,
                                           void *return_address) {
  __kmpc_omp_task_begin_if0_template<true>(loc_ref, gtid, task, frame_address,
                                           return_address);
}
#endif // OMPT_SUPPORT

// __kmpc_omp_task_begin_if0: report that a given serialized task has started
// execution
//
// loc_ref: source location information; points to beginning of task block.
// gtid: global thread number.
// task: task thunk for the started task.
void __kmpc_omp_task_begin_if0(ident_t *loc_ref, kmp_int32 gtid,
                               kmp_task_t *task) {
#if OMPT_SUPPORT
  if (UNLIKELY(ompt_enabled.enabled)) {
    OMPT_STORE_RETURN_ADDRESS(gtid);
    __kmpc_omp_task_begin_if0_ompt(loc_ref, gtid, task,
                                   OMPT_GET_FRAME_ADDRESS(1),
                                   OMPT_LOAD_RETURN_ADDRESS(gtid));
    return;
  }
#endif
  __kmpc_omp_task_begin_if0_template<false>(loc_ref, gtid, task, NULL, NULL);
}
#ifdef TASK_UNUSED
// __kmpc_omp_task_begin: report that a given task has started execution
// NEVER GENERATED BY COMPILER, DEPRECATED!!!
void __kmpc_omp_task_begin(ident_t *loc_ref, kmp_int32 gtid, kmp_task_t *task) {
  kmp_taskdata_t *current_task = __kmp_threads[gtid]->th.th_current_task;

  KA_TRACE(
      10,
      ("__kmpc_omp_task_begin(enter): T#%d loc=%p task=%p current_task=%p\n",
       gtid, loc_ref, KMP_TASK_TO_TASKDATA(task), current_task));

  __kmp_task_start(gtid, task, current_task);

  KA_TRACE(10, ("__kmpc_omp_task_begin(exit): T#%d loc=%p task=%p,\n", gtid,
                loc_ref, KMP_TASK_TO_TASKDATA(task)));
  return;
}
#endif // TASK_UNUSED
// __kmp_free_task: free the current task space and the space for shareds
//
// gtid: Global thread ID of calling thread
// taskdata: task to free
// thread: thread data structure of caller
static void __kmp_free_task(kmp_int32 gtid, kmp_taskdata_t *taskdata,
                            kmp_info_t *thread) {
  KA_TRACE(30, ("__kmp_free_task: T#%d freeing data from task %p\n", gtid,
                taskdata));

  // Check to make sure all flags and counters have the correct values
  KMP_DEBUG_ASSERT(taskdata->td_flags.tasktype == TASK_EXPLICIT);
  KMP_DEBUG_ASSERT(taskdata->td_flags.executing == 0);
  KMP_DEBUG_ASSERT(taskdata->td_flags.complete == 1);
  KMP_DEBUG_ASSERT(taskdata->td_flags.freed == 0);
  KMP_DEBUG_ASSERT(taskdata->td_allocated_child_tasks == 0 ||
                   taskdata->td_flags.task_serial == 1);
  KMP_DEBUG_ASSERT(taskdata->td_incomplete_child_tasks == 0);
  kmp_task_t *task = KMP_TASKDATA_TO_TASK(taskdata);
  // Clear data to not be re-used later by mistake.
  task->data1.destructors = NULL;
  task->data2.priority = 0;

  taskdata->td_flags.freed = 1;

// deallocate the taskdata and shared variable blocks associated with this task
#if USE_FAST_MEMORY
  __kmp_fast_free(thread, taskdata);
#else /* ! USE_FAST_MEMORY */
  __kmp_thread_free(thread, taskdata);
#endif
  KA_TRACE(20, ("__kmp_free_task: T#%d freed task %p\n", gtid, taskdata));
}
  788. // __kmp_free_task_and_ancestors: free the current task and ancestors without
  789. // children
  790. //
  791. // gtid: Global thread ID of calling thread
  792. // taskdata: task to free
  793. // thread: thread data structure of caller
  794. static void __kmp_free_task_and_ancestors(kmp_int32 gtid,
  795. kmp_taskdata_t *taskdata,
  796. kmp_info_t *thread) {
  797. // Proxy tasks must always be allowed to free their parents
  798. // because they can be run in background even in serial mode.
  799. kmp_int32 team_serial =
  800. (taskdata->td_flags.team_serial || taskdata->td_flags.tasking_ser) &&
  801. !taskdata->td_flags.proxy;
  802. KMP_DEBUG_ASSERT(taskdata->td_flags.tasktype == TASK_EXPLICIT);
  803. kmp_int32 children = KMP_ATOMIC_DEC(&taskdata->td_allocated_child_tasks) - 1;
  804. KMP_DEBUG_ASSERT(children >= 0);
  805. // Now, go up the ancestor tree to see if any ancestors can now be freed.
  806. while (children == 0) {
  807. kmp_taskdata_t *parent_taskdata = taskdata->td_parent;
  808. KA_TRACE(20, ("__kmp_free_task_and_ancestors(enter): T#%d task %p complete "
  809. "and freeing itself\n",
  810. gtid, taskdata));
  811. // --- Deallocate my ancestor task ---
  812. __kmp_free_task(gtid, taskdata, thread);
  813. taskdata = parent_taskdata;
  814. if (team_serial)
  815. return;
  816. // Stop checking ancestors at implicit task instead of walking up ancestor
  817. // tree to avoid premature deallocation of ancestors.
  818. if (taskdata->td_flags.tasktype == TASK_IMPLICIT) {
  819. if (taskdata->td_dephash) { // do we need to cleanup dephash?
  820. int children = KMP_ATOMIC_LD_ACQ(&taskdata->td_incomplete_child_tasks);
  821. kmp_tasking_flags_t flags_old = taskdata->td_flags;
  822. if (children == 0 && flags_old.complete == 1) {
  823. kmp_tasking_flags_t flags_new = flags_old;
  824. flags_new.complete = 0;
  825. if (KMP_COMPARE_AND_STORE_ACQ32(
  826. RCAST(kmp_int32 *, &taskdata->td_flags),
  827. *RCAST(kmp_int32 *, &flags_old),
  828. *RCAST(kmp_int32 *, &flags_new))) {
  829. KA_TRACE(100, ("__kmp_free_task_and_ancestors: T#%d cleans "
  830. "dephash of implicit task %p\n",
  831. gtid, taskdata));
  832. // cleanup dephash of finished implicit task
  833. __kmp_dephash_free_entries(thread, taskdata->td_dephash);
  834. }
  835. }
  836. }
  837. return;
  838. }
  839. // Predecrement simulated by "- 1" calculation
  840. children = KMP_ATOMIC_DEC(&taskdata->td_allocated_child_tasks) - 1;
  841. KMP_DEBUG_ASSERT(children >= 0);
  842. }
  843. KA_TRACE(
  844. 20, ("__kmp_free_task_and_ancestors(exit): T#%d task %p has %d children; "
  845. "not freeing it yet\n",
  846. gtid, taskdata, children));
  847. }
// We only need to keep track of child task counts if any of the following hold:
// 1. the team is parallel and tasking is not serialized;
// 2. the task is a proxy, detachable, or hidden helper task;
// 3. the children counter of its parent task is greater than 0.
// The reason for the third condition is a serialized team that has encountered
// a detached or hidden helper task T: the execution of T is still deferred, and
// a regular task may depend on T, so if we did not track the children in this
// case, task synchronization would be broken.
  856. static bool __kmp_track_children_task(kmp_taskdata_t *taskdata) {
  857. kmp_tasking_flags_t flags = taskdata->td_flags;
  858. bool ret = !(flags.team_serial || flags.tasking_ser);
  859. ret = ret || flags.proxy == TASK_PROXY ||
  860. flags.detachable == TASK_DETACHABLE || flags.hidden_helper;
  861. ret = ret ||
  862. KMP_ATOMIC_LD_ACQ(&taskdata->td_parent->td_incomplete_child_tasks) > 0;
  863. return ret;
  864. }
  865. // __kmp_task_finish: bookkeeping to do when a task finishes execution
  866. //
  867. // gtid: global thread ID for calling thread
  868. // task: task to be finished
  869. // resumed_task: task to be resumed. (may be NULL if task is serialized)
  870. //
// template<ompt>: effectively ompt_enabled.enabled != 0
// The version with ompt=false is inlined, allowing the compiler to optimize
// away all OMPT code in that case.
  874. template <bool ompt>
  875. static void __kmp_task_finish(kmp_int32 gtid, kmp_task_t *task,
  876. kmp_taskdata_t *resumed_task) {
  877. kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task);
  878. kmp_info_t *thread = __kmp_threads[gtid];
  879. kmp_task_team_t *task_team =
  880. thread->th.th_task_team; // might be NULL for serial teams...
  881. #if KMP_DEBUG
  882. kmp_int32 children = 0;
  883. #endif
  884. KA_TRACE(10, ("__kmp_task_finish(enter): T#%d finishing task %p and resuming "
  885. "task %p\n",
  886. gtid, taskdata, resumed_task));
  887. KMP_DEBUG_ASSERT(taskdata->td_flags.tasktype == TASK_EXPLICIT);
  888. // Pop task from stack if tied
  889. #ifdef BUILD_TIED_TASK_STACK
  890. if (taskdata->td_flags.tiedness == TASK_TIED) {
  891. __kmp_pop_task_stack(gtid, thread, taskdata);
  892. }
  893. #endif /* BUILD_TIED_TASK_STACK */
  894. if (UNLIKELY(taskdata->td_flags.tiedness == TASK_UNTIED)) {
  895. // untied task needs to check the counter so that the task structure is not
  896. // freed prematurely
  897. kmp_int32 counter = KMP_ATOMIC_DEC(&taskdata->td_untied_count) - 1;
  898. KA_TRACE(
  899. 20,
  900. ("__kmp_task_finish: T#%d untied_count (%d) decremented for task %p\n",
  901. gtid, counter, taskdata));
  902. if (counter > 0) {
  903. // untied task is not done, to be continued possibly by other thread, do
  904. // not free it now
  905. if (resumed_task == NULL) {
  906. KMP_DEBUG_ASSERT(taskdata->td_flags.task_serial);
  907. resumed_task = taskdata->td_parent; // In a serialized task, the resumed
  908. // task is the parent
  909. }
  910. thread->th.th_current_task = resumed_task; // restore current_task
  911. resumed_task->td_flags.executing = 1; // resume previous task
  912. KA_TRACE(10, ("__kmp_task_finish(exit): T#%d partially done task %p, "
  913. "resuming task %p\n",
  914. gtid, taskdata, resumed_task));
  915. return;
  916. }
  917. }
  918. // bookkeeping for resuming task:
  919. // GEH - note tasking_ser => task_serial
  920. KMP_DEBUG_ASSERT(
  921. (taskdata->td_flags.tasking_ser || taskdata->td_flags.task_serial) ==
  922. taskdata->td_flags.task_serial);
  923. if (taskdata->td_flags.task_serial) {
  924. if (resumed_task == NULL) {
  925. resumed_task = taskdata->td_parent; // In a serialized task, the resumed
  926. // task is the parent
  927. }
  928. } else {
  929. KMP_DEBUG_ASSERT(resumed_task !=
  930. NULL); // verify that resumed task is passed as argument
  931. }
/* If the task's destructor thunk flag has been set, we need to invoke the
destructor thunk that has been generated by the compiler. The code is
placed here because, at this point, other tasks might have been released,
so the destructor invocations can overlap with other work in the released
tasks. The OpenMP spec does not specify when the destructors are invoked,
so we are free to choose. */
  938. if (UNLIKELY(taskdata->td_flags.destructors_thunk)) {
  939. kmp_routine_entry_t destr_thunk = task->data1.destructors;
  940. KMP_ASSERT(destr_thunk);
  941. destr_thunk(gtid, task);
  942. }
  943. KMP_DEBUG_ASSERT(taskdata->td_flags.complete == 0);
  944. KMP_DEBUG_ASSERT(taskdata->td_flags.started == 1);
  945. KMP_DEBUG_ASSERT(taskdata->td_flags.freed == 0);
  946. bool detach = false;
  947. if (UNLIKELY(taskdata->td_flags.detachable == TASK_DETACHABLE)) {
  948. if (taskdata->td_allow_completion_event.type ==
  949. KMP_EVENT_ALLOW_COMPLETION) {
  950. // event hasn't been fulfilled yet. Try to detach task.
  951. __kmp_acquire_tas_lock(&taskdata->td_allow_completion_event.lock, gtid);
  952. if (taskdata->td_allow_completion_event.type ==
  953. KMP_EVENT_ALLOW_COMPLETION) {
  954. // task finished execution
  955. KMP_DEBUG_ASSERT(taskdata->td_flags.executing == 1);
  956. taskdata->td_flags.executing = 0; // suspend the finishing task
  957. #if OMPT_SUPPORT
// For a detached task that has not completed, report only the switch away
// from it here; omp_fulfill_event signals completion later.
// Locking is necessary to avoid a race with ompt_task_late_fulfill.
  961. if (ompt)
  962. __ompt_task_finish(task, resumed_task, ompt_task_detach);
  963. #endif
  964. // no access to taskdata after this point!
  965. // __kmp_fulfill_event might free taskdata at any time from now
  966. taskdata->td_flags.proxy = TASK_PROXY; // proxify!
  967. detach = true;
  968. }
  969. __kmp_release_tas_lock(&taskdata->td_allow_completion_event.lock, gtid);
  970. }
  971. }
  972. if (!detach) {
  973. taskdata->td_flags.complete = 1; // mark the task as completed
  974. #if OMPT_SUPPORT
  975. // This is not a detached task, we are done here
  976. if (ompt)
  977. __ompt_task_finish(task, resumed_task, ompt_task_complete);
  978. #endif
  979. // TODO: What would be the balance between the conditions in the function
  980. // and an atomic operation?
  981. if (__kmp_track_children_task(taskdata)) {
  982. __kmp_release_deps(gtid, taskdata);
  983. // Predecrement simulated by "- 1" calculation
  984. #if KMP_DEBUG
  985. children = -1 +
  986. #endif
  987. KMP_ATOMIC_DEC(&taskdata->td_parent->td_incomplete_child_tasks);
  988. KMP_DEBUG_ASSERT(children >= 0);
  989. if (taskdata->td_taskgroup)
  990. KMP_ATOMIC_DEC(&taskdata->td_taskgroup->count);
  991. } else if (task_team && (task_team->tt.tt_found_proxy_tasks ||
  992. task_team->tt.tt_hidden_helper_task_encountered)) {
  993. // if we found proxy or hidden helper tasks there could exist a dependency
  994. // chain with the proxy task as origin
  995. __kmp_release_deps(gtid, taskdata);
  996. }
// td_flags.executing must be marked as 0 after __kmp_release_deps has been
// called. Otherwise, if a task is executed immediately from the release_deps
// code, the flag will be reset to 1 again by this same function.
  1001. KMP_DEBUG_ASSERT(taskdata->td_flags.executing == 1);
  1002. taskdata->td_flags.executing = 0; // suspend the finishing task
  1003. }
  1004. KA_TRACE(
  1005. 20, ("__kmp_task_finish: T#%d finished task %p, %d incomplete children\n",
  1006. gtid, taskdata, children));
  1007. // Free this task and then ancestor tasks if they have no children.
  1008. // Restore th_current_task first as suggested by John:
  1009. // johnmc: if an asynchronous inquiry peers into the runtime system
  1010. // it doesn't see the freed task as the current task.
  1011. thread->th.th_current_task = resumed_task;
  1012. if (!detach)
  1013. __kmp_free_task_and_ancestors(gtid, taskdata, thread);
  1014. // TODO: GEH - make sure root team implicit task is initialized properly.
  1015. // KMP_DEBUG_ASSERT( resumed_task->td_flags.executing == 0 );
  1016. resumed_task->td_flags.executing = 1; // resume previous task
  1017. KA_TRACE(
  1018. 10, ("__kmp_task_finish(exit): T#%d finished task %p, resuming task %p\n",
  1019. gtid, taskdata, resumed_task));
  1020. return;
  1021. }
  1022. template <bool ompt>
  1023. static void __kmpc_omp_task_complete_if0_template(ident_t *loc_ref,
  1024. kmp_int32 gtid,
  1025. kmp_task_t *task) {
  1026. KA_TRACE(10, ("__kmpc_omp_task_complete_if0(enter): T#%d loc=%p task=%p\n",
  1027. gtid, loc_ref, KMP_TASK_TO_TASKDATA(task)));
  1028. KMP_DEBUG_ASSERT(gtid >= 0);
  1029. // this routine will provide task to resume
  1030. __kmp_task_finish<ompt>(gtid, task, NULL);
  1031. KA_TRACE(10, ("__kmpc_omp_task_complete_if0(exit): T#%d loc=%p task=%p\n",
  1032. gtid, loc_ref, KMP_TASK_TO_TASKDATA(task)));
  1033. #if OMPT_SUPPORT
  1034. if (ompt) {
  1035. ompt_frame_t *ompt_frame;
  1036. __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
  1037. ompt_frame->enter_frame = ompt_data_none;
  1038. ompt_frame->enter_frame_flags =
  1039. ompt_frame_runtime | ompt_frame_framepointer;
  1040. }
  1041. #endif
  1042. return;
  1043. }
  1044. #if OMPT_SUPPORT
  1045. OMPT_NOINLINE
  1046. void __kmpc_omp_task_complete_if0_ompt(ident_t *loc_ref, kmp_int32 gtid,
  1047. kmp_task_t *task) {
  1048. __kmpc_omp_task_complete_if0_template<true>(loc_ref, gtid, task);
  1049. }
  1050. #endif // OMPT_SUPPORT
  1051. // __kmpc_omp_task_complete_if0: report that a task has completed execution
  1052. //
  1053. // loc_ref: source location information; points to end of task block.
  1054. // gtid: global thread number.
  1055. // task: task thunk for the completed task.
  1056. void __kmpc_omp_task_complete_if0(ident_t *loc_ref, kmp_int32 gtid,
  1057. kmp_task_t *task) {
  1058. #if OMPT_SUPPORT
  1059. if (UNLIKELY(ompt_enabled.enabled)) {
  1060. __kmpc_omp_task_complete_if0_ompt(loc_ref, gtid, task);
  1061. return;
  1062. }
  1063. #endif
  1064. __kmpc_omp_task_complete_if0_template<false>(loc_ref, gtid, task);
  1065. }
  1066. #ifdef TASK_UNUSED
  1067. // __kmpc_omp_task_complete: report that a task has completed execution
  1068. // NEVER GENERATED BY COMPILER, DEPRECATED!!!
  1069. void __kmpc_omp_task_complete(ident_t *loc_ref, kmp_int32 gtid,
  1070. kmp_task_t *task) {
  1071. KA_TRACE(10, ("__kmpc_omp_task_complete(enter): T#%d loc=%p task=%p\n", gtid,
  1072. loc_ref, KMP_TASK_TO_TASKDATA(task)));
  1073. __kmp_task_finish<false>(gtid, task,
  1074. NULL); // Not sure how to find task to resume
  1075. KA_TRACE(10, ("__kmpc_omp_task_complete(exit): T#%d loc=%p task=%p\n", gtid,
  1076. loc_ref, KMP_TASK_TO_TASKDATA(task)));
  1077. return;
  1078. }
  1079. #endif // TASK_UNUSED
  1080. // __kmp_init_implicit_task: Initialize the appropriate fields in the implicit
  1081. // task for a given thread
  1082. //
  1083. // loc_ref: reference to source location of parallel region
  1084. // this_thr: thread data structure corresponding to implicit task
  1085. // team: team for this_thr
  1086. // tid: thread id of given thread within team
  1087. // set_curr_task: TRUE if need to push current task to thread
// NOTE: This routine does not set up the implicit task ICVs. These are assumed
// to have already been set up elsewhere.
  1090. // TODO: Get better loc_ref. Value passed in may be NULL
  1091. void __kmp_init_implicit_task(ident_t *loc_ref, kmp_info_t *this_thr,
  1092. kmp_team_t *team, int tid, int set_curr_task) {
  1093. kmp_taskdata_t *task = &team->t.t_implicit_task_taskdata[tid];
  1094. KF_TRACE(
  1095. 10,
  1096. ("__kmp_init_implicit_task(enter): T#:%d team=%p task=%p, reinit=%s\n",
  1097. tid, team, task, set_curr_task ? "TRUE" : "FALSE"));
  1098. task->td_task_id = KMP_GEN_TASK_ID();
  1099. task->td_team = team;
  1100. // task->td_parent = NULL; // fix for CQ230101 (broken parent task info
  1101. // in debugger)
  1102. task->td_ident = loc_ref;
  1103. task->td_taskwait_ident = NULL;
  1104. task->td_taskwait_counter = 0;
  1105. task->td_taskwait_thread = 0;
  1106. task->td_flags.tiedness = TASK_TIED;
  1107. task->td_flags.tasktype = TASK_IMPLICIT;
  1108. task->td_flags.proxy = TASK_FULL;
  1109. // All implicit tasks are executed immediately, not deferred
  1110. task->td_flags.task_serial = 1;
  1111. task->td_flags.tasking_ser = (__kmp_tasking_mode == tskm_immediate_exec);
  1112. task->td_flags.team_serial = (team->t.t_serialized) ? 1 : 0;
  1113. task->td_flags.started = 1;
  1114. task->td_flags.executing = 1;
  1115. task->td_flags.complete = 0;
  1116. task->td_flags.freed = 0;
  1117. task->td_depnode = NULL;
  1118. task->td_last_tied = task;
  1119. task->td_allow_completion_event.type = KMP_EVENT_UNINITIALIZED;
  1120. if (set_curr_task) { // only do this init first time thread is created
  1121. KMP_ATOMIC_ST_REL(&task->td_incomplete_child_tasks, 0);
  1122. // Not used: don't need to deallocate implicit task
  1123. KMP_ATOMIC_ST_REL(&task->td_allocated_child_tasks, 0);
  1124. task->td_taskgroup = NULL; // An implicit task does not have taskgroup
  1125. task->td_dephash = NULL;
  1126. __kmp_push_current_task_to_thread(this_thr, team, tid);
  1127. } else {
  1128. KMP_DEBUG_ASSERT(task->td_incomplete_child_tasks == 0);
  1129. KMP_DEBUG_ASSERT(task->td_allocated_child_tasks == 0);
  1130. }
  1131. #if OMPT_SUPPORT
  1132. if (UNLIKELY(ompt_enabled.enabled))
  1133. __ompt_task_init(task, tid);
  1134. #endif
  1135. KF_TRACE(10, ("__kmp_init_implicit_task(exit): T#:%d team=%p task=%p\n", tid,
  1136. team, task));
  1137. }
// __kmp_finish_implicit_task: Release resources associated with implicit tasks
  1139. // at the end of parallel regions. Some resources are kept for reuse in the next
  1140. // parallel region.
  1141. //
  1142. // thread: thread data structure corresponding to implicit task
  1143. void __kmp_finish_implicit_task(kmp_info_t *thread) {
  1144. kmp_taskdata_t *task = thread->th.th_current_task;
  1145. if (task->td_dephash) {
  1146. int children;
  1147. task->td_flags.complete = 1;
  1148. children = KMP_ATOMIC_LD_ACQ(&task->td_incomplete_child_tasks);
  1149. kmp_tasking_flags_t flags_old = task->td_flags;
  1150. if (children == 0 && flags_old.complete == 1) {
  1151. kmp_tasking_flags_t flags_new = flags_old;
  1152. flags_new.complete = 0;
  1153. if (KMP_COMPARE_AND_STORE_ACQ32(RCAST(kmp_int32 *, &task->td_flags),
  1154. *RCAST(kmp_int32 *, &flags_old),
  1155. *RCAST(kmp_int32 *, &flags_new))) {
  1156. KA_TRACE(100, ("__kmp_finish_implicit_task: T#%d cleans "
  1157. "dephash of implicit task %p\n",
  1158. thread->th.th_info.ds.ds_gtid, task));
  1159. __kmp_dephash_free_entries(thread, task->td_dephash);
  1160. }
  1161. }
  1162. }
  1163. }
// __kmp_free_implicit_task: Release resources associated with implicit tasks
// when these tasks are destroyed
  1166. //
  1167. // thread: thread data structure corresponding to implicit task
  1168. void __kmp_free_implicit_task(kmp_info_t *thread) {
  1169. kmp_taskdata_t *task = thread->th.th_current_task;
  1170. if (task && task->td_dephash) {
  1171. __kmp_dephash_free(thread, task->td_dephash);
  1172. task->td_dephash = NULL;
  1173. }
  1174. }
  1175. // Round up a size to a power of two specified by val: Used to insert padding
  1176. // between structures co-allocated using a single malloc() call
  1177. static size_t __kmp_round_up_to_val(size_t size, size_t val) {
  1178. if (size & (val - 1)) {
  1179. size &= ~(val - 1);
  1180. if (size <= KMP_SIZE_T_MAX - val) {
  1181. size += val; // Round up if there is no overflow.
  1182. }
  1183. }
  1184. return size;
} // __kmp_round_up_to_val
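// Worked example: __kmp_round_up_to_val(44, 8) clears the low bits (44 & ~7 ==
// 40) and then adds val, yielding 48; a size that is already a multiple of val,
// e.g. __kmp_round_up_to_val(48, 8), is returned unchanged. val is expected to
// be a power of two.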
  1186. // __kmp_task_alloc: Allocate the taskdata and task data structures for a task
  1187. //
  1188. // loc_ref: source location information
  1189. // gtid: global thread number.
  1190. // flags: include tiedness & task type (explicit vs. implicit) of the ''new''
  1191. // task encountered. Converted from kmp_int32 to kmp_tasking_flags_t in routine.
  1192. // sizeof_kmp_task_t: Size in bytes of kmp_task_t data structure including
  1193. // private vars accessed in task.
  1194. // sizeof_shareds: Size in bytes of array of pointers to shared vars accessed
  1195. // in task.
  1196. // task_entry: Pointer to task code entry point generated by compiler.
  1197. // returns: a pointer to the allocated kmp_task_t structure (task).
  1198. kmp_task_t *__kmp_task_alloc(ident_t *loc_ref, kmp_int32 gtid,
  1199. kmp_tasking_flags_t *flags,
  1200. size_t sizeof_kmp_task_t, size_t sizeof_shareds,
  1201. kmp_routine_entry_t task_entry) {
  1202. kmp_task_t *task;
  1203. kmp_taskdata_t *taskdata;
  1204. kmp_info_t *thread = __kmp_threads[gtid];
  1205. kmp_team_t *team = thread->th.th_team;
  1206. kmp_taskdata_t *parent_task = thread->th.th_current_task;
  1207. size_t shareds_offset;
  1208. if (UNLIKELY(!TCR_4(__kmp_init_middle)))
  1209. __kmp_middle_initialize();
  1210. if (flags->hidden_helper) {
  1211. if (__kmp_enable_hidden_helper) {
  1212. if (!TCR_4(__kmp_init_hidden_helper))
  1213. __kmp_hidden_helper_initialize();
  1214. } else {
  1215. // If the hidden helper task is not enabled, reset the flag to FALSE.
  1216. flags->hidden_helper = FALSE;
  1217. }
  1218. }
  1219. KA_TRACE(10, ("__kmp_task_alloc(enter): T#%d loc=%p, flags=(0x%x) "
  1220. "sizeof_task=%ld sizeof_shared=%ld entry=%p\n",
  1221. gtid, loc_ref, *((kmp_int32 *)flags), sizeof_kmp_task_t,
  1222. sizeof_shareds, task_entry));
  1223. KMP_DEBUG_ASSERT(parent_task);
  1224. if (parent_task->td_flags.final) {
  1225. if (flags->merged_if0) {
  1226. }
  1227. flags->final = 1;
  1228. }
  1229. if (flags->tiedness == TASK_UNTIED && !team->t.t_serialized) {
  1230. // Untied task encountered causes the TSC algorithm to check entire deque of
  1231. // the victim thread. If no untied task encountered, then checking the head
  1232. // of the deque should be enough.
  1233. KMP_CHECK_UPDATE(thread->th.th_task_team->tt.tt_untied_task_encountered, 1);
  1234. }
// Detachable tasks are not proxy tasks yet but could become proxy tasks in the
// future. Doing the tasking setup when that happens would be too late.
  1238. if (UNLIKELY(flags->proxy == TASK_PROXY ||
  1239. flags->detachable == TASK_DETACHABLE || flags->hidden_helper)) {
  1240. if (flags->proxy == TASK_PROXY) {
  1241. flags->tiedness = TASK_UNTIED;
  1242. flags->merged_if0 = 1;
  1243. }
/* Are we running in a serialized parallel region or in tskm_immediate_exec
mode? In either case we need tasking support enabled. */
  1246. if ((thread->th.th_task_team) == NULL) {
  1247. /* This should only happen if the team is serialized
  1248. setup a task team and propagate it to the thread */
  1249. KMP_DEBUG_ASSERT(team->t.t_serialized);
  1250. KA_TRACE(30,
  1251. ("T#%d creating task team in __kmp_task_alloc for proxy task\n",
  1252. gtid));
  1253. // 1 indicates setup the current team regardless of nthreads
  1254. __kmp_task_team_setup(thread, team, 1);
  1255. thread->th.th_task_team = team->t.t_task_team[thread->th.th_task_state];
  1256. }
  1257. kmp_task_team_t *task_team = thread->th.th_task_team;
  1258. /* tasking must be enabled now as the task might not be pushed */
  1259. if (!KMP_TASKING_ENABLED(task_team)) {
  1260. KA_TRACE(
  1261. 30,
  1262. ("T#%d enabling tasking in __kmp_task_alloc for proxy task\n", gtid));
  1263. __kmp_enable_tasking(task_team, thread);
  1264. kmp_int32 tid = thread->th.th_info.ds.ds_tid;
  1265. kmp_thread_data_t *thread_data = &task_team->tt.tt_threads_data[tid];
  1266. // No lock needed since only owner can allocate
  1267. if (thread_data->td.td_deque == NULL) {
  1268. __kmp_alloc_task_deque(thread, thread_data);
  1269. }
  1270. }
  1271. if ((flags->proxy == TASK_PROXY || flags->detachable == TASK_DETACHABLE) &&
  1272. task_team->tt.tt_found_proxy_tasks == FALSE)
  1273. TCW_4(task_team->tt.tt_found_proxy_tasks, TRUE);
  1274. if (flags->hidden_helper &&
  1275. task_team->tt.tt_hidden_helper_task_encountered == FALSE)
  1276. TCW_4(task_team->tt.tt_hidden_helper_task_encountered, TRUE);
  1277. }
  1278. // Calculate shared structure offset including padding after kmp_task_t struct
  1279. // to align pointers in shared struct
  1280. shareds_offset = sizeof(kmp_taskdata_t) + sizeof_kmp_task_t;
  1281. shareds_offset = __kmp_round_up_to_val(shareds_offset, sizeof(void *));
  1282. // Allocate a kmp_taskdata_t block and a kmp_task_t block.
  1283. KA_TRACE(30, ("__kmp_task_alloc: T#%d First malloc size: %ld\n", gtid,
  1284. shareds_offset));
  1285. KA_TRACE(30, ("__kmp_task_alloc: T#%d Second malloc size: %ld\n", gtid,
  1286. sizeof_shareds));
  1287. // Avoid double allocation here by combining shareds with taskdata
  1288. #if USE_FAST_MEMORY
  1289. taskdata = (kmp_taskdata_t *)__kmp_fast_allocate(thread, shareds_offset +
  1290. sizeof_shareds);
  1291. #else /* ! USE_FAST_MEMORY */
  1292. taskdata = (kmp_taskdata_t *)__kmp_thread_malloc(thread, shareds_offset +
  1293. sizeof_shareds);
  1294. #endif /* USE_FAST_MEMORY */
  1295. task = KMP_TASKDATA_TO_TASK(taskdata);
  1296. // Make sure task & taskdata are aligned appropriately
  1297. #if KMP_ARCH_X86 || KMP_ARCH_PPC64 || !KMP_HAVE_QUAD
  1298. KMP_DEBUG_ASSERT((((kmp_uintptr_t)taskdata) & (sizeof(double) - 1)) == 0);
  1299. KMP_DEBUG_ASSERT((((kmp_uintptr_t)task) & (sizeof(double) - 1)) == 0);
  1300. #else
  1301. KMP_DEBUG_ASSERT((((kmp_uintptr_t)taskdata) & (sizeof(_Quad) - 1)) == 0);
  1302. KMP_DEBUG_ASSERT((((kmp_uintptr_t)task) & (sizeof(_Quad) - 1)) == 0);
  1303. #endif
  1304. if (sizeof_shareds > 0) {
  1305. // Avoid double allocation here by combining shareds with taskdata
  1306. task->shareds = &((char *)taskdata)[shareds_offset];
  1307. // Make sure shareds struct is aligned to pointer size
  1308. KMP_DEBUG_ASSERT((((kmp_uintptr_t)task->shareds) & (sizeof(void *) - 1)) ==
  1309. 0);
  1310. } else {
  1311. task->shareds = NULL;
  1312. }
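// Resulting layout of the single allocation (an illustrative sketch; field
// widths are not to scale, and the shareds block exists only when
// sizeof_shareds > 0):
//
//   taskdata --> +--------------------------------+  offset 0
//                | kmp_taskdata_t                 |
//   task ------> +--------------------------------+  sizeof(kmp_taskdata_t)
//                | kmp_task_t + private variables |
//                +--------------------------------+  shareds_offset (rounded up
//   shareds ---> | pointers to shared variables   |  to sizeof(void *))
//                +--------------------------------+  shareds_offset + sizeof_shareds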
  1313. task->routine = task_entry;
  1314. task->part_id = 0; // AC: Always start with 0 part id
  1315. taskdata->td_task_id = KMP_GEN_TASK_ID();
  1316. taskdata->td_team = thread->th.th_team;
  1317. taskdata->td_alloc_thread = thread;
  1318. taskdata->td_parent = parent_task;
  1319. taskdata->td_level = parent_task->td_level + 1; // increment nesting level
  1320. KMP_ATOMIC_ST_RLX(&taskdata->td_untied_count, 0);
  1321. taskdata->td_ident = loc_ref;
  1322. taskdata->td_taskwait_ident = NULL;
  1323. taskdata->td_taskwait_counter = 0;
  1324. taskdata->td_taskwait_thread = 0;
  1325. KMP_DEBUG_ASSERT(taskdata->td_parent != NULL);
  1326. // avoid copying icvs for proxy tasks
  1327. if (flags->proxy == TASK_FULL)
  1328. copy_icvs(&taskdata->td_icvs, &taskdata->td_parent->td_icvs);
  1329. taskdata->td_flags = *flags;
  1330. taskdata->td_task_team = thread->th.th_task_team;
  1331. taskdata->td_size_alloc = shareds_offset + sizeof_shareds;
  1332. taskdata->td_flags.tasktype = TASK_EXPLICIT;
  1333. // If it is hidden helper task, we need to set the team and task team
  1334. // correspondingly.
  1335. if (flags->hidden_helper) {
  1336. kmp_info_t *shadow_thread = __kmp_threads[KMP_GTID_TO_SHADOW_GTID(gtid)];
  1337. taskdata->td_team = shadow_thread->th.th_team;
  1338. taskdata->td_task_team = shadow_thread->th.th_task_team;
  1339. }
  1340. // GEH - TODO: fix this to copy parent task's value of tasking_ser flag
  1341. taskdata->td_flags.tasking_ser = (__kmp_tasking_mode == tskm_immediate_exec);
  1342. // GEH - TODO: fix this to copy parent task's value of team_serial flag
  1343. taskdata->td_flags.team_serial = (team->t.t_serialized) ? 1 : 0;
  1344. // GEH - Note we serialize the task if the team is serialized to make sure
  1345. // implicit parallel region tasks are not left until program termination to
  1346. // execute. Also, it helps locality to execute immediately.
  1347. taskdata->td_flags.task_serial =
  1348. (parent_task->td_flags.final || taskdata->td_flags.team_serial ||
  1349. taskdata->td_flags.tasking_ser || flags->merged_if0);
  1350. taskdata->td_flags.started = 0;
  1351. taskdata->td_flags.executing = 0;
  1352. taskdata->td_flags.complete = 0;
  1353. taskdata->td_flags.freed = 0;
  1354. KMP_ATOMIC_ST_RLX(&taskdata->td_incomplete_child_tasks, 0);
// start at one because the count includes the current task and its children
  1356. KMP_ATOMIC_ST_RLX(&taskdata->td_allocated_child_tasks, 1);
  1357. taskdata->td_taskgroup =
  1358. parent_task->td_taskgroup; // task inherits taskgroup from the parent task
  1359. taskdata->td_dephash = NULL;
  1360. taskdata->td_depnode = NULL;
  1361. if (flags->tiedness == TASK_UNTIED)
  1362. taskdata->td_last_tied = NULL; // will be set when the task is scheduled
  1363. else
  1364. taskdata->td_last_tied = taskdata;
  1365. taskdata->td_allow_completion_event.type = KMP_EVENT_UNINITIALIZED;
  1366. #if OMPT_SUPPORT
  1367. if (UNLIKELY(ompt_enabled.enabled))
  1368. __ompt_task_init(taskdata, gtid);
  1369. #endif
  1370. // TODO: What would be the balance between the conditions in the function and
  1371. // an atomic operation?
  1372. if (__kmp_track_children_task(taskdata)) {
  1373. KMP_ATOMIC_INC(&parent_task->td_incomplete_child_tasks);
  1374. if (parent_task->td_taskgroup)
  1375. KMP_ATOMIC_INC(&parent_task->td_taskgroup->count);
// Only need to keep track of allocated child tasks for explicit tasks since
// implicit tasks are not deallocated
  1378. if (taskdata->td_parent->td_flags.tasktype == TASK_EXPLICIT) {
  1379. KMP_ATOMIC_INC(&taskdata->td_parent->td_allocated_child_tasks);
  1380. }
  1381. if (flags->hidden_helper) {
  1382. taskdata->td_flags.task_serial = FALSE;
  1383. // Increment the number of hidden helper tasks to be executed
  1384. KMP_ATOMIC_INC(&__kmp_unexecuted_hidden_helper_tasks);
  1385. }
  1386. }
  1387. KA_TRACE(20, ("__kmp_task_alloc(exit): T#%d created task %p parent=%p\n",
  1388. gtid, taskdata, taskdata->td_parent));
  1389. return task;
  1390. }
  1391. kmp_task_t *__kmpc_omp_task_alloc(ident_t *loc_ref, kmp_int32 gtid,
  1392. kmp_int32 flags, size_t sizeof_kmp_task_t,
  1393. size_t sizeof_shareds,
  1394. kmp_routine_entry_t task_entry) {
  1395. kmp_task_t *retval;
  1396. kmp_tasking_flags_t *input_flags = (kmp_tasking_flags_t *)&flags;
  1397. __kmp_assert_valid_gtid(gtid);
  1398. input_flags->native = FALSE;
  1399. // __kmp_task_alloc() sets up all other runtime flags
  1400. KA_TRACE(10, ("__kmpc_omp_task_alloc(enter): T#%d loc=%p, flags=(%s %s %s) "
  1401. "sizeof_task=%ld sizeof_shared=%ld entry=%p\n",
  1402. gtid, loc_ref, input_flags->tiedness ? "tied " : "untied",
  1403. input_flags->proxy ? "proxy" : "",
  1404. input_flags->detachable ? "detachable" : "", sizeof_kmp_task_t,
  1405. sizeof_shareds, task_entry));
  1406. retval = __kmp_task_alloc(loc_ref, gtid, input_flags, sizeof_kmp_task_t,
  1407. sizeof_shareds, task_entry);
  1408. KA_TRACE(20, ("__kmpc_omp_task_alloc(exit): T#%d retval %p\n", gtid, retval));
  1409. return retval;
  1410. }
  1411. kmp_task_t *__kmpc_omp_target_task_alloc(ident_t *loc_ref, kmp_int32 gtid,
  1412. kmp_int32 flags,
  1413. size_t sizeof_kmp_task_t,
  1414. size_t sizeof_shareds,
  1415. kmp_routine_entry_t task_entry,
  1416. kmp_int64 device_id) {
  1417. auto &input_flags = reinterpret_cast<kmp_tasking_flags_t &>(flags);
// target tasks are untied, as defined in the specification
  1419. input_flags.tiedness = TASK_UNTIED;
  1420. if (__kmp_enable_hidden_helper)
  1421. input_flags.hidden_helper = TRUE;
  1422. return __kmpc_omp_task_alloc(loc_ref, gtid, flags, sizeof_kmp_task_t,
  1423. sizeof_shareds, task_entry);
  1424. }
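// Illustration only: compiler-generated code for a deferred target region
// typically allocates its target task much like a regular explicit task; the
// entry name and the flag/device values below are placeholders. Note that the
// requested tiedness is overridden to TASK_UNTIED above.
//
//   kmp_task_t *t = __kmpc_omp_target_task_alloc(&loc, gtid, /*flags=*/1,
//                                                sizeof_kmp_task_t,
//                                                sizeof_shareds,
//                                                &outlined_target_entry,
//                                                /*device_id=*/0);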
  1425. /*!
  1426. @ingroup TASKING
  1427. @param loc_ref location of the original task directive
  1428. @param gtid Global Thread ID of encountering thread
  1429. @param new_task task thunk allocated by __kmpc_omp_task_alloc() for the ''new
  1430. task''
  1431. @param naffins Number of affinity items
  1432. @param affin_list List of affinity items
  1433. @return Returns non-zero if registering affinity information was not successful.
  1434. Returns 0 if registration was successful
  1435. This entry registers the affinity information attached to a task with the task
  1436. thunk structure kmp_taskdata_t.
  1437. */
  1438. kmp_int32
  1439. __kmpc_omp_reg_task_with_affinity(ident_t *loc_ref, kmp_int32 gtid,
  1440. kmp_task_t *new_task, kmp_int32 naffins,
  1441. kmp_task_affinity_info_t *affin_list) {
  1442. return 0;
  1443. }
  1444. // __kmp_invoke_task: invoke the specified task
  1445. //
  1446. // gtid: global thread ID of caller
  1447. // task: the task to invoke
  1448. // current_task: the task to resume after task invocation
  1449. static void __kmp_invoke_task(kmp_int32 gtid, kmp_task_t *task,
  1450. kmp_taskdata_t *current_task) {
  1451. kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task);
  1452. kmp_info_t *thread;
  1453. int discard = 0 /* false */;
  1454. KA_TRACE(
  1455. 30, ("__kmp_invoke_task(enter): T#%d invoking task %p, current_task=%p\n",
  1456. gtid, taskdata, current_task));
  1457. KMP_DEBUG_ASSERT(task);
  1458. if (UNLIKELY(taskdata->td_flags.proxy == TASK_PROXY &&
  1459. taskdata->td_flags.complete == 1)) {
  1460. // This is a proxy task that was already completed but it needs to run
  1461. // its bottom-half finish
  1462. KA_TRACE(
  1463. 30,
  1464. ("__kmp_invoke_task: T#%d running bottom finish for proxy task %p\n",
  1465. gtid, taskdata));
  1466. __kmp_bottom_half_finish_proxy(gtid, task);
  1467. KA_TRACE(30, ("__kmp_invoke_task(exit): T#%d completed bottom finish for "
  1468. "proxy task %p, resuming task %p\n",
  1469. gtid, taskdata, current_task));
  1470. return;
  1471. }
  1472. #if OMPT_SUPPORT
  1473. // For untied tasks, the first task executed only calls __kmpc_omp_task and
  1474. // does not execute code.
  1475. ompt_thread_info_t oldInfo;
  1476. if (UNLIKELY(ompt_enabled.enabled)) {
  1477. // Store the threads states and restore them after the task
  1478. thread = __kmp_threads[gtid];
  1479. oldInfo = thread->th.ompt_thread_info;
  1480. thread->th.ompt_thread_info.wait_id = 0;
  1481. thread->th.ompt_thread_info.state = (thread->th.th_team_serialized)
  1482. ? ompt_state_work_serial
  1483. : ompt_state_work_parallel;
  1484. taskdata->ompt_task_info.frame.exit_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
  1485. }
  1486. #endif
// Decrement the counter of hidden helper tasks to be executed
  1488. if (taskdata->td_flags.hidden_helper) {
  1489. // Hidden helper tasks can only be executed by hidden helper threads
  1490. KMP_ASSERT(KMP_HIDDEN_HELPER_THREAD(gtid));
  1491. KMP_ATOMIC_DEC(&__kmp_unexecuted_hidden_helper_tasks);
  1492. }
  1493. // Proxy tasks are not handled by the runtime
  1494. if (taskdata->td_flags.proxy != TASK_PROXY) {
  1495. __kmp_task_start(gtid, task, current_task); // OMPT only if not discarded
  1496. }
  1497. // TODO: cancel tasks if the parallel region has also been cancelled
  1498. // TODO: check if this sequence can be hoisted above __kmp_task_start
  1499. // if cancellation has been enabled for this run ...
  1500. if (UNLIKELY(__kmp_omp_cancellation)) {
  1501. thread = __kmp_threads[gtid];
  1502. kmp_team_t *this_team = thread->th.th_team;
  1503. kmp_taskgroup_t *taskgroup = taskdata->td_taskgroup;
  1504. if ((taskgroup && taskgroup->cancel_request) ||
  1505. (this_team->t.t_cancel_request == cancel_parallel)) {
  1506. #if OMPT_SUPPORT && OMPT_OPTIONAL
  1507. ompt_data_t *task_data;
  1508. if (UNLIKELY(ompt_enabled.ompt_callback_cancel)) {
  1509. __ompt_get_task_info_internal(0, NULL, &task_data, NULL, NULL, NULL);
  1510. ompt_callbacks.ompt_callback(ompt_callback_cancel)(
  1511. task_data,
  1512. ((taskgroup && taskgroup->cancel_request) ? ompt_cancel_taskgroup
  1513. : ompt_cancel_parallel) |
  1514. ompt_cancel_discarded_task,
  1515. NULL);
  1516. }
  1517. #endif
  1518. KMP_COUNT_BLOCK(TASK_cancelled);
  1519. // this task belongs to a task group and we need to cancel it
  1520. discard = 1 /* true */;
  1521. }
  1522. }
  1523. // Invoke the task routine and pass in relevant data.
  1524. // Thunks generated by gcc take a different argument list.
  1525. if (!discard) {
  1526. if (taskdata->td_flags.tiedness == TASK_UNTIED) {
  1527. taskdata->td_last_tied = current_task->td_last_tied;
  1528. KMP_DEBUG_ASSERT(taskdata->td_last_tied);
  1529. }
  1530. #if KMP_STATS_ENABLED
  1531. KMP_COUNT_BLOCK(TASK_executed);
  1532. switch (KMP_GET_THREAD_STATE()) {
  1533. case FORK_JOIN_BARRIER:
  1534. KMP_PUSH_PARTITIONED_TIMER(OMP_task_join_bar);
  1535. break;
  1536. case PLAIN_BARRIER:
  1537. KMP_PUSH_PARTITIONED_TIMER(OMP_task_plain_bar);
  1538. break;
  1539. case TASKYIELD:
  1540. KMP_PUSH_PARTITIONED_TIMER(OMP_task_taskyield);
  1541. break;
  1542. case TASKWAIT:
  1543. KMP_PUSH_PARTITIONED_TIMER(OMP_task_taskwait);
  1544. break;
  1545. case TASKGROUP:
  1546. KMP_PUSH_PARTITIONED_TIMER(OMP_task_taskgroup);
  1547. break;
  1548. default:
  1549. KMP_PUSH_PARTITIONED_TIMER(OMP_task_immediate);
  1550. break;
  1551. }
  1552. #endif // KMP_STATS_ENABLED
  1553. // OMPT task begin
  1554. #if OMPT_SUPPORT
  1555. if (UNLIKELY(ompt_enabled.enabled))
  1556. __ompt_task_start(task, current_task, gtid);
  1557. #endif
  1558. #if OMPT_SUPPORT && OMPT_OPTIONAL
  1559. if (UNLIKELY(ompt_enabled.ompt_callback_dispatch &&
  1560. taskdata->ompt_task_info.dispatch_chunk.iterations > 0)) {
  1561. ompt_data_t instance = ompt_data_none;
  1562. instance.ptr = &(taskdata->ompt_task_info.dispatch_chunk);
  1563. ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL);
  1564. ompt_callbacks.ompt_callback(ompt_callback_dispatch)(
  1565. &(team_info->parallel_data), &(taskdata->ompt_task_info.task_data),
  1566. ompt_dispatch_taskloop_chunk, instance);
  1567. taskdata->ompt_task_info.dispatch_chunk = {0, 0};
  1568. }
  1569. #endif // OMPT_SUPPORT && OMPT_OPTIONAL
  1570. #if OMPD_SUPPORT
  1571. if (ompd_state & OMPD_ENABLE_BP)
  1572. ompd_bp_task_begin();
  1573. #endif
  1574. #if USE_ITT_BUILD && USE_ITT_NOTIFY
  1575. kmp_uint64 cur_time;
  1576. kmp_int32 kmp_itt_count_task =
  1577. __kmp_forkjoin_frames_mode == 3 && !taskdata->td_flags.task_serial &&
  1578. current_task->td_flags.tasktype == TASK_IMPLICIT;
  1579. if (kmp_itt_count_task) {
  1580. thread = __kmp_threads[gtid];
  1581. // Time outer level explicit task on barrier for adjusting imbalance time
  1582. if (thread->th.th_bar_arrive_time)
  1583. cur_time = __itt_get_timestamp();
  1584. else
  1585. kmp_itt_count_task = 0; // thread is not on a barrier - skip timing
  1586. }
  1587. KMP_FSYNC_ACQUIRED(taskdata); // acquired self (new task)
  1588. #endif
  1589. if (task->routine != NULL) {
  1590. #ifdef KMP_GOMP_COMPAT
  1591. if (taskdata->td_flags.native) {
  1592. ((void (*)(void *))(*(task->routine)))(task->shareds);
  1593. } else
  1594. #endif /* KMP_GOMP_COMPAT */
  1595. {
  1596. (*(task->routine))(gtid, task);
  1597. }
  1598. }
  1599. KMP_POP_PARTITIONED_TIMER();
  1600. #if USE_ITT_BUILD && USE_ITT_NOTIFY
  1601. if (kmp_itt_count_task) {
  1602. // Barrier imbalance - adjust arrive time with the task duration
  1603. thread->th.th_bar_arrive_time += (__itt_get_timestamp() - cur_time);
  1604. }
  1605. KMP_FSYNC_CANCEL(taskdata); // destroy self (just executed)
  1606. KMP_FSYNC_RELEASING(taskdata->td_parent); // releasing parent
  1607. #endif
  1608. }
  1609. #if OMPD_SUPPORT
  1610. if (ompd_state & OMPD_ENABLE_BP)
  1611. ompd_bp_task_end();
  1612. #endif
  1613. // Proxy tasks are not handled by the runtime
  1614. if (taskdata->td_flags.proxy != TASK_PROXY) {
  1615. #if OMPT_SUPPORT
  1616. if (UNLIKELY(ompt_enabled.enabled)) {
  1617. thread->th.ompt_thread_info = oldInfo;
  1618. if (taskdata->td_flags.tiedness == TASK_TIED) {
  1619. taskdata->ompt_task_info.frame.exit_frame = ompt_data_none;
  1620. }
  1621. __kmp_task_finish<true>(gtid, task, current_task);
  1622. } else
  1623. #endif
  1624. __kmp_task_finish<false>(gtid, task, current_task);
  1625. }
  1626. KA_TRACE(
  1627. 30,
  1628. ("__kmp_invoke_task(exit): T#%d completed task %p, resuming task %p\n",
  1629. gtid, taskdata, current_task));
  1630. return;
  1631. }
  1632. // __kmpc_omp_task_parts: Schedule a thread-switchable task for execution
  1633. //
  1634. // loc_ref: location of original task pragma (ignored)
  1635. // gtid: Global Thread ID of encountering thread
  1636. // new_task: task thunk allocated by __kmp_omp_task_alloc() for the ''new task''
  1637. // Returns:
  1638. // TASK_CURRENT_NOT_QUEUED (0) if did not suspend and queue current task to
  1639. // be resumed later.
  1640. // TASK_CURRENT_QUEUED (1) if suspended and queued the current task to be
  1641. // resumed later.
  1642. kmp_int32 __kmpc_omp_task_parts(ident_t *loc_ref, kmp_int32 gtid,
  1643. kmp_task_t *new_task) {
  1644. kmp_taskdata_t *new_taskdata = KMP_TASK_TO_TASKDATA(new_task);
  1645. KA_TRACE(10, ("__kmpc_omp_task_parts(enter): T#%d loc=%p task=%p\n", gtid,
  1646. loc_ref, new_taskdata));
  1647. #if OMPT_SUPPORT
  1648. kmp_taskdata_t *parent;
  1649. if (UNLIKELY(ompt_enabled.enabled)) {
  1650. parent = new_taskdata->td_parent;
  1651. if (ompt_enabled.ompt_callback_task_create) {
  1652. ompt_callbacks.ompt_callback(ompt_callback_task_create)(
  1653. &(parent->ompt_task_info.task_data), &(parent->ompt_task_info.frame),
  1654. &(new_taskdata->ompt_task_info.task_data), ompt_task_explicit, 0,
  1655. OMPT_GET_RETURN_ADDRESS(0));
  1656. }
  1657. }
  1658. #endif
  1659. /* Should we execute the new task or queue it? For now, let's just always try
  1660. to queue it. If the queue fills up, then we'll execute it. */
  1661. if (__kmp_push_task(gtid, new_task) == TASK_NOT_PUSHED) // if cannot defer
  1662. { // Execute this task immediately
  1663. kmp_taskdata_t *current_task = __kmp_threads[gtid]->th.th_current_task;
  1664. new_taskdata->td_flags.task_serial = 1;
  1665. __kmp_invoke_task(gtid, new_task, current_task);
  1666. }
  1667. KA_TRACE(
  1668. 10,
  1669. ("__kmpc_omp_task_parts(exit): T#%d returning TASK_CURRENT_NOT_QUEUED: "
  1670. "loc=%p task=%p, return: TASK_CURRENT_NOT_QUEUED\n",
  1671. gtid, loc_ref, new_taskdata));
  1672. #if OMPT_SUPPORT
  1673. if (UNLIKELY(ompt_enabled.enabled)) {
  1674. parent->ompt_task_info.frame.enter_frame = ompt_data_none;
  1675. }
  1676. #endif
  1677. return TASK_CURRENT_NOT_QUEUED;
  1678. }
  1679. // __kmp_omp_task: Schedule a non-thread-switchable task for execution
  1680. //
  1681. // gtid: Global Thread ID of encountering thread
// new_task: non-thread-switchable task thunk allocated by __kmp_omp_task_alloc()
  1683. // serialize_immediate: if TRUE then if the task is executed immediately its
  1684. // execution will be serialized
  1685. // Returns:
  1686. // TASK_CURRENT_NOT_QUEUED (0) if did not suspend and queue current task to
  1687. // be resumed later.
  1688. // TASK_CURRENT_QUEUED (1) if suspended and queued the current task to be
  1689. // resumed later.
  1690. kmp_int32 __kmp_omp_task(kmp_int32 gtid, kmp_task_t *new_task,
  1691. bool serialize_immediate) {
  1692. kmp_taskdata_t *new_taskdata = KMP_TASK_TO_TASKDATA(new_task);
  1693. /* Should we execute the new task or queue it? For now, let's just always try
  1694. to queue it. If the queue fills up, then we'll execute it. */
  1695. if (new_taskdata->td_flags.proxy == TASK_PROXY ||
  1696. __kmp_push_task(gtid, new_task) == TASK_NOT_PUSHED) // if cannot defer
  1697. { // Execute this task immediately
  1698. kmp_taskdata_t *current_task = __kmp_threads[gtid]->th.th_current_task;
  1699. if (serialize_immediate)
  1700. new_taskdata->td_flags.task_serial = 1;
  1701. __kmp_invoke_task(gtid, new_task, current_task);
  1702. } else if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME &&
  1703. __kmp_wpolicy_passive) {
  1704. kmp_info_t *this_thr = __kmp_threads[gtid];
  1705. kmp_team_t *team = this_thr->th.th_team;
  1706. kmp_int32 nthreads = this_thr->th.th_team_nproc;
  1707. for (int i = 0; i < nthreads; ++i) {
  1708. kmp_info_t *thread = team->t.t_threads[i];
  1709. if (thread == this_thr)
  1710. continue;
  1711. if (thread->th.th_sleep_loc != NULL) {
  1712. __kmp_null_resume_wrapper(thread);
  1713. break; // awake one thread at a time
  1714. }
  1715. }
  1716. }
  1717. return TASK_CURRENT_NOT_QUEUED;
  1718. }
  1719. // __kmpc_omp_task: Wrapper around __kmp_omp_task to schedule a
  1720. // non-thread-switchable task from the parent thread only!
  1721. //
  1722. // loc_ref: location of original task pragma (ignored)
  1723. // gtid: Global Thread ID of encountering thread
  1724. // new_task: non-thread-switchable task thunk allocated by
  1725. // __kmp_omp_task_alloc()
  1726. // Returns:
  1727. // TASK_CURRENT_NOT_QUEUED (0) if did not suspend and queue current task to
  1728. // be resumed later.
  1729. // TASK_CURRENT_QUEUED (1) if suspended and queued the current task to be
  1730. // resumed later.
  1731. kmp_int32 __kmpc_omp_task(ident_t *loc_ref, kmp_int32 gtid,
  1732. kmp_task_t *new_task) {
  1733. kmp_int32 res;
  1734. KMP_SET_THREAD_STATE_BLOCK(EXPLICIT_TASK);
  1735. #if KMP_DEBUG || OMPT_SUPPORT
  1736. kmp_taskdata_t *new_taskdata = KMP_TASK_TO_TASKDATA(new_task);
  1737. #endif
  1738. KA_TRACE(10, ("__kmpc_omp_task(enter): T#%d loc=%p task=%p\n", gtid, loc_ref,
  1739. new_taskdata));
  1740. __kmp_assert_valid_gtid(gtid);
  1741. #if OMPT_SUPPORT
  1742. kmp_taskdata_t *parent = NULL;
  1743. if (UNLIKELY(ompt_enabled.enabled)) {
  1744. if (!new_taskdata->td_flags.started) {
  1745. OMPT_STORE_RETURN_ADDRESS(gtid);
  1746. parent = new_taskdata->td_parent;
  1747. if (!parent->ompt_task_info.frame.enter_frame.ptr) {
  1748. parent->ompt_task_info.frame.enter_frame.ptr =
  1749. OMPT_GET_FRAME_ADDRESS(0);
  1750. }
  1751. if (ompt_enabled.ompt_callback_task_create) {
  1752. ompt_callbacks.ompt_callback(ompt_callback_task_create)(
  1753. &(parent->ompt_task_info.task_data),
  1754. &(parent->ompt_task_info.frame),
  1755. &(new_taskdata->ompt_task_info.task_data),
  1756. ompt_task_explicit | TASK_TYPE_DETAILS_FORMAT(new_taskdata), 0,
  1757. OMPT_LOAD_RETURN_ADDRESS(gtid));
  1758. }
  1759. } else {
  1760. // We are scheduling the continuation of an UNTIED task.
  1761. // Scheduling back to the parent task.
  1762. __ompt_task_finish(new_task,
  1763. new_taskdata->ompt_task_info.scheduling_parent,
  1764. ompt_task_switch);
  1765. new_taskdata->ompt_task_info.frame.exit_frame = ompt_data_none;
  1766. }
  1767. }
  1768. #endif
  1769. res = __kmp_omp_task(gtid, new_task, true);
  1770. KA_TRACE(10, ("__kmpc_omp_task(exit): T#%d returning "
  1771. "TASK_CURRENT_NOT_QUEUED: loc=%p task=%p\n",
  1772. gtid, loc_ref, new_taskdata));
  1773. #if OMPT_SUPPORT
  1774. if (UNLIKELY(ompt_enabled.enabled && parent != NULL)) {
  1775. parent->ompt_task_info.frame.enter_frame = ompt_data_none;
  1776. }
  1777. #endif
  1778. return res;
  1779. }
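// For reference, a deferrable "#pragma omp task" typically lowers to an
// allocate-then-schedule pair. The sketch below is illustrative only; the
// outlined routine name and the shareds setup are placeholders:
//
//   kmp_task_t *t = __kmpc_omp_task_alloc(&loc, gtid, /*flags=*/1,
//                                         sizeof_kmp_task_t, sizeof_shareds,
//                                         &outlined_task_entry);
//   /* ... copy shared-variable pointers into t->shareds ... */
//   __kmpc_omp_task(&loc, gtid, t); // queued if possible, else run immediately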
  1780. // __kmp_omp_taskloop_task: Wrapper around __kmp_omp_task to schedule
  1781. // a taskloop task with the correct OMPT return address
  1782. //
  1783. // loc_ref: location of original task pragma (ignored)
  1784. // gtid: Global Thread ID of encountering thread
  1785. // new_task: non-thread-switchable task thunk allocated by
  1786. // __kmp_omp_task_alloc()
  1787. // codeptr_ra: return address for OMPT callback
  1788. // Returns:
  1789. // TASK_CURRENT_NOT_QUEUED (0) if did not suspend and queue current task to
  1790. // be resumed later.
  1791. // TASK_CURRENT_QUEUED (1) if suspended and queued the current task to be
  1792. // resumed later.
  1793. kmp_int32 __kmp_omp_taskloop_task(ident_t *loc_ref, kmp_int32 gtid,
  1794. kmp_task_t *new_task, void *codeptr_ra) {
  1795. kmp_int32 res;
  1796. KMP_SET_THREAD_STATE_BLOCK(EXPLICIT_TASK);
  1797. #if KMP_DEBUG || OMPT_SUPPORT
  1798. kmp_taskdata_t *new_taskdata = KMP_TASK_TO_TASKDATA(new_task);
  1799. #endif
KA_TRACE(10, ("__kmp_omp_taskloop_task(enter): T#%d loc=%p task=%p\n", gtid,
loc_ref, new_taskdata));
  1802. #if OMPT_SUPPORT
  1803. kmp_taskdata_t *parent = NULL;
  1804. if (UNLIKELY(ompt_enabled.enabled && !new_taskdata->td_flags.started)) {
  1805. parent = new_taskdata->td_parent;
  1806. if (!parent->ompt_task_info.frame.enter_frame.ptr)
  1807. parent->ompt_task_info.frame.enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
  1808. if (ompt_enabled.ompt_callback_task_create) {
  1809. ompt_callbacks.ompt_callback(ompt_callback_task_create)(
  1810. &(parent->ompt_task_info.task_data), &(parent->ompt_task_info.frame),
  1811. &(new_taskdata->ompt_task_info.task_data),
  1812. ompt_task_explicit | TASK_TYPE_DETAILS_FORMAT(new_taskdata), 0,
  1813. codeptr_ra);
  1814. }
  1815. }
  1816. #endif
  1817. res = __kmp_omp_task(gtid, new_task, true);
KA_TRACE(10, ("__kmp_omp_taskloop_task(exit): T#%d returning "
"TASK_CURRENT_NOT_QUEUED: loc=%p task=%p\n",
gtid, loc_ref, new_taskdata));
  1821. #if OMPT_SUPPORT
  1822. if (UNLIKELY(ompt_enabled.enabled && parent != NULL)) {
  1823. parent->ompt_task_info.frame.enter_frame = ompt_data_none;
  1824. }
  1825. #endif
  1826. return res;
  1827. }
  1828. template <bool ompt>
  1829. static kmp_int32 __kmpc_omp_taskwait_template(ident_t *loc_ref, kmp_int32 gtid,
  1830. void *frame_address,
  1831. void *return_address) {
  1832. kmp_taskdata_t *taskdata = nullptr;
  1833. kmp_info_t *thread;
  1834. int thread_finished = FALSE;
  1835. KMP_SET_THREAD_STATE_BLOCK(TASKWAIT);
  1836. KA_TRACE(10, ("__kmpc_omp_taskwait(enter): T#%d loc=%p\n", gtid, loc_ref));
  1837. KMP_DEBUG_ASSERT(gtid >= 0);
  1838. if (__kmp_tasking_mode != tskm_immediate_exec) {
  1839. thread = __kmp_threads[gtid];
  1840. taskdata = thread->th.th_current_task;
  1841. #if OMPT_SUPPORT && OMPT_OPTIONAL
  1842. ompt_data_t *my_task_data;
  1843. ompt_data_t *my_parallel_data;
  1844. if (ompt) {
  1845. my_task_data = &(taskdata->ompt_task_info.task_data);
  1846. my_parallel_data = OMPT_CUR_TEAM_DATA(thread);
  1847. taskdata->ompt_task_info.frame.enter_frame.ptr = frame_address;
  1848. if (ompt_enabled.ompt_callback_sync_region) {
  1849. ompt_callbacks.ompt_callback(ompt_callback_sync_region)(
  1850. ompt_sync_region_taskwait, ompt_scope_begin, my_parallel_data,
  1851. my_task_data, return_address);
  1852. }
  1853. if (ompt_enabled.ompt_callback_sync_region_wait) {
  1854. ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait)(
  1855. ompt_sync_region_taskwait, ompt_scope_begin, my_parallel_data,
  1856. my_task_data, return_address);
  1857. }
  1858. }
  1859. #endif // OMPT_SUPPORT && OMPT_OPTIONAL
// Debugger: The taskwait is active. Store the location and the thread that
// encountered the taskwait.
  1862. #if USE_ITT_BUILD
  1863. // Note: These values are used by ITT events as well.
  1864. #endif /* USE_ITT_BUILD */
  1865. taskdata->td_taskwait_counter += 1;
  1866. taskdata->td_taskwait_ident = loc_ref;
  1867. taskdata->td_taskwait_thread = gtid + 1;
  1868. #if USE_ITT_BUILD
  1869. void *itt_sync_obj = NULL;
  1870. #if USE_ITT_NOTIFY
  1871. KMP_ITT_TASKWAIT_STARTING(itt_sync_obj);
  1872. #endif /* USE_ITT_NOTIFY */
  1873. #endif /* USE_ITT_BUILD */
  1874. bool must_wait =
  1875. !taskdata->td_flags.team_serial && !taskdata->td_flags.final;
  1876. must_wait = must_wait || (thread->th.th_task_team != NULL &&
  1877. thread->th.th_task_team->tt.tt_found_proxy_tasks);
  1878. // If hidden helper thread is encountered, we must enable wait here.
  1879. must_wait =
  1880. must_wait ||
  1881. (__kmp_enable_hidden_helper && thread->th.th_task_team != NULL &&
  1882. thread->th.th_task_team->tt.tt_hidden_helper_task_encountered);
  1883. if (must_wait) {
  1884. kmp_flag_32<false, false> flag(
  1885. RCAST(std::atomic<kmp_uint32> *,
  1886. &(taskdata->td_incomplete_child_tasks)),
  1887. 0U);
  1888. while (KMP_ATOMIC_LD_ACQ(&taskdata->td_incomplete_child_tasks) != 0) {
  1889. flag.execute_tasks(thread, gtid, FALSE,
  1890. &thread_finished USE_ITT_BUILD_ARG(itt_sync_obj),
  1891. __kmp_task_stealing_constraint);
  1892. }
  1893. }
  1894. #if USE_ITT_BUILD
  1895. KMP_ITT_TASKWAIT_FINISHED(itt_sync_obj);
  1896. KMP_FSYNC_ACQUIRED(taskdata); // acquire self - sync with children
  1897. #endif /* USE_ITT_BUILD */
  1898. // Debugger: The taskwait is completed. Location remains, but thread is
  1899. // negated.
  1900. taskdata->td_taskwait_thread = -taskdata->td_taskwait_thread;
  1901. #if OMPT_SUPPORT && OMPT_OPTIONAL
  1902. if (ompt) {
  1903. if (ompt_enabled.ompt_callback_sync_region_wait) {
  1904. ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait)(
  1905. ompt_sync_region_taskwait, ompt_scope_end, my_parallel_data,
  1906. my_task_data, return_address);
  1907. }
  1908. if (ompt_enabled.ompt_callback_sync_region) {
  1909. ompt_callbacks.ompt_callback(ompt_callback_sync_region)(
  1910. ompt_sync_region_taskwait, ompt_scope_end, my_parallel_data,
  1911. my_task_data, return_address);
  1912. }
  1913. taskdata->ompt_task_info.frame.enter_frame = ompt_data_none;
  1914. }
  1915. #endif // OMPT_SUPPORT && OMPT_OPTIONAL
  1916. }
  1917. KA_TRACE(10, ("__kmpc_omp_taskwait(exit): T#%d task %p finished waiting, "
  1918. "returning TASK_CURRENT_NOT_QUEUED\n",
  1919. gtid, taskdata));
  1920. return TASK_CURRENT_NOT_QUEUED;
  1921. }
  1922. #if OMPT_SUPPORT && OMPT_OPTIONAL
  1923. OMPT_NOINLINE
  1924. static kmp_int32 __kmpc_omp_taskwait_ompt(ident_t *loc_ref, kmp_int32 gtid,
  1925. void *frame_address,
  1926. void *return_address) {
  1927. return __kmpc_omp_taskwait_template<true>(loc_ref, gtid, frame_address,
  1928. return_address);
  1929. }
  1930. #endif // OMPT_SUPPORT && OMPT_OPTIONAL
  1931. // __kmpc_omp_taskwait: Wait until all tasks generated by the current task are
  1932. // complete
  1933. kmp_int32 __kmpc_omp_taskwait(ident_t *loc_ref, kmp_int32 gtid) {
  1934. #if OMPT_SUPPORT && OMPT_OPTIONAL
  1935. if (UNLIKELY(ompt_enabled.enabled)) {
  1936. OMPT_STORE_RETURN_ADDRESS(gtid);
  1937. return __kmpc_omp_taskwait_ompt(loc_ref, gtid, OMPT_GET_FRAME_ADDRESS(0),
  1938. OMPT_LOAD_RETURN_ADDRESS(gtid));
  1939. }
  1940. #endif
  1941. return __kmpc_omp_taskwait_template<false>(loc_ref, gtid, NULL, NULL);
  1942. }
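// For reference, a "#pragma omp taskwait" without depend clauses typically
// lowers to a single call of this entry point, e.g.
//   __kmpc_omp_taskwait(&loc, __kmpc_global_thread_num(&loc));
// (a sketch; how the compiler obtains the gtid is up to it).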
  1943. // __kmpc_omp_taskyield: switch to a different task
  1944. kmp_int32 __kmpc_omp_taskyield(ident_t *loc_ref, kmp_int32 gtid, int end_part) {
  1945. kmp_taskdata_t *taskdata = NULL;
  1946. kmp_info_t *thread;
  1947. int thread_finished = FALSE;
  1948. KMP_COUNT_BLOCK(OMP_TASKYIELD);
  1949. KMP_SET_THREAD_STATE_BLOCK(TASKYIELD);
  1950. KA_TRACE(10, ("__kmpc_omp_taskyield(enter): T#%d loc=%p end_part = %d\n",
  1951. gtid, loc_ref, end_part));
  1952. __kmp_assert_valid_gtid(gtid);
  1953. if (__kmp_tasking_mode != tskm_immediate_exec && __kmp_init_parallel) {
  1954. thread = __kmp_threads[gtid];
  1955. taskdata = thread->th.th_current_task;
  1956. // Should we model this as a task wait or not?
// Debugger: The taskwait is active. Store the location and the thread that
// encountered the taskwait.
  1959. #if USE_ITT_BUILD
  1960. // Note: These values are used by ITT events as well.
  1961. #endif /* USE_ITT_BUILD */
  1962. taskdata->td_taskwait_counter += 1;
  1963. taskdata->td_taskwait_ident = loc_ref;
  1964. taskdata->td_taskwait_thread = gtid + 1;
  1965. #if USE_ITT_BUILD
  1966. void *itt_sync_obj = NULL;
  1967. #if USE_ITT_NOTIFY
  1968. KMP_ITT_TASKWAIT_STARTING(itt_sync_obj);
  1969. #endif /* USE_ITT_NOTIFY */
  1970. #endif /* USE_ITT_BUILD */
  1971. if (!taskdata->td_flags.team_serial) {
  1972. kmp_task_team_t *task_team = thread->th.th_task_team;
  1973. if (task_team != NULL) {
  1974. if (KMP_TASKING_ENABLED(task_team)) {
  1975. #if OMPT_SUPPORT
  1976. if (UNLIKELY(ompt_enabled.enabled))
  1977. thread->th.ompt_thread_info.ompt_task_yielded = 1;
  1978. #endif
  1979. __kmp_execute_tasks_32(
  1980. thread, gtid, (kmp_flag_32<> *)NULL, FALSE,
  1981. &thread_finished USE_ITT_BUILD_ARG(itt_sync_obj),
  1982. __kmp_task_stealing_constraint);
  1983. #if OMPT_SUPPORT
  1984. if (UNLIKELY(ompt_enabled.enabled))
  1985. thread->th.ompt_thread_info.ompt_task_yielded = 0;
  1986. #endif
  1987. }
  1988. }
  1989. }
  1990. #if USE_ITT_BUILD
  1991. KMP_ITT_TASKWAIT_FINISHED(itt_sync_obj);
  1992. #endif /* USE_ITT_BUILD */
  1993. // Debugger: The taskwait is completed. Location remains, but thread is
  1994. // negated.
  1995. taskdata->td_taskwait_thread = -taskdata->td_taskwait_thread;
  1996. }
  1997. KA_TRACE(10, ("__kmpc_omp_taskyield(exit): T#%d task %p resuming, "
  1998. "returning TASK_CURRENT_NOT_QUEUED\n",
  1999. gtid, taskdata));
  2000. return TASK_CURRENT_NOT_QUEUED;
  2001. }
  2002. // Task Reduction implementation
  2003. //
// Note: the initial implementation did not take into account the possibility
// of specifying omp_orig for the initializer of a UDR (user-defined reduction).
// The corrected implementation takes the omp_orig object into account.
// The compiler is free to use the old implementation if omp_orig is not specified.
  2008. /*!
  2009. @ingroup BASIC_TYPES
  2010. @{
  2011. */
  2012. /*!
  2013. Flags for special info per task reduction item.
  2014. */
  2015. typedef struct kmp_taskred_flags {
  2016. /*! 1 - use lazy alloc/init (e.g. big objects, num tasks < num threads) */
  2017. unsigned lazy_priv : 1;
  2018. unsigned reserved31 : 31;
  2019. } kmp_taskred_flags_t;
  2020. /*!
  2021. Internal struct for reduction data item related info set up by compiler.
  2022. */
  2023. typedef struct kmp_task_red_input {
  2024. void *reduce_shar; /**< shared between tasks item to reduce into */
  2025. size_t reduce_size; /**< size of data item in bytes */
  2026. // three compiler-generated routines (init, fini are optional):
  2027. void *reduce_init; /**< data initialization routine (single parameter) */
  2028. void *reduce_fini; /**< data finalization routine */
  2029. void *reduce_comb; /**< data combiner routine */
  2030. kmp_taskred_flags_t flags; /**< flags for additional info from compiler */
  2031. } kmp_task_red_input_t;
  2032. /*!
  2033. Internal struct for reduction data item related info saved by the library.
  2034. */
  2035. typedef struct kmp_taskred_data {
  2036. void *reduce_shar; /**< shared between tasks item to reduce into */
  2037. size_t reduce_size; /**< size of data item */
  2038. kmp_taskred_flags_t flags; /**< flags for additional info from compiler */
  2039. void *reduce_priv; /**< array of thread specific items */
  2040. void *reduce_pend; /**< end of private data for faster comparison op */
  2041. // three compiler-generated routines (init, fini are optional):
  2042. void *reduce_comb; /**< data combiner routine */
  2043. void *reduce_init; /**< data initialization routine (two parameters) */
  2044. void *reduce_fini; /**< data finalization routine */
  2045. void *reduce_orig; /**< original item (can be used in UDR initializer) */
  2046. } kmp_taskred_data_t;
  2047. /*!
  2048. Internal struct for reduction data item related info set up by compiler.
  2049. New interface: added reduce_orig field to provide omp_orig for UDR initializer.
  2050. */
  2051. typedef struct kmp_taskred_input {
  2052. void *reduce_shar; /**< shared between tasks item to reduce into */
  2053. void *reduce_orig; /**< original reduction item used for initialization */
  2054. size_t reduce_size; /**< size of data item */
  2055. // three compiler-generated routines (init, fini are optional):
  2056. void *reduce_init; /**< data initialization routine (two parameters) */
  2057. void *reduce_fini; /**< data finalization routine */
  2058. void *reduce_comb; /**< data combiner routine */
  2059. kmp_taskred_flags_t flags; /**< flags for additional info from compiler */
  2060. } kmp_taskred_input_t;
  2061. /*!
  2062. @}
  2063. */
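/* Editor's note -- illustrative sketch (hypothetical names, not authoritative):
   the lazy_priv flag trades eager per-thread storage for on-demand allocation,
   which can pay off for large reduction objects or when far fewer tasks than
   threads touch the item. A compiler might fill the new-interface descriptor as:

     kmp_taskred_input_t in = {};
     in.reduce_shar = &big_obj;              // shared user object (hypothetical)
     in.reduce_orig = &big_obj;              // omp_orig for the UDR initializer
     in.reduce_size = sizeof(big_obj);
     in.reduce_init = (void *)big_init;      // two-parameter initializer
     in.reduce_fini = (void *)big_fini;      // optional finalizer
     in.reduce_comb = (void *)big_comb;      // combiner (mandatory)
     in.flags.lazy_priv = 1;                 // allocate per-thread copies on first use
*/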
  2064. template <typename T> void __kmp_assign_orig(kmp_taskred_data_t &item, T &src);
  2065. template <>
  2066. void __kmp_assign_orig<kmp_task_red_input_t>(kmp_taskred_data_t &item,
  2067. kmp_task_red_input_t &src) {
  2068. item.reduce_orig = NULL;
  2069. }
  2070. template <>
  2071. void __kmp_assign_orig<kmp_taskred_input_t>(kmp_taskred_data_t &item,
  2072. kmp_taskred_input_t &src) {
  2073. if (src.reduce_orig != NULL) {
  2074. item.reduce_orig = src.reduce_orig;
  2075. } else {
  2076. item.reduce_orig = src.reduce_shar;
  2077. } // non-NULL reduce_orig means new interface used
  2078. }
  2079. template <typename T> void __kmp_call_init(kmp_taskred_data_t &item, size_t j);
  2080. template <>
  2081. void __kmp_call_init<kmp_task_red_input_t>(kmp_taskred_data_t &item,
  2082. size_t offset) {
  2083. ((void (*)(void *))item.reduce_init)((char *)(item.reduce_priv) + offset);
  2084. }
  2085. template <>
  2086. void __kmp_call_init<kmp_taskred_input_t>(kmp_taskred_data_t &item,
  2087. size_t offset) {
  2088. ((void (*)(void *, void *))item.reduce_init)(
  2089. (char *)(item.reduce_priv) + offset, item.reduce_orig);
  2090. }
  2091. template <typename T>
  2092. void *__kmp_task_reduction_init(int gtid, int num, T *data) {
  2093. __kmp_assert_valid_gtid(gtid);
  2094. kmp_info_t *thread = __kmp_threads[gtid];
  2095. kmp_taskgroup_t *tg = thread->th.th_current_task->td_taskgroup;
  2096. kmp_uint32 nth = thread->th.th_team_nproc;
  2097. kmp_taskred_data_t *arr;
  2098. // check input data just in case
  2099. KMP_ASSERT(tg != NULL);
  2100. KMP_ASSERT(data != NULL);
  2101. KMP_ASSERT(num > 0);
  2102. if (nth == 1) {
  2103. KA_TRACE(10, ("__kmpc_task_reduction_init: T#%d, tg %p, exiting nth=1\n",
  2104. gtid, tg));
  2105. return (void *)tg;
  2106. }
  2107. KA_TRACE(10, ("__kmpc_task_reduction_init: T#%d, taskgroup %p, #items %d\n",
  2108. gtid, tg, num));
  2109. arr = (kmp_taskred_data_t *)__kmp_thread_malloc(
  2110. thread, num * sizeof(kmp_taskred_data_t));
  2111. for (int i = 0; i < num; ++i) {
  2112. size_t size = data[i].reduce_size - 1;
  2113. // round the size up to cache line per thread-specific item
  2114. size += CACHE_LINE - size % CACHE_LINE;
  2115. KMP_ASSERT(data[i].reduce_comb != NULL); // combiner is mandatory
  2116. arr[i].reduce_shar = data[i].reduce_shar;
  2117. arr[i].reduce_size = size;
  2118. arr[i].flags = data[i].flags;
  2119. arr[i].reduce_comb = data[i].reduce_comb;
  2120. arr[i].reduce_init = data[i].reduce_init;
  2121. arr[i].reduce_fini = data[i].reduce_fini;
  2122. __kmp_assign_orig<T>(arr[i], data[i]);
  2123. if (!arr[i].flags.lazy_priv) {
  2124. // allocate cache-line aligned block and fill it with zeros
  2125. arr[i].reduce_priv = __kmp_allocate(nth * size);
  2126. arr[i].reduce_pend = (char *)(arr[i].reduce_priv) + nth * size;
  2127. if (arr[i].reduce_init != NULL) {
  2128. // initialize all thread-specific items
  2129. for (size_t j = 0; j < nth; ++j) {
  2130. __kmp_call_init<T>(arr[i], j * size);
  2131. }
  2132. }
  2133. } else {
  2134. // only allocate space for pointers now,
  2135. // objects will be lazily allocated/initialized if/when requested
  2136. // note that __kmp_allocate zeroes the allocated memory
  2137. arr[i].reduce_priv = __kmp_allocate(nth * sizeof(void *));
  2138. }
  2139. }
  2140. tg->reduce_data = (void *)arr;
  2141. tg->reduce_num_data = num;
  2142. return (void *)tg;
  2143. }
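// Editor's note -- worked example of the size round-up above, assuming
// CACHE_LINE is 64 bytes: reduce_size = 1  -> size = 64,
// reduce_size = 64 -> size = 64, reduce_size = 65 -> size = 128.
// Each per-thread slot is thus padded to a whole number of cache lines,
// which avoids false sharing between threads updating adjacent copies.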
  2144. /*!
  2145. @ingroup TASKING
  2146. @param gtid Global thread ID
  2147. @param num Number of data items to reduce
  2148. @param data Array of data for reduction
  2149. @return The taskgroup identifier
  2150. Initialize task reduction for the taskgroup.
2151. Note: this entry assumes the optional compiler-generated initializer routine
2152. has a single parameter - a pointer to the object to be initialized. That means
2153. the reduction either does not use the omp_orig object, or omp_orig is accessible
2154. without the help of the runtime library.
  2155. */
  2156. void *__kmpc_task_reduction_init(int gtid, int num, void *data) {
  2157. return __kmp_task_reduction_init(gtid, num, (kmp_task_red_input_t *)data);
  2158. }
  2159. /*!
  2160. @ingroup TASKING
  2161. @param gtid Global thread ID
  2162. @param num Number of data items to reduce
  2163. @param data Array of data for reduction
  2164. @return The taskgroup identifier
  2165. Initialize task reduction for the taskgroup.
2166. Note: this entry assumes the optional compiler-generated initializer routine
2167. has two parameters: a pointer to the object to be initialized and a pointer to omp_orig.
  2168. */
  2169. void *__kmpc_taskred_init(int gtid, int num, void *data) {
  2170. return __kmp_task_reduction_init(gtid, num, (kmp_taskred_input_t *)data);
  2171. }
  2172. // Copy task reduction data (except for shared pointers).
  2173. template <typename T>
  2174. void __kmp_task_reduction_init_copy(kmp_info_t *thr, int num, T *data,
  2175. kmp_taskgroup_t *tg, void *reduce_data) {
  2176. kmp_taskred_data_t *arr;
  2177. KA_TRACE(20, ("__kmp_task_reduction_init_copy: Th %p, init taskgroup %p,"
  2178. " from data %p\n",
  2179. thr, tg, reduce_data));
  2180. arr = (kmp_taskred_data_t *)__kmp_thread_malloc(
  2181. thr, num * sizeof(kmp_taskred_data_t));
  2182. // threads will share private copies, thunk routines, sizes, flags, etc.:
  2183. KMP_MEMCPY(arr, reduce_data, num * sizeof(kmp_taskred_data_t));
  2184. for (int i = 0; i < num; ++i) {
  2185. arr[i].reduce_shar = data[i].reduce_shar; // init unique shared pointers
  2186. }
  2187. tg->reduce_data = (void *)arr;
  2188. tg->reduce_num_data = num;
  2189. }
  2190. /*!
  2191. @ingroup TASKING
  2192. @param gtid Global thread ID
  2193. @param tskgrp The taskgroup ID (optional)
  2194. @param data Shared location of the item
  2195. @return The pointer to per-thread data
2196. Get the thread-specific location of the data item.
  2197. */
  2198. void *__kmpc_task_reduction_get_th_data(int gtid, void *tskgrp, void *data) {
  2199. __kmp_assert_valid_gtid(gtid);
  2200. kmp_info_t *thread = __kmp_threads[gtid];
  2201. kmp_int32 nth = thread->th.th_team_nproc;
  2202. if (nth == 1)
  2203. return data; // nothing to do
  2204. kmp_taskgroup_t *tg = (kmp_taskgroup_t *)tskgrp;
  2205. if (tg == NULL)
  2206. tg = thread->th.th_current_task->td_taskgroup;
  2207. KMP_ASSERT(tg != NULL);
  2208. kmp_taskred_data_t *arr = (kmp_taskred_data_t *)(tg->reduce_data);
  2209. kmp_int32 num = tg->reduce_num_data;
  2210. kmp_int32 tid = thread->th.th_info.ds.ds_tid;
  2211. KMP_ASSERT(data != NULL);
  2212. while (tg != NULL) {
  2213. for (int i = 0; i < num; ++i) {
  2214. if (!arr[i].flags.lazy_priv) {
  2215. if (data == arr[i].reduce_shar ||
  2216. (data >= arr[i].reduce_priv && data < arr[i].reduce_pend))
  2217. return (char *)(arr[i].reduce_priv) + tid * arr[i].reduce_size;
  2218. } else {
  2219. // check shared location first
  2220. void **p_priv = (void **)(arr[i].reduce_priv);
  2221. if (data == arr[i].reduce_shar)
  2222. goto found;
2223. // check whether a thread-specific location was passed as the parameter
  2224. for (int j = 0; j < nth; ++j)
  2225. if (data == p_priv[j])
  2226. goto found;
  2227. continue; // not found, continue search
  2228. found:
  2229. if (p_priv[tid] == NULL) {
  2230. // allocate thread specific object lazily
  2231. p_priv[tid] = __kmp_allocate(arr[i].reduce_size);
  2232. if (arr[i].reduce_init != NULL) {
  2233. if (arr[i].reduce_orig != NULL) { // new interface
  2234. ((void (*)(void *, void *))arr[i].reduce_init)(
  2235. p_priv[tid], arr[i].reduce_orig);
  2236. } else { // old interface (single parameter)
  2237. ((void (*)(void *))arr[i].reduce_init)(p_priv[tid]);
  2238. }
  2239. }
  2240. }
  2241. return p_priv[tid];
  2242. }
  2243. }
  2244. tg = tg->parent;
  2245. arr = (kmp_taskred_data_t *)(tg->reduce_data);
  2246. num = tg->reduce_num_data;
  2247. }
  2248. KMP_ASSERT2(0, "Unknown task reduction item");
  2249. return NULL; // ERROR, this line never executed
  2250. }
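/* Editor's note -- illustrative lowering sketch (an assumption, with
   hypothetical names loc/gtid/x/red_init/red_comb): for
       #pragma omp taskgroup task_reduction(+ : x)   // int x
   a compiler could emit roughly:

     static void red_init(void *priv, void *orig) { *(int *)priv = 0; }
     static void red_comb(void *out, void *in) { *(int *)out += *(int *)in; }

     __kmpc_taskgroup(loc, gtid);
     kmp_taskred_input_t item = {};
     item.reduce_shar = &x;
     item.reduce_orig = &x;
     item.reduce_size = sizeof(int);
     item.reduce_init = (void *)red_init;
     item.reduce_comb = (void *)red_comb;
     void *tg = __kmpc_taskred_init(gtid, 1, &item);
     // inside each participating task:
     //   int *p = (int *)__kmpc_task_reduction_get_th_data(gtid, tg, &x);
     //   *p += <partial result>;
     __kmpc_end_taskgroup(loc, gtid); // combines the private copies into x
*/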
  2251. // Finalize task reduction.
  2252. // Called from __kmpc_end_taskgroup()
  2253. static void __kmp_task_reduction_fini(kmp_info_t *th, kmp_taskgroup_t *tg) {
  2254. kmp_int32 nth = th->th.th_team_nproc;
  2255. KMP_DEBUG_ASSERT(nth > 1); // should not be called if nth == 1
  2256. kmp_taskred_data_t *arr = (kmp_taskred_data_t *)tg->reduce_data;
  2257. kmp_int32 num = tg->reduce_num_data;
  2258. for (int i = 0; i < num; ++i) {
  2259. void *sh_data = arr[i].reduce_shar;
  2260. void (*f_fini)(void *) = (void (*)(void *))(arr[i].reduce_fini);
  2261. void (*f_comb)(void *, void *) =
  2262. (void (*)(void *, void *))(arr[i].reduce_comb);
  2263. if (!arr[i].flags.lazy_priv) {
  2264. void *pr_data = arr[i].reduce_priv;
  2265. size_t size = arr[i].reduce_size;
  2266. for (int j = 0; j < nth; ++j) {
  2267. void *priv_data = (char *)pr_data + j * size;
  2268. f_comb(sh_data, priv_data); // combine results
  2269. if (f_fini)
  2270. f_fini(priv_data); // finalize if needed
  2271. }
  2272. } else {
  2273. void **pr_data = (void **)(arr[i].reduce_priv);
  2274. for (int j = 0; j < nth; ++j) {
  2275. if (pr_data[j] != NULL) {
  2276. f_comb(sh_data, pr_data[j]); // combine results
  2277. if (f_fini)
  2278. f_fini(pr_data[j]); // finalize if needed
  2279. __kmp_free(pr_data[j]);
  2280. }
  2281. }
  2282. }
  2283. __kmp_free(arr[i].reduce_priv);
  2284. }
  2285. __kmp_thread_free(th, arr);
  2286. tg->reduce_data = NULL;
  2287. tg->reduce_num_data = 0;
  2288. }
2289. // Clean up task reduction data for a parallel or worksharing region;
2290. // do not touch task-private data that other threads may still be working with.
  2291. // Called from __kmpc_end_taskgroup()
  2292. static void __kmp_task_reduction_clean(kmp_info_t *th, kmp_taskgroup_t *tg) {
  2293. __kmp_thread_free(th, tg->reduce_data);
  2294. tg->reduce_data = NULL;
  2295. tg->reduce_num_data = 0;
  2296. }
  2297. template <typename T>
  2298. void *__kmp_task_reduction_modifier_init(ident_t *loc, int gtid, int is_ws,
  2299. int num, T *data) {
  2300. __kmp_assert_valid_gtid(gtid);
  2301. kmp_info_t *thr = __kmp_threads[gtid];
  2302. kmp_int32 nth = thr->th.th_team_nproc;
  2303. __kmpc_taskgroup(loc, gtid); // form new taskgroup first
  2304. if (nth == 1) {
  2305. KA_TRACE(10,
  2306. ("__kmpc_reduction_modifier_init: T#%d, tg %p, exiting nth=1\n",
  2307. gtid, thr->th.th_current_task->td_taskgroup));
  2308. return (void *)thr->th.th_current_task->td_taskgroup;
  2309. }
  2310. kmp_team_t *team = thr->th.th_team;
  2311. void *reduce_data;
  2312. kmp_taskgroup_t *tg;
  2313. reduce_data = KMP_ATOMIC_LD_RLX(&team->t.t_tg_reduce_data[is_ws]);
  2314. if (reduce_data == NULL &&
  2315. __kmp_atomic_compare_store(&team->t.t_tg_reduce_data[is_ws], reduce_data,
  2316. (void *)1)) {
  2317. // single thread enters this block to initialize common reduction data
  2318. KMP_DEBUG_ASSERT(reduce_data == NULL);
  2319. // first initialize own data, then make a copy other threads can use
  2320. tg = (kmp_taskgroup_t *)__kmp_task_reduction_init<T>(gtid, num, data);
  2321. reduce_data = __kmp_thread_malloc(thr, num * sizeof(kmp_taskred_data_t));
  2322. KMP_MEMCPY(reduce_data, tg->reduce_data, num * sizeof(kmp_taskred_data_t));
  2323. // fini counters should be 0 at this point
  2324. KMP_DEBUG_ASSERT(KMP_ATOMIC_LD_RLX(&team->t.t_tg_fini_counter[0]) == 0);
  2325. KMP_DEBUG_ASSERT(KMP_ATOMIC_LD_RLX(&team->t.t_tg_fini_counter[1]) == 0);
  2326. KMP_ATOMIC_ST_REL(&team->t.t_tg_reduce_data[is_ws], reduce_data);
  2327. } else {
  2328. while (
  2329. (reduce_data = KMP_ATOMIC_LD_ACQ(&team->t.t_tg_reduce_data[is_ws])) ==
  2330. (void *)1) { // wait for task reduction initialization
  2331. KMP_CPU_PAUSE();
  2332. }
  2333. KMP_DEBUG_ASSERT(reduce_data > (void *)1); // should be valid pointer here
  2334. tg = thr->th.th_current_task->td_taskgroup;
  2335. __kmp_task_reduction_init_copy<T>(thr, num, data, tg, reduce_data);
  2336. }
  2337. return tg;
  2338. }
  2339. /*!
  2340. @ingroup TASKING
  2341. @param loc Source location info
  2342. @param gtid Global thread ID
  2343. @param is_ws Is 1 if the reduction is for worksharing, 0 otherwise
  2344. @param num Number of data items to reduce
  2345. @param data Array of data for reduction
  2346. @return The taskgroup identifier
2347. Initialize task reduction for a parallel or worksharing region.
2348. Note: this entry assumes the optional compiler-generated initializer routine
2349. has a single parameter - a pointer to the object to be initialized. That means
2350. the reduction either does not use the omp_orig object, or omp_orig is accessible
2351. without the help of the runtime library.
  2352. */
  2353. void *__kmpc_task_reduction_modifier_init(ident_t *loc, int gtid, int is_ws,
  2354. int num, void *data) {
  2355. return __kmp_task_reduction_modifier_init(loc, gtid, is_ws, num,
  2356. (kmp_task_red_input_t *)data);
  2357. }
  2358. /*!
  2359. @ingroup TASKING
  2360. @param loc Source location info
  2361. @param gtid Global thread ID
  2362. @param is_ws Is 1 if the reduction is for worksharing, 0 otherwise
  2363. @param num Number of data items to reduce
  2364. @param data Array of data for reduction
  2365. @return The taskgroup identifier
2366. Initialize task reduction for a parallel or worksharing region.
2367. Note: this entry assumes the optional compiler-generated initializer routine
2368. has two parameters: a pointer to the object to be initialized and a pointer to omp_orig.
  2369. */
  2370. void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid, int is_ws, int num,
  2371. void *data) {
  2372. return __kmp_task_reduction_modifier_init(loc, gtid, is_ws, num,
  2373. (kmp_taskred_input_t *)data);
  2374. }
  2375. /*!
  2376. @ingroup TASKING
  2377. @param loc Source location info
  2378. @param gtid Global thread ID
  2379. @param is_ws Is 1 if the reduction is for worksharing, 0 otherwise
2380. Finalize task reduction for a parallel or worksharing region.
  2381. */
  2382. void __kmpc_task_reduction_modifier_fini(ident_t *loc, int gtid, int is_ws) {
  2383. __kmpc_end_taskgroup(loc, gtid);
  2384. }
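/* Editor's note -- illustrative sketch (an assumption, hypothetical names as in
   the taskgroup example above): for a reduction with the task modifier, e.g.
       #pragma omp parallel reduction(task, + : x)
   each thread in the region could call:

     void *tg = __kmpc_taskred_modifier_init(loc, gtid, /*is_ws=*/0, 1, &item);
     // ... spawn tasks; each uses __kmpc_task_reduction_get_th_data(gtid, tg, &x)
     __kmpc_task_reduction_modifier_fini(loc, gtid, /*is_ws=*/0);

   With is_ws = 1 the same pair brackets a worksharing construct instead. */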
  2385. // __kmpc_taskgroup: Start a new taskgroup
  2386. void __kmpc_taskgroup(ident_t *loc, int gtid) {
  2387. __kmp_assert_valid_gtid(gtid);
  2388. kmp_info_t *thread = __kmp_threads[gtid];
  2389. kmp_taskdata_t *taskdata = thread->th.th_current_task;
  2390. kmp_taskgroup_t *tg_new =
  2391. (kmp_taskgroup_t *)__kmp_thread_malloc(thread, sizeof(kmp_taskgroup_t));
  2392. KA_TRACE(10, ("__kmpc_taskgroup: T#%d loc=%p group=%p\n", gtid, loc, tg_new));
  2393. KMP_ATOMIC_ST_RLX(&tg_new->count, 0);
  2394. KMP_ATOMIC_ST_RLX(&tg_new->cancel_request, cancel_noreq);
  2395. tg_new->parent = taskdata->td_taskgroup;
  2396. tg_new->reduce_data = NULL;
  2397. tg_new->reduce_num_data = 0;
  2398. tg_new->gomp_data = NULL;
  2399. taskdata->td_taskgroup = tg_new;
  2400. #if OMPT_SUPPORT && OMPT_OPTIONAL
  2401. if (UNLIKELY(ompt_enabled.ompt_callback_sync_region)) {
  2402. void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  2403. if (!codeptr)
  2404. codeptr = OMPT_GET_RETURN_ADDRESS(0);
  2405. kmp_team_t *team = thread->th.th_team;
  2406. ompt_data_t my_task_data = taskdata->ompt_task_info.task_data;
  2407. // FIXME: I think this is wrong for lwt!
  2408. ompt_data_t my_parallel_data = team->t.ompt_team_info.parallel_data;
  2409. ompt_callbacks.ompt_callback(ompt_callback_sync_region)(
  2410. ompt_sync_region_taskgroup, ompt_scope_begin, &(my_parallel_data),
  2411. &(my_task_data), codeptr);
  2412. }
  2413. #endif
  2414. }
  2415. // __kmpc_end_taskgroup: Wait until all tasks generated by the current task
  2416. // and its descendants are complete
  2417. void __kmpc_end_taskgroup(ident_t *loc, int gtid) {
  2418. __kmp_assert_valid_gtid(gtid);
  2419. kmp_info_t *thread = __kmp_threads[gtid];
  2420. kmp_taskdata_t *taskdata = thread->th.th_current_task;
  2421. kmp_taskgroup_t *taskgroup = taskdata->td_taskgroup;
  2422. int thread_finished = FALSE;
  2423. #if OMPT_SUPPORT && OMPT_OPTIONAL
  2424. kmp_team_t *team;
  2425. ompt_data_t my_task_data;
  2426. ompt_data_t my_parallel_data;
  2427. void *codeptr = nullptr;
  2428. if (UNLIKELY(ompt_enabled.enabled)) {
  2429. team = thread->th.th_team;
  2430. my_task_data = taskdata->ompt_task_info.task_data;
  2431. // FIXME: I think this is wrong for lwt!
  2432. my_parallel_data = team->t.ompt_team_info.parallel_data;
  2433. codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  2434. if (!codeptr)
  2435. codeptr = OMPT_GET_RETURN_ADDRESS(0);
  2436. }
  2437. #endif
  2438. KA_TRACE(10, ("__kmpc_end_taskgroup(enter): T#%d loc=%p\n", gtid, loc));
  2439. KMP_DEBUG_ASSERT(taskgroup != NULL);
  2440. KMP_SET_THREAD_STATE_BLOCK(TASKGROUP);
  2441. if (__kmp_tasking_mode != tskm_immediate_exec) {
  2442. // mark task as waiting not on a barrier
  2443. taskdata->td_taskwait_counter += 1;
  2444. taskdata->td_taskwait_ident = loc;
  2445. taskdata->td_taskwait_thread = gtid + 1;
  2446. #if USE_ITT_BUILD
  2447. // For ITT the taskgroup wait is similar to taskwait until we need to
  2448. // distinguish them
  2449. void *itt_sync_obj = NULL;
  2450. #if USE_ITT_NOTIFY
  2451. KMP_ITT_TASKWAIT_STARTING(itt_sync_obj);
  2452. #endif /* USE_ITT_NOTIFY */
  2453. #endif /* USE_ITT_BUILD */
  2454. #if OMPT_SUPPORT && OMPT_OPTIONAL
  2455. if (UNLIKELY(ompt_enabled.ompt_callback_sync_region_wait)) {
  2456. ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait)(
  2457. ompt_sync_region_taskgroup, ompt_scope_begin, &(my_parallel_data),
  2458. &(my_task_data), codeptr);
  2459. }
  2460. #endif
  2461. if (!taskdata->td_flags.team_serial ||
  2462. (thread->th.th_task_team != NULL &&
  2463. (thread->th.th_task_team->tt.tt_found_proxy_tasks ||
  2464. thread->th.th_task_team->tt.tt_hidden_helper_task_encountered))) {
  2465. kmp_flag_32<false, false> flag(
  2466. RCAST(std::atomic<kmp_uint32> *, &(taskgroup->count)), 0U);
  2467. while (KMP_ATOMIC_LD_ACQ(&taskgroup->count) != 0) {
  2468. flag.execute_tasks(thread, gtid, FALSE,
  2469. &thread_finished USE_ITT_BUILD_ARG(itt_sync_obj),
  2470. __kmp_task_stealing_constraint);
  2471. }
  2472. }
  2473. taskdata->td_taskwait_thread = -taskdata->td_taskwait_thread; // end waiting
  2474. #if OMPT_SUPPORT && OMPT_OPTIONAL
  2475. if (UNLIKELY(ompt_enabled.ompt_callback_sync_region_wait)) {
  2476. ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait)(
  2477. ompt_sync_region_taskgroup, ompt_scope_end, &(my_parallel_data),
  2478. &(my_task_data), codeptr);
  2479. }
  2480. #endif
  2481. #if USE_ITT_BUILD
  2482. KMP_ITT_TASKWAIT_FINISHED(itt_sync_obj);
  2483. KMP_FSYNC_ACQUIRED(taskdata); // acquire self - sync with descendants
  2484. #endif /* USE_ITT_BUILD */
  2485. }
  2486. KMP_DEBUG_ASSERT(taskgroup->count == 0);
  2487. if (taskgroup->reduce_data != NULL &&
  2488. !taskgroup->gomp_data) { // need to reduce?
  2489. int cnt;
  2490. void *reduce_data;
  2491. kmp_team_t *t = thread->th.th_team;
  2492. kmp_taskred_data_t *arr = (kmp_taskred_data_t *)taskgroup->reduce_data;
2493. // check if the <priv> data of the first reduction variable is shared for the team
  2494. void *priv0 = arr[0].reduce_priv;
  2495. if ((reduce_data = KMP_ATOMIC_LD_ACQ(&t->t.t_tg_reduce_data[0])) != NULL &&
  2496. ((kmp_taskred_data_t *)reduce_data)[0].reduce_priv == priv0) {
  2497. // finishing task reduction on parallel
  2498. cnt = KMP_ATOMIC_INC(&t->t.t_tg_fini_counter[0]);
  2499. if (cnt == thread->th.th_team_nproc - 1) {
  2500. // we are the last thread passing __kmpc_reduction_modifier_fini()
  2501. // finalize task reduction:
  2502. __kmp_task_reduction_fini(thread, taskgroup);
  2503. // cleanup fields in the team structure:
  2504. // TODO: is relaxed store enough here (whole barrier should follow)?
  2505. __kmp_thread_free(thread, reduce_data);
  2506. KMP_ATOMIC_ST_REL(&t->t.t_tg_reduce_data[0], NULL);
  2507. KMP_ATOMIC_ST_REL(&t->t.t_tg_fini_counter[0], 0);
  2508. } else {
  2509. // we are not the last thread passing __kmpc_reduction_modifier_fini(),
  2510. // so do not finalize reduction, just clean own copy of the data
  2511. __kmp_task_reduction_clean(thread, taskgroup);
  2512. }
  2513. } else if ((reduce_data = KMP_ATOMIC_LD_ACQ(&t->t.t_tg_reduce_data[1])) !=
  2514. NULL &&
  2515. ((kmp_taskred_data_t *)reduce_data)[0].reduce_priv == priv0) {
  2516. // finishing task reduction on worksharing
  2517. cnt = KMP_ATOMIC_INC(&t->t.t_tg_fini_counter[1]);
  2518. if (cnt == thread->th.th_team_nproc - 1) {
  2519. // we are the last thread passing __kmpc_reduction_modifier_fini()
  2520. __kmp_task_reduction_fini(thread, taskgroup);
  2521. // cleanup fields in team structure:
  2522. // TODO: is relaxed store enough here (whole barrier should follow)?
  2523. __kmp_thread_free(thread, reduce_data);
  2524. KMP_ATOMIC_ST_REL(&t->t.t_tg_reduce_data[1], NULL);
  2525. KMP_ATOMIC_ST_REL(&t->t.t_tg_fini_counter[1], 0);
  2526. } else {
  2527. // we are not the last thread passing __kmpc_reduction_modifier_fini(),
  2528. // so do not finalize reduction, just clean own copy of the data
  2529. __kmp_task_reduction_clean(thread, taskgroup);
  2530. }
  2531. } else {
  2532. // finishing task reduction on taskgroup
  2533. __kmp_task_reduction_fini(thread, taskgroup);
  2534. }
  2535. }
  2536. // Restore parent taskgroup for the current task
  2537. taskdata->td_taskgroup = taskgroup->parent;
  2538. __kmp_thread_free(thread, taskgroup);
  2539. KA_TRACE(10, ("__kmpc_end_taskgroup(exit): T#%d task %p finished waiting\n",
  2540. gtid, taskdata));
  2541. #if OMPT_SUPPORT && OMPT_OPTIONAL
  2542. if (UNLIKELY(ompt_enabled.ompt_callback_sync_region)) {
  2543. ompt_callbacks.ompt_callback(ompt_callback_sync_region)(
  2544. ompt_sync_region_taskgroup, ompt_scope_end, &(my_parallel_data),
  2545. &(my_task_data), codeptr);
  2546. }
  2547. #endif
  2548. }
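/* Editor's note -- minimal sketch (an assumption about typical lowering): a
   plain `#pragma omp taskgroup` with no reductions simply brackets the
   structured block:

     __kmpc_taskgroup(loc, gtid);
     // ... tasks created here, e.g. via __kmpc_omp_task(loc, gtid, task) ...
     __kmpc_end_taskgroup(loc, gtid); // waits for those tasks and their descendants
*/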
  2549. static kmp_task_t *__kmp_get_priority_task(kmp_int32 gtid,
  2550. kmp_task_team_t *task_team,
  2551. kmp_int32 is_constrained) {
  2552. kmp_task_t *task = NULL;
  2553. kmp_taskdata_t *taskdata;
  2554. kmp_taskdata_t *current;
  2555. kmp_thread_data_t *thread_data;
  2556. int ntasks = task_team->tt.tt_num_task_pri;
  2557. if (ntasks == 0) {
  2558. KA_TRACE(
  2559. 20, ("__kmp_get_priority_task(exit #1): T#%d No tasks to get\n", gtid));
  2560. return NULL;
  2561. }
  2562. do {
  2563. // decrement num_tasks to "reserve" one task to get for execution
  2564. if (__kmp_atomic_compare_store(&task_team->tt.tt_num_task_pri, ntasks,
  2565. ntasks - 1))
  2566. break;
  2567. } while (ntasks > 0);
  2568. if (ntasks == 0) {
  2569. KA_TRACE(20, ("__kmp_get_priority_task(exit #2): T#%d No tasks to get\n",
  2570. __kmp_get_gtid()));
  2571. return NULL;
  2572. }
  2573. // We got a "ticket" to get a "reserved" priority task
  2574. int deque_ntasks;
  2575. kmp_task_pri_t *list = task_team->tt.tt_task_pri_list;
  2576. do {
  2577. KMP_ASSERT(list != NULL);
  2578. thread_data = &list->td;
  2579. __kmp_acquire_bootstrap_lock(&thread_data->td.td_deque_lock);
  2580. deque_ntasks = thread_data->td.td_deque_ntasks;
  2581. if (deque_ntasks == 0) {
  2582. __kmp_release_bootstrap_lock(&thread_data->td.td_deque_lock);
  2583. KA_TRACE(20, ("__kmp_get_priority_task: T#%d No tasks to get from %p\n",
  2584. __kmp_get_gtid(), thread_data));
  2585. list = list->next;
  2586. }
  2587. } while (deque_ntasks == 0);
  2588. KMP_DEBUG_ASSERT(deque_ntasks);
  2589. int target = thread_data->td.td_deque_head;
  2590. current = __kmp_threads[gtid]->th.th_current_task;
  2591. taskdata = thread_data->td.td_deque[target];
  2592. if (__kmp_task_is_allowed(gtid, is_constrained, taskdata, current)) {
  2593. // Bump head pointer and Wrap.
  2594. thread_data->td.td_deque_head =
  2595. (target + 1) & TASK_DEQUE_MASK(thread_data->td);
  2596. } else {
  2597. if (!task_team->tt.tt_untied_task_encountered) {
2598. // The TSC does not allow stealing the victim task
  2599. __kmp_release_bootstrap_lock(&thread_data->td.td_deque_lock);
  2600. KA_TRACE(20, ("__kmp_get_priority_task(exit #3): T#%d could not get task "
  2601. "from %p: task_team=%p ntasks=%d head=%u tail=%u\n",
  2602. gtid, thread_data, task_team, deque_ntasks, target,
  2603. thread_data->td.td_deque_tail));
  2604. task_team->tt.tt_num_task_pri++; // atomic inc, restore value
  2605. return NULL;
  2606. }
  2607. int i;
  2608. // walk through the deque trying to steal any task
  2609. taskdata = NULL;
  2610. for (i = 1; i < deque_ntasks; ++i) {
  2611. target = (target + 1) & TASK_DEQUE_MASK(thread_data->td);
  2612. taskdata = thread_data->td.td_deque[target];
  2613. if (__kmp_task_is_allowed(gtid, is_constrained, taskdata, current)) {
  2614. break; // found task to execute
  2615. } else {
  2616. taskdata = NULL;
  2617. }
  2618. }
  2619. if (taskdata == NULL) {
  2620. // No appropriate candidate found to execute
  2621. __kmp_release_bootstrap_lock(&thread_data->td.td_deque_lock);
  2622. KA_TRACE(
  2623. 10, ("__kmp_get_priority_task(exit #4): T#%d could not get task from "
  2624. "%p: task_team=%p ntasks=%d head=%u tail=%u\n",
  2625. gtid, thread_data, task_team, deque_ntasks,
  2626. thread_data->td.td_deque_head, thread_data->td.td_deque_tail));
  2627. task_team->tt.tt_num_task_pri++; // atomic inc, restore value
  2628. return NULL;
  2629. }
  2630. int prev = target;
  2631. for (i = i + 1; i < deque_ntasks; ++i) {
  2632. // shift remaining tasks in the deque left by 1
  2633. target = (target + 1) & TASK_DEQUE_MASK(thread_data->td);
  2634. thread_data->td.td_deque[prev] = thread_data->td.td_deque[target];
  2635. prev = target;
  2636. }
  2637. KMP_DEBUG_ASSERT(
  2638. thread_data->td.td_deque_tail ==
  2639. (kmp_uint32)((target + 1) & TASK_DEQUE_MASK(thread_data->td)));
2640. thread_data->td.td_deque_tail = target; // tail -= 1 (wrapped)
  2641. }
  2642. thread_data->td.td_deque_ntasks = deque_ntasks - 1;
  2643. __kmp_release_bootstrap_lock(&thread_data->td.td_deque_lock);
  2644. task = KMP_TASKDATA_TO_TASK(taskdata);
  2645. return task;
  2646. }
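/* Editor's note -- the reservation loop above is a compare-and-swap "ticket"
   pattern. A minimal standalone equivalent with std::atomic (illustration
   only, not the runtime's implementation):

     std::atomic<int> avail{8};       // e.g. 8 priority tasks available
     int n = avail.load();
     while (n > 0 && !avail.compare_exchange_weak(n, n - 1)) {
       // compare_exchange_weak reloads n on failure; retry while tasks remain
     }
     bool reserved = (n > 0);         // true iff we decremented a positive count
*/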
  2647. // __kmp_remove_my_task: remove a task from my own deque
  2648. static kmp_task_t *__kmp_remove_my_task(kmp_info_t *thread, kmp_int32 gtid,
  2649. kmp_task_team_t *task_team,
  2650. kmp_int32 is_constrained) {
  2651. kmp_task_t *task;
  2652. kmp_taskdata_t *taskdata;
  2653. kmp_thread_data_t *thread_data;
  2654. kmp_uint32 tail;
  2655. KMP_DEBUG_ASSERT(__kmp_tasking_mode != tskm_immediate_exec);
  2656. KMP_DEBUG_ASSERT(task_team->tt.tt_threads_data !=
  2657. NULL); // Caller should check this condition
  2658. thread_data = &task_team->tt.tt_threads_data[__kmp_tid_from_gtid(gtid)];
  2659. KA_TRACE(10, ("__kmp_remove_my_task(enter): T#%d ntasks=%d head=%u tail=%u\n",
  2660. gtid, thread_data->td.td_deque_ntasks,
  2661. thread_data->td.td_deque_head, thread_data->td.td_deque_tail));
  2662. if (TCR_4(thread_data->td.td_deque_ntasks) == 0) {
  2663. KA_TRACE(10,
  2664. ("__kmp_remove_my_task(exit #1): T#%d No tasks to remove: "
  2665. "ntasks=%d head=%u tail=%u\n",
  2666. gtid, thread_data->td.td_deque_ntasks,
  2667. thread_data->td.td_deque_head, thread_data->td.td_deque_tail));
  2668. return NULL;
  2669. }
  2670. __kmp_acquire_bootstrap_lock(&thread_data->td.td_deque_lock);
  2671. if (TCR_4(thread_data->td.td_deque_ntasks) == 0) {
  2672. __kmp_release_bootstrap_lock(&thread_data->td.td_deque_lock);
  2673. KA_TRACE(10,
  2674. ("__kmp_remove_my_task(exit #2): T#%d No tasks to remove: "
  2675. "ntasks=%d head=%u tail=%u\n",
  2676. gtid, thread_data->td.td_deque_ntasks,
  2677. thread_data->td.td_deque_head, thread_data->td.td_deque_tail));
  2678. return NULL;
  2679. }
  2680. tail = (thread_data->td.td_deque_tail - 1) &
  2681. TASK_DEQUE_MASK(thread_data->td); // Wrap index.
  2682. taskdata = thread_data->td.td_deque[tail];
  2683. if (!__kmp_task_is_allowed(gtid, is_constrained, taskdata,
  2684. thread->th.th_current_task)) {
2685. // The TSC does not allow stealing the victim task
  2686. __kmp_release_bootstrap_lock(&thread_data->td.td_deque_lock);
  2687. KA_TRACE(10,
  2688. ("__kmp_remove_my_task(exit #3): T#%d TSC blocks tail task: "
  2689. "ntasks=%d head=%u tail=%u\n",
  2690. gtid, thread_data->td.td_deque_ntasks,
  2691. thread_data->td.td_deque_head, thread_data->td.td_deque_tail));
  2692. return NULL;
  2693. }
  2694. thread_data->td.td_deque_tail = tail;
  2695. TCW_4(thread_data->td.td_deque_ntasks, thread_data->td.td_deque_ntasks - 1);
  2696. __kmp_release_bootstrap_lock(&thread_data->td.td_deque_lock);
  2697. KA_TRACE(10, ("__kmp_remove_my_task(exit #4): T#%d task %p removed: "
  2698. "ntasks=%d head=%u tail=%u\n",
  2699. gtid, taskdata, thread_data->td.td_deque_ntasks,
  2700. thread_data->td.td_deque_head, thread_data->td.td_deque_tail));
  2701. task = KMP_TASKDATA_TO_TASK(taskdata);
  2702. return task;
  2703. }
  2704. // __kmp_steal_task: remove a task from another thread's deque
  2705. // Assume that calling thread has already checked existence of
  2706. // task_team thread_data before calling this routine.
  2707. static kmp_task_t *__kmp_steal_task(kmp_info_t *victim_thr, kmp_int32 gtid,
  2708. kmp_task_team_t *task_team,
  2709. std::atomic<kmp_int32> *unfinished_threads,
  2710. int *thread_finished,
  2711. kmp_int32 is_constrained) {
  2712. kmp_task_t *task;
  2713. kmp_taskdata_t *taskdata;
  2714. kmp_taskdata_t *current;
  2715. kmp_thread_data_t *victim_td, *threads_data;
  2716. kmp_int32 target;
  2717. kmp_int32 victim_tid;
  2718. KMP_DEBUG_ASSERT(__kmp_tasking_mode != tskm_immediate_exec);
  2719. threads_data = task_team->tt.tt_threads_data;
  2720. KMP_DEBUG_ASSERT(threads_data != NULL); // Caller should check this condition
  2721. victim_tid = victim_thr->th.th_info.ds.ds_tid;
  2722. victim_td = &threads_data[victim_tid];
  2723. KA_TRACE(10, ("__kmp_steal_task(enter): T#%d try to steal from T#%d: "
  2724. "task_team=%p ntasks=%d head=%u tail=%u\n",
  2725. gtid, __kmp_gtid_from_thread(victim_thr), task_team,
  2726. victim_td->td.td_deque_ntasks, victim_td->td.td_deque_head,
  2727. victim_td->td.td_deque_tail));
  2728. if (TCR_4(victim_td->td.td_deque_ntasks) == 0) {
  2729. KA_TRACE(10, ("__kmp_steal_task(exit #1): T#%d could not steal from T#%d: "
  2730. "task_team=%p ntasks=%d head=%u tail=%u\n",
  2731. gtid, __kmp_gtid_from_thread(victim_thr), task_team,
  2732. victim_td->td.td_deque_ntasks, victim_td->td.td_deque_head,
  2733. victim_td->td.td_deque_tail));
  2734. return NULL;
  2735. }
  2736. __kmp_acquire_bootstrap_lock(&victim_td->td.td_deque_lock);
  2737. int ntasks = TCR_4(victim_td->td.td_deque_ntasks);
  2738. // Check again after we acquire the lock
  2739. if (ntasks == 0) {
  2740. __kmp_release_bootstrap_lock(&victim_td->td.td_deque_lock);
  2741. KA_TRACE(10, ("__kmp_steal_task(exit #2): T#%d could not steal from T#%d: "
  2742. "task_team=%p ntasks=%d head=%u tail=%u\n",
  2743. gtid, __kmp_gtid_from_thread(victim_thr), task_team, ntasks,
  2744. victim_td->td.td_deque_head, victim_td->td.td_deque_tail));
  2745. return NULL;
  2746. }
  2747. KMP_DEBUG_ASSERT(victim_td->td.td_deque != NULL);
  2748. current = __kmp_threads[gtid]->th.th_current_task;
  2749. taskdata = victim_td->td.td_deque[victim_td->td.td_deque_head];
  2750. if (__kmp_task_is_allowed(gtid, is_constrained, taskdata, current)) {
  2751. // Bump head pointer and Wrap.
  2752. victim_td->td.td_deque_head =
  2753. (victim_td->td.td_deque_head + 1) & TASK_DEQUE_MASK(victim_td->td);
  2754. } else {
  2755. if (!task_team->tt.tt_untied_task_encountered) {
2756. // The TSC does not allow stealing the victim task
  2757. __kmp_release_bootstrap_lock(&victim_td->td.td_deque_lock);
  2758. KA_TRACE(10, ("__kmp_steal_task(exit #3): T#%d could not steal from "
  2759. "T#%d: task_team=%p ntasks=%d head=%u tail=%u\n",
  2760. gtid, __kmp_gtid_from_thread(victim_thr), task_team, ntasks,
  2761. victim_td->td.td_deque_head, victim_td->td.td_deque_tail));
  2762. return NULL;
  2763. }
  2764. int i;
  2765. // walk through victim's deque trying to steal any task
  2766. target = victim_td->td.td_deque_head;
  2767. taskdata = NULL;
  2768. for (i = 1; i < ntasks; ++i) {
  2769. target = (target + 1) & TASK_DEQUE_MASK(victim_td->td);
  2770. taskdata = victim_td->td.td_deque[target];
  2771. if (__kmp_task_is_allowed(gtid, is_constrained, taskdata, current)) {
  2772. break; // found victim task
  2773. } else {
  2774. taskdata = NULL;
  2775. }
  2776. }
  2777. if (taskdata == NULL) {
  2778. // No appropriate candidate to steal found
  2779. __kmp_release_bootstrap_lock(&victim_td->td.td_deque_lock);
  2780. KA_TRACE(10, ("__kmp_steal_task(exit #4): T#%d could not steal from "
  2781. "T#%d: task_team=%p ntasks=%d head=%u tail=%u\n",
  2782. gtid, __kmp_gtid_from_thread(victim_thr), task_team, ntasks,
  2783. victim_td->td.td_deque_head, victim_td->td.td_deque_tail));
  2784. return NULL;
  2785. }
  2786. int prev = target;
  2787. for (i = i + 1; i < ntasks; ++i) {
  2788. // shift remaining tasks in the deque left by 1
  2789. target = (target + 1) & TASK_DEQUE_MASK(victim_td->td);
  2790. victim_td->td.td_deque[prev] = victim_td->td.td_deque[target];
  2791. prev = target;
  2792. }
  2793. KMP_DEBUG_ASSERT(
  2794. victim_td->td.td_deque_tail ==
  2795. (kmp_uint32)((target + 1) & TASK_DEQUE_MASK(victim_td->td)));
2796. victim_td->td.td_deque_tail = target; // tail -= 1 (wrapped)
  2797. }
  2798. if (*thread_finished) {
  2799. // We need to un-mark this victim as a finished victim. This must be done
  2800. // before releasing the lock, or else other threads (starting with the
  2801. // primary thread victim) might be prematurely released from the barrier!!!
  2802. #if KMP_DEBUG
  2803. kmp_int32 count =
  2804. #endif
  2805. KMP_ATOMIC_INC(unfinished_threads);
  2806. KA_TRACE(
  2807. 20,
  2808. ("__kmp_steal_task: T#%d inc unfinished_threads to %d: task_team=%p\n",
  2809. gtid, count + 1, task_team));
  2810. *thread_finished = FALSE;
  2811. }
  2812. TCW_4(victim_td->td.td_deque_ntasks, ntasks - 1);
  2813. __kmp_release_bootstrap_lock(&victim_td->td.td_deque_lock);
  2814. KMP_COUNT_BLOCK(TASK_stolen);
  2815. KA_TRACE(10,
  2816. ("__kmp_steal_task(exit #5): T#%d stole task %p from T#%d: "
  2817. "task_team=%p ntasks=%d head=%u tail=%u\n",
  2818. gtid, taskdata, __kmp_gtid_from_thread(victim_thr), task_team,
  2819. ntasks, victim_td->td.td_deque_head, victim_td->td.td_deque_tail));
  2820. task = KMP_TASKDATA_TO_TASK(taskdata);
  2821. return task;
  2822. }
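/* Editor's note -- the head/tail arithmetic above treats the deque as a ring
   buffer whose size is a power of two, so TASK_DEQUE_MASK(td) is size - 1 and
   `(index + 1) & mask` wraps without a division. Standalone illustration
   (assumption, for exposition only):

     enum { kSize = 256, kMask = kSize - 1 };  // power-of-two capacity
     unsigned idx = 255;
     idx = (idx + 1) & kMask;                  // wraps to 0
*/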
  2823. // __kmp_execute_tasks_template: Choose and execute tasks until either the
2824. // condition is satisfied (return true) or there are none left (return false).
  2825. //
  2826. // final_spin is TRUE if this is the spin at the release barrier.
  2827. // thread_finished indicates whether the thread is finished executing all
  2828. // the tasks it has on its deque, and is at the release barrier.
  2829. // spinner is the location on which to spin.
  2830. // spinner == NULL means only execute a single task and return.
  2831. // checker is the value to check to terminate the spin.
  2832. template <class C>
  2833. static inline int __kmp_execute_tasks_template(
  2834. kmp_info_t *thread, kmp_int32 gtid, C *flag, int final_spin,
  2835. int *thread_finished USE_ITT_BUILD_ARG(void *itt_sync_obj),
  2836. kmp_int32 is_constrained) {
  2837. kmp_task_team_t *task_team = thread->th.th_task_team;
  2838. kmp_thread_data_t *threads_data;
  2839. kmp_task_t *task;
  2840. kmp_info_t *other_thread;
  2841. kmp_taskdata_t *current_task = thread->th.th_current_task;
  2842. std::atomic<kmp_int32> *unfinished_threads;
  2843. kmp_int32 nthreads, victim_tid = -2, use_own_tasks = 1, new_victim = 0,
  2844. tid = thread->th.th_info.ds.ds_tid;
  2845. KMP_DEBUG_ASSERT(__kmp_tasking_mode != tskm_immediate_exec);
  2846. KMP_DEBUG_ASSERT(thread == __kmp_threads[gtid]);
  2847. if (task_team == NULL || current_task == NULL)
  2848. return FALSE;
  2849. KA_TRACE(15, ("__kmp_execute_tasks_template(enter): T#%d final_spin=%d "
  2850. "*thread_finished=%d\n",
  2851. gtid, final_spin, *thread_finished));
  2852. thread->th.th_reap_state = KMP_NOT_SAFE_TO_REAP;
  2853. threads_data = (kmp_thread_data_t *)TCR_PTR(task_team->tt.tt_threads_data);
  2854. KMP_DEBUG_ASSERT(threads_data != NULL);
  2855. nthreads = task_team->tt.tt_nproc;
  2856. unfinished_threads = &(task_team->tt.tt_unfinished_threads);
  2857. KMP_DEBUG_ASSERT(nthreads > 1 || task_team->tt.tt_found_proxy_tasks ||
  2858. task_team->tt.tt_hidden_helper_task_encountered);
  2859. KMP_DEBUG_ASSERT(*unfinished_threads >= 0);
  2860. while (1) { // Outer loop keeps trying to find tasks in case of single thread
  2861. // getting tasks from target constructs
  2862. while (1) { // Inner loop to find a task and execute it
  2863. task = NULL;
  2864. if (task_team->tt.tt_num_task_pri) { // get priority task first
  2865. task = __kmp_get_priority_task(gtid, task_team, is_constrained);
  2866. }
  2867. if (task == NULL && use_own_tasks) { // check own queue next
  2868. task = __kmp_remove_my_task(thread, gtid, task_team, is_constrained);
  2869. }
  2870. if ((task == NULL) && (nthreads > 1)) { // Steal a task finally
  2871. int asleep = 1;
  2872. use_own_tasks = 0;
  2873. // Try to steal from the last place I stole from successfully.
  2874. if (victim_tid == -2) { // haven't stolen anything yet
  2875. victim_tid = threads_data[tid].td.td_deque_last_stolen;
  2876. if (victim_tid !=
  2877. -1) // if we have a last stolen from victim, get the thread
  2878. other_thread = threads_data[victim_tid].td.td_thr;
  2879. }
  2880. if (victim_tid != -1) { // found last victim
  2881. asleep = 0;
  2882. } else if (!new_victim) { // no recent steals and we haven't already
  2883. // used a new victim; select a random thread
  2884. do { // Find a different thread to steal work from.
  2885. // Pick a random thread. Initial plan was to cycle through all the
  2886. // threads, and only return if we tried to steal from every thread,
  2887. // and failed. Arch says that's not such a great idea.
  2888. victim_tid = __kmp_get_random(thread) % (nthreads - 1);
  2889. if (victim_tid >= tid) {
  2890. ++victim_tid; // Adjusts random distribution to exclude self
  2891. }
  2892. // Found a potential victim
  2893. other_thread = threads_data[victim_tid].td.td_thr;
  2894. // There is a slight chance that __kmp_enable_tasking() did not wake
  2895. // up all threads waiting at the barrier. If victim is sleeping,
2896. // then wake it up. Since we were going to pay the cache miss
2897. // penalty for referencing another thread's kmp_info_t struct
2898. // anyway, the check shouldn't cost too much performance at this
2899. // point. In
  2900. // extra barrier mode, tasks do not sleep at the separate tasking
  2901. // barrier, so this isn't a problem.
  2902. asleep = 0;
  2903. if ((__kmp_tasking_mode == tskm_task_teams) &&
  2904. (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) &&
  2905. (TCR_PTR(CCAST(void *, other_thread->th.th_sleep_loc)) !=
  2906. NULL)) {
  2907. asleep = 1;
  2908. __kmp_null_resume_wrapper(other_thread);
2909. // A sleeping thread should not have any tasks on its queue.
  2910. // There is a slight possibility that it resumes, steals a task
  2911. // from another thread, which spawns more tasks, all in the time
  2912. // that it takes this thread to check => don't write an assertion
  2913. // that the victim's queue is empty. Try stealing from a
  2914. // different thread.
  2915. }
  2916. } while (asleep);
  2917. }
  2918. if (!asleep) {
  2919. // We have a victim to try to steal from
  2920. task = __kmp_steal_task(other_thread, gtid, task_team,
  2921. unfinished_threads, thread_finished,
  2922. is_constrained);
  2923. }
  2924. if (task != NULL) { // set last stolen to victim
  2925. if (threads_data[tid].td.td_deque_last_stolen != victim_tid) {
  2926. threads_data[tid].td.td_deque_last_stolen = victim_tid;
  2927. // The pre-refactored code did not try more than 1 successful new
2928. // victim, unless the last one generated more local tasks;
  2929. // new_victim keeps track of this
  2930. new_victim = 1;
  2931. }
  2932. } else { // No tasks found; unset last_stolen
  2933. KMP_CHECK_UPDATE(threads_data[tid].td.td_deque_last_stolen, -1);
  2934. victim_tid = -2; // no successful victim found
  2935. }
  2936. }
  2937. if (task == NULL)
  2938. break; // break out of tasking loop
  2939. // Found a task; execute it
  2940. #if USE_ITT_BUILD && USE_ITT_NOTIFY
  2941. if (__itt_sync_create_ptr || KMP_ITT_DEBUG) {
  2942. if (itt_sync_obj == NULL) { // we are at fork barrier where we could not
  2943. // get the object reliably
  2944. itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier);
  2945. }
  2946. __kmp_itt_task_starting(itt_sync_obj);
  2947. }
  2948. #endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */
  2949. __kmp_invoke_task(gtid, task, current_task);
  2950. #if USE_ITT_BUILD
  2951. if (itt_sync_obj != NULL)
  2952. __kmp_itt_task_finished(itt_sync_obj);
  2953. #endif /* USE_ITT_BUILD */
  2954. // If this thread is only partway through the barrier and the condition is
  2955. // met, then return now, so that the barrier gather/release pattern can
  2956. // proceed. If this thread is in the last spin loop in the barrier,
  2957. // waiting to be released, we know that the termination condition will not
  2958. // be satisfied, so don't waste any cycles checking it.
  2959. if (flag == NULL || (!final_spin && flag->done_check())) {
  2960. KA_TRACE(
  2961. 15,
  2962. ("__kmp_execute_tasks_template: T#%d spin condition satisfied\n",
  2963. gtid));
  2964. return TRUE;
  2965. }
  2966. if (thread->th.th_task_team == NULL) {
  2967. break;
  2968. }
  2969. KMP_YIELD(__kmp_library == library_throughput); // Yield before next task
  2970. // If execution of a stolen task results in more tasks being placed on our
  2971. // run queue, reset use_own_tasks
  2972. if (!use_own_tasks && TCR_4(threads_data[tid].td.td_deque_ntasks) != 0) {
  2973. KA_TRACE(20, ("__kmp_execute_tasks_template: T#%d stolen task spawned "
  2974. "other tasks, restart\n",
  2975. gtid));
  2976. use_own_tasks = 1;
  2977. new_victim = 0;
  2978. }
  2979. }
  2980. // The task source has been exhausted. If in final spin loop of barrier,
  2981. // check if termination condition is satisfied. The work queue may be empty
  2982. // but there might be proxy tasks still executing.
  2983. if (final_spin &&
  2984. KMP_ATOMIC_LD_ACQ(&current_task->td_incomplete_child_tasks) == 0) {
  2985. // First, decrement the #unfinished threads, if that has not already been
  2986. // done. This decrement might be to the spin location, and result in the
  2987. // termination condition being satisfied.
  2988. if (!*thread_finished) {
  2989. #if KMP_DEBUG
  2990. kmp_int32 count = -1 +
  2991. #endif
  2992. KMP_ATOMIC_DEC(unfinished_threads);
  2993. KA_TRACE(20, ("__kmp_execute_tasks_template: T#%d dec "
  2994. "unfinished_threads to %d task_team=%p\n",
  2995. gtid, count, task_team));
  2996. *thread_finished = TRUE;
  2997. }
  2998. // It is now unsafe to reference thread->th.th_team !!!
  2999. // Decrementing task_team->tt.tt_unfinished_threads can allow the primary
  3000. // thread to pass through the barrier, where it might reset each thread's
  3001. // th.th_team field for the next parallel region. If we can steal more
  3002. // work, we know that this has not happened yet.
  3003. if (flag != NULL && flag->done_check()) {
  3004. KA_TRACE(
  3005. 15,
  3006. ("__kmp_execute_tasks_template: T#%d spin condition satisfied\n",
  3007. gtid));
  3008. return TRUE;
  3009. }
  3010. }
  3011. // If this thread's task team is NULL, primary thread has recognized that
  3012. // there are no more tasks; bail out
  3013. if (thread->th.th_task_team == NULL) {
  3014. KA_TRACE(15,
  3015. ("__kmp_execute_tasks_template: T#%d no more tasks\n", gtid));
  3016. return FALSE;
  3017. }
3018. // Check the flag again to see if it is already done, to avoid being trapped
3019. // in an infinite loop when an if0 task depends on a hidden helper task
  3020. // outside any parallel region. Detached tasks are not impacted in this case
  3021. // because the only thread executing this function has to execute the proxy
  3022. // task so it is in another code path that has the same check.
  3023. if (flag == NULL || (!final_spin && flag->done_check())) {
  3024. KA_TRACE(15,
  3025. ("__kmp_execute_tasks_template: T#%d spin condition satisfied\n",
  3026. gtid));
  3027. return TRUE;
  3028. }
  3029. // We could be getting tasks from target constructs; if this is the only
  3030. // thread, keep trying to execute tasks from own queue
  3031. if (nthreads == 1 &&
  3032. KMP_ATOMIC_LD_ACQ(&current_task->td_incomplete_child_tasks))
  3033. use_own_tasks = 1;
  3034. else {
  3035. KA_TRACE(15,
  3036. ("__kmp_execute_tasks_template: T#%d can't find work\n", gtid));
  3037. return FALSE;
  3038. }
  3039. }
  3040. }
  3041. template <bool C, bool S>
  3042. int __kmp_execute_tasks_32(
  3043. kmp_info_t *thread, kmp_int32 gtid, kmp_flag_32<C, S> *flag, int final_spin,
  3044. int *thread_finished USE_ITT_BUILD_ARG(void *itt_sync_obj),
  3045. kmp_int32 is_constrained) {
  3046. return __kmp_execute_tasks_template(
  3047. thread, gtid, flag, final_spin,
  3048. thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
  3049. }
  3050. template <bool C, bool S>
  3051. int __kmp_execute_tasks_64(
  3052. kmp_info_t *thread, kmp_int32 gtid, kmp_flag_64<C, S> *flag, int final_spin,
  3053. int *thread_finished USE_ITT_BUILD_ARG(void *itt_sync_obj),
  3054. kmp_int32 is_constrained) {
  3055. return __kmp_execute_tasks_template(
  3056. thread, gtid, flag, final_spin,
  3057. thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
  3058. }
  3059. template <bool C, bool S>
  3060. int __kmp_atomic_execute_tasks_64(
  3061. kmp_info_t *thread, kmp_int32 gtid, kmp_atomic_flag_64<C, S> *flag,
  3062. int final_spin, int *thread_finished USE_ITT_BUILD_ARG(void *itt_sync_obj),
  3063. kmp_int32 is_constrained) {
  3064. return __kmp_execute_tasks_template(
  3065. thread, gtid, flag, final_spin,
  3066. thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
  3067. }
  3068. int __kmp_execute_tasks_oncore(
  3069. kmp_info_t *thread, kmp_int32 gtid, kmp_flag_oncore *flag, int final_spin,
  3070. int *thread_finished USE_ITT_BUILD_ARG(void *itt_sync_obj),
  3071. kmp_int32 is_constrained) {
  3072. return __kmp_execute_tasks_template(
  3073. thread, gtid, flag, final_spin,
  3074. thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
  3075. }
  3076. template int
  3077. __kmp_execute_tasks_32<false, false>(kmp_info_t *, kmp_int32,
  3078. kmp_flag_32<false, false> *, int,
  3079. int *USE_ITT_BUILD_ARG(void *), kmp_int32);
  3080. template int __kmp_execute_tasks_64<false, true>(kmp_info_t *, kmp_int32,
  3081. kmp_flag_64<false, true> *,
  3082. int,
  3083. int *USE_ITT_BUILD_ARG(void *),
  3084. kmp_int32);
  3085. template int __kmp_execute_tasks_64<true, false>(kmp_info_t *, kmp_int32,
  3086. kmp_flag_64<true, false> *,
  3087. int,
  3088. int *USE_ITT_BUILD_ARG(void *),
  3089. kmp_int32);
  3090. template int __kmp_atomic_execute_tasks_64<false, true>(
  3091. kmp_info_t *, kmp_int32, kmp_atomic_flag_64<false, true> *, int,
  3092. int *USE_ITT_BUILD_ARG(void *), kmp_int32);
  3093. template int __kmp_atomic_execute_tasks_64<true, false>(
  3094. kmp_info_t *, kmp_int32, kmp_atomic_flag_64<true, false> *, int,
  3095. int *USE_ITT_BUILD_ARG(void *), kmp_int32);
  3096. // __kmp_enable_tasking: Allocate task team and resume threads sleeping at the
  3097. // next barrier so they can assist in executing enqueued tasks.
  3098. // First thread in allocates the task team atomically.
  3099. static void __kmp_enable_tasking(kmp_task_team_t *task_team,
  3100. kmp_info_t *this_thr) {
  3101. kmp_thread_data_t *threads_data;
  3102. int nthreads, i, is_init_thread;
  3103. KA_TRACE(10, ("__kmp_enable_tasking(enter): T#%d\n",
  3104. __kmp_gtid_from_thread(this_thr)));
  3105. KMP_DEBUG_ASSERT(task_team != NULL);
  3106. KMP_DEBUG_ASSERT(this_thr->th.th_team != NULL);
  3107. nthreads = task_team->tt.tt_nproc;
  3108. KMP_DEBUG_ASSERT(nthreads > 0);
  3109. KMP_DEBUG_ASSERT(nthreads == this_thr->th.th_team->t.t_nproc);
  3110. // Allocate or increase the size of threads_data if necessary
  3111. is_init_thread = __kmp_realloc_task_threads_data(this_thr, task_team);
  3112. if (!is_init_thread) {
  3113. // Some other thread already set up the array.
  3114. KA_TRACE(
  3115. 20,
  3116. ("__kmp_enable_tasking(exit): T#%d: threads array already set up.\n",
  3117. __kmp_gtid_from_thread(this_thr)));
  3118. return;
  3119. }
  3120. threads_data = (kmp_thread_data_t *)TCR_PTR(task_team->tt.tt_threads_data);
  3121. KMP_DEBUG_ASSERT(threads_data != NULL);
  3122. if (__kmp_tasking_mode == tskm_task_teams &&
  3123. (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME)) {
  3124. // Release any threads sleeping at the barrier, so that they can steal
  3125. // tasks and execute them. In extra barrier mode, tasks do not sleep
  3126. // at the separate tasking barrier, so this isn't a problem.
  3127. for (i = 0; i < nthreads; i++) {
  3128. void *sleep_loc;
  3129. kmp_info_t *thread = threads_data[i].td.td_thr;
  3130. if (i == this_thr->th.th_info.ds.ds_tid) {
  3131. continue;
  3132. }
  3133. // Since we haven't locked the thread's suspend mutex lock at this
  3134. // point, there is a small window where a thread might be putting
  3135. // itself to sleep, but hasn't set the th_sleep_loc field yet.
  3136. // To work around this, __kmp_execute_tasks_template() periodically checks
3137. // to see if other threads are sleeping (using the same random mechanism that
  3138. // is used for task stealing) and awakens them if they are.
  3139. if ((sleep_loc = TCR_PTR(CCAST(void *, thread->th.th_sleep_loc))) !=
  3140. NULL) {
  3141. KF_TRACE(50, ("__kmp_enable_tasking: T#%d waking up thread T#%d\n",
  3142. __kmp_gtid_from_thread(this_thr),
  3143. __kmp_gtid_from_thread(thread)));
  3144. __kmp_null_resume_wrapper(thread);
  3145. } else {
  3146. KF_TRACE(50, ("__kmp_enable_tasking: T#%d don't wake up thread T#%d\n",
  3147. __kmp_gtid_from_thread(this_thr),
  3148. __kmp_gtid_from_thread(thread)));
  3149. }
  3150. }
  3151. }
  3152. KA_TRACE(10, ("__kmp_enable_tasking(exit): T#%d\n",
  3153. __kmp_gtid_from_thread(this_thr)));
  3154. }
3155. /* // TODO: Check the comment consistency
3156. * Utility routines for "task teams". A task team (kmp_task_team_t) is kind
3157. * of like a shadow of the kmp_team_t data struct, with a different lifetime.
3158. * After a child thread checks into a barrier and calls __kmp_release() from
3159. * the particular variant of __kmp_<barrier_kind>_barrier_gather(), it can no
3160. * longer assume that the kmp_team_t structure is intact (at any moment, the
3161. * primary thread may exit the barrier code and free the team data structure,
3162. * and return the threads to the thread pool).
3163. *
3164. * This does not work with the tasking code, as the thread is still
3165. * expected to participate in the execution of any tasks that may have been
3166. * spawned by a member of the team, and the thread still needs access to
3167. * each thread in the team, so that it can steal work from it.
3168. *
3169. * Enter the existence of the kmp_task_team_t struct. It employs a reference
3170. * counting mechanism, and is allocated by the primary thread before calling
3171. * __kmp_<barrier_kind>_release, and then is released by the last thread to
3172. * exit __kmp_<barrier_kind>_release at the next barrier. I.e. the lifetimes
3173. * of the kmp_task_team_t structs for consecutive barriers can overlap
3174. * (and will, unless the primary thread is the last thread to exit the barrier
3175. * release phase, which is not typical). The existence of such a struct is
3176. * useful outside the context of tasking.
3177. *
3178. * We currently use the existence of the threads array as an indicator that
3179. * tasks were spawned since the last barrier. If the structure is to be
3180. * useful outside the context of tasking, then this will have to change, but
3181. * not setting the field minimizes the performance impact of tasking on
3182. * barriers, when no explicit tasks were spawned (pushed, actually).
3183. */
  3184. static kmp_task_team_t *__kmp_free_task_teams =
  3185. NULL; // Free list for task_team data structures
  3186. // Lock for task team data structures
  3187. kmp_bootstrap_lock_t __kmp_task_team_lock =
  3188. KMP_BOOTSTRAP_LOCK_INITIALIZER(__kmp_task_team_lock);
  3189. // __kmp_alloc_task_deque:
3190. // Allocates a task deque for a particular thread, and initializes the necessary
  3191. // data structures relating to the deque. This only happens once per thread
  3192. // per task team since task teams are recycled. No lock is needed during
  3193. // allocation since each thread allocates its own deque.
  3194. static void __kmp_alloc_task_deque(kmp_info_t *thread,
  3195. kmp_thread_data_t *thread_data) {
  3196. __kmp_init_bootstrap_lock(&thread_data->td.td_deque_lock);
  3197. KMP_DEBUG_ASSERT(thread_data->td.td_deque == NULL);
  3198. // Initialize last stolen task field to "none"
  3199. thread_data->td.td_deque_last_stolen = -1;
  3200. KMP_DEBUG_ASSERT(TCR_4(thread_data->td.td_deque_ntasks) == 0);
  3201. KMP_DEBUG_ASSERT(thread_data->td.td_deque_head == 0);
  3202. KMP_DEBUG_ASSERT(thread_data->td.td_deque_tail == 0);
  3203. KE_TRACE(
  3204. 10,
  3205. ("__kmp_alloc_task_deque: T#%d allocating deque[%d] for thread_data %p\n",
  3206. __kmp_gtid_from_thread(thread), INITIAL_TASK_DEQUE_SIZE, thread_data));
  3207. // Allocate space for task deque, and zero the deque
  3208. // Cannot use __kmp_thread_calloc() because threads not around for
  3209. // kmp_reap_task_team( ).
  3210. thread_data->td.td_deque = (kmp_taskdata_t **)__kmp_allocate(
  3211. INITIAL_TASK_DEQUE_SIZE * sizeof(kmp_taskdata_t *));
  3212. thread_data->td.td_deque_size = INITIAL_TASK_DEQUE_SIZE;
  3213. }
  3214. // __kmp_free_task_deque:
  3215. // Deallocates a task deque for a particular thread. Happens at library
  3216. // deallocation so don't need to reset all thread data fields.
  3217. static void __kmp_free_task_deque(kmp_thread_data_t *thread_data) {
  3218. if (thread_data->td.td_deque != NULL) {
  3219. __kmp_acquire_bootstrap_lock(&thread_data->td.td_deque_lock);
  3220. TCW_4(thread_data->td.td_deque_ntasks, 0);
  3221. __kmp_free(thread_data->td.td_deque);
  3222. thread_data->td.td_deque = NULL;
  3223. __kmp_release_bootstrap_lock(&thread_data->td.td_deque_lock);
  3224. }
  3225. #ifdef BUILD_TIED_TASK_STACK
  3226. // GEH: Figure out what to do here for td_susp_tied_tasks
  3227. if (thread_data->td.td_susp_tied_tasks.ts_entries != TASK_STACK_EMPTY) {
  3228. __kmp_free_task_stack(__kmp_thread_from_gtid(gtid), thread_data);
  3229. }
  3230. #endif // BUILD_TIED_TASK_STACK
  3231. }
  3232. // __kmp_realloc_task_threads_data:
  3233. // Allocates a threads_data array for a task team, either by allocating an
  3234. // initial array or enlarging an existing array. Only the first thread to get
  3235. // the lock allocs or enlarges the array and re-initializes the array elements.
  3236. // That thread returns "TRUE", the rest return "FALSE".
  3237. // Assumes that the new array size is given by task_team -> tt.tt_nproc.
  3238. // The current size is given by task_team -> tt.tt_max_threads.
  3239. static int __kmp_realloc_task_threads_data(kmp_info_t *thread,
  3240. kmp_task_team_t *task_team) {
  3241. kmp_thread_data_t **threads_data_p;
  3242. kmp_int32 nthreads, maxthreads;
  3243. int is_init_thread = FALSE;
  3244. if (TCR_4(task_team->tt.tt_found_tasks)) {
  3245. // Already reallocated and initialized.
  3246. return FALSE;
  3247. }
  3248. threads_data_p = &task_team->tt.tt_threads_data;
  3249. nthreads = task_team->tt.tt_nproc;
  3250. maxthreads = task_team->tt.tt_max_threads;
  3251. // All threads must lock when they encounter the first task of the implicit
  3252. // task region to make sure threads_data fields are (re)initialized before
  3253. // used.
  3254. __kmp_acquire_bootstrap_lock(&task_team->tt.tt_threads_lock);
  3255. if (!TCR_4(task_team->tt.tt_found_tasks)) {
  3256. // first thread to enable tasking
  3257. kmp_team_t *team = thread->th.th_team;
  3258. int i;
  3259. is_init_thread = TRUE;
  3260. if (maxthreads < nthreads) {
  3261. if (*threads_data_p != NULL) {
  3262. kmp_thread_data_t *old_data = *threads_data_p;
  3263. kmp_thread_data_t *new_data = NULL;
  3264. KE_TRACE(
  3265. 10,
  3266. ("__kmp_realloc_task_threads_data: T#%d reallocating "
  3267. "threads data for task_team %p, new_size = %d, old_size = %d\n",
  3268. __kmp_gtid_from_thread(thread), task_team, nthreads, maxthreads));
  3269. // Reallocate threads_data to have more elements than current array
  3270. // Cannot use __kmp_thread_realloc() because threads not around for
  3271. // kmp_reap_task_team( ). Note all new array entries are initialized
  3272. // to zero by __kmp_allocate().
  3273. new_data = (kmp_thread_data_t *)__kmp_allocate(
  3274. nthreads * sizeof(kmp_thread_data_t));
  3275. // copy old data to new data
  3276. KMP_MEMCPY_S((void *)new_data, nthreads * sizeof(kmp_thread_data_t),
  3277. (void *)old_data, maxthreads * sizeof(kmp_thread_data_t));
  3278. #ifdef BUILD_TIED_TASK_STACK
  3279. // GEH: Figure out if this is the right thing to do
  3280. for (i = maxthreads; i < nthreads; i++) {
  3281. kmp_thread_data_t *thread_data = &(*threads_data_p)[i];
  3282. __kmp_init_task_stack(__kmp_gtid_from_thread(thread), thread_data);
  3283. }
  3284. #endif // BUILD_TIED_TASK_STACK
  3285. // Install the new data and free the old data
  3286. (*threads_data_p) = new_data;
  3287. __kmp_free(old_data);
  3288. } else {
  3289. KE_TRACE(10, ("__kmp_realloc_task_threads_data: T#%d allocating "
  3290. "threads data for task_team %p, size = %d\n",
  3291. __kmp_gtid_from_thread(thread), task_team, nthreads));
  3292. // Make the initial allocate for threads_data array, and zero entries
  3293. // Cannot use __kmp_thread_calloc() because threads not around for
  3294. // kmp_reap_task_team( ).
  3295. *threads_data_p = (kmp_thread_data_t *)__kmp_allocate(
  3296. nthreads * sizeof(kmp_thread_data_t));
  3297. #ifdef BUILD_TIED_TASK_STACK
  3298. // GEH: Figure out if this is the right thing to do
  3299. for (i = 0; i < nthreads; i++) {
  3300. kmp_thread_data_t *thread_data = &(*threads_data_p)[i];
  3301. __kmp_init_task_stack(__kmp_gtid_from_thread(thread), thread_data);
  3302. }
  3303. #endif // BUILD_TIED_TASK_STACK
  3304. }
  3305. task_team->tt.tt_max_threads = nthreads;
  3306. } else {
  3307. // If array has (more than) enough elements, go ahead and use it
  3308. KMP_DEBUG_ASSERT(*threads_data_p != NULL);
  3309. }
  3310. // initialize threads_data pointers back to thread_info structures
  3311. for (i = 0; i < nthreads; i++) {
  3312. kmp_thread_data_t *thread_data = &(*threads_data_p)[i];
  3313. thread_data->td.td_thr = team->t.t_threads[i];
  3314. if (thread_data->td.td_deque_last_stolen >= nthreads) {
  3315. // The last stolen field survives across teams / barrier, and the number
  3316. // of threads may have changed. It's possible (likely?) that a new
// parallel region will exhibit the same behavior as the previous region.
  3318. thread_data->td.td_deque_last_stolen = -1;
  3319. }
  3320. }
  3321. KMP_MB();
  3322. TCW_SYNC_4(task_team->tt.tt_found_tasks, TRUE);
  3323. }
  3324. __kmp_release_bootstrap_lock(&task_team->tt.tt_threads_lock);
  3325. return is_init_thread;
  3326. }
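/* The routine above follows a check / lock / re-check pattern: the flag is
   read without the lock on the fast path, and only the first thread to take
   the lock performs the (re)allocation before publishing the flag (TCW_SYNC_4
   after KMP_MB above). A stand-alone sketch of the same pattern, with
   hypothetical toy_* names and std::atomic / std::mutex standing in for the
   runtime's primitives:

     #include <atomic>
     #include <mutex>

     struct toy_table {
       std::mutex lock;
       std::atomic<bool> initialized{false};
       int *data = nullptr;
       int size = 0;

       // Returns true only for the thread that performed the initialization.
       bool ensure(int nthreads) {
         if (initialized.load(std::memory_order_acquire))
           return false;                    // fast path, no lock taken
         std::lock_guard<std::mutex> g(lock);
         if (initialized.load(std::memory_order_relaxed))
           return false;                    // another thread beat us to it
         data = new int[nthreads]();        // zeroed, like __kmp_allocate
         size = nthreads;
         initialized.store(true, std::memory_order_release); // publish last
         return true;
       }
     };
*/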
  3327. // __kmp_free_task_threads_data:
  3328. // Deallocates a threads_data array for a task team, including any attached
  3329. // tasking deques. Only occurs at library shutdown.
  3330. static void __kmp_free_task_threads_data(kmp_task_team_t *task_team) {
  3331. __kmp_acquire_bootstrap_lock(&task_team->tt.tt_threads_lock);
  3332. if (task_team->tt.tt_threads_data != NULL) {
  3333. int i;
  3334. for (i = 0; i < task_team->tt.tt_max_threads; i++) {
  3335. __kmp_free_task_deque(&task_team->tt.tt_threads_data[i]);
  3336. }
  3337. __kmp_free(task_team->tt.tt_threads_data);
  3338. task_team->tt.tt_threads_data = NULL;
  3339. }
  3340. __kmp_release_bootstrap_lock(&task_team->tt.tt_threads_lock);
  3341. }
  3342. // __kmp_free_task_pri_list:
  3343. // Deallocates tasking deques used for priority tasks.
  3344. // Only occurs at library shutdown.
  3345. static void __kmp_free_task_pri_list(kmp_task_team_t *task_team) {
  3346. __kmp_acquire_bootstrap_lock(&task_team->tt.tt_task_pri_lock);
  3347. if (task_team->tt.tt_task_pri_list != NULL) {
  3348. kmp_task_pri_t *list = task_team->tt.tt_task_pri_list;
  3349. while (list != NULL) {
  3350. kmp_task_pri_t *next = list->next;
  3351. __kmp_free_task_deque(&list->td);
  3352. __kmp_free(list);
  3353. list = next;
  3354. }
  3355. task_team->tt.tt_task_pri_list = NULL;
  3356. }
  3357. __kmp_release_bootstrap_lock(&task_team->tt.tt_task_pri_lock);
  3358. }
  3359. // __kmp_allocate_task_team:
  3360. // Allocates a task team associated with a specific team, taking it from
  3361. // the global task team free list if possible. Also initializes data
  3362. // structures.
  3363. static kmp_task_team_t *__kmp_allocate_task_team(kmp_info_t *thread,
  3364. kmp_team_t *team) {
  3365. kmp_task_team_t *task_team = NULL;
  3366. int nthreads;
  3367. KA_TRACE(20, ("__kmp_allocate_task_team: T#%d entering; team = %p\n",
  3368. (thread ? __kmp_gtid_from_thread(thread) : -1), team));
  3369. if (TCR_PTR(__kmp_free_task_teams) != NULL) {
  3370. // Take a task team from the task team pool
  3371. __kmp_acquire_bootstrap_lock(&__kmp_task_team_lock);
  3372. if (__kmp_free_task_teams != NULL) {
  3373. task_team = __kmp_free_task_teams;
  3374. TCW_PTR(__kmp_free_task_teams, task_team->tt.tt_next);
  3375. task_team->tt.tt_next = NULL;
  3376. }
  3377. __kmp_release_bootstrap_lock(&__kmp_task_team_lock);
  3378. }
  3379. if (task_team == NULL) {
  3380. KE_TRACE(10, ("__kmp_allocate_task_team: T#%d allocating "
  3381. "task team for team %p\n",
  3382. __kmp_gtid_from_thread(thread), team));
  3383. // Allocate a new task team if one is not available. Cannot use
  3384. // __kmp_thread_malloc because threads not around for kmp_reap_task_team.
  3385. task_team = (kmp_task_team_t *)__kmp_allocate(sizeof(kmp_task_team_t));
  3386. __kmp_init_bootstrap_lock(&task_team->tt.tt_threads_lock);
  3387. __kmp_init_bootstrap_lock(&task_team->tt.tt_task_pri_lock);
  3388. #if USE_ITT_BUILD && USE_ITT_NOTIFY && KMP_DEBUG
  3389. // suppress race conditions detection on synchronization flags in debug mode
  3390. // this helps to analyze library internals eliminating false positives
  3391. __itt_suppress_mark_range(
  3392. __itt_suppress_range, __itt_suppress_threading_errors,
  3393. &task_team->tt.tt_found_tasks, sizeof(task_team->tt.tt_found_tasks));
  3394. __itt_suppress_mark_range(__itt_suppress_range,
  3395. __itt_suppress_threading_errors,
  3396. CCAST(kmp_uint32 *, &task_team->tt.tt_active),
  3397. sizeof(task_team->tt.tt_active));
  3398. #endif /* USE_ITT_BUILD && USE_ITT_NOTIFY && KMP_DEBUG */
// Note: __kmp_allocate zeroes returned memory, otherwise we would need:
  3400. // task_team->tt.tt_threads_data = NULL;
  3401. // task_team->tt.tt_max_threads = 0;
  3402. // task_team->tt.tt_next = NULL;
  3403. }
  3404. TCW_4(task_team->tt.tt_found_tasks, FALSE);
  3405. TCW_4(task_team->tt.tt_found_proxy_tasks, FALSE);
  3406. TCW_4(task_team->tt.tt_hidden_helper_task_encountered, FALSE);
  3407. task_team->tt.tt_nproc = nthreads = team->t.t_nproc;
  3408. KMP_ATOMIC_ST_REL(&task_team->tt.tt_unfinished_threads, nthreads);
  3410. TCW_4(task_team->tt.tt_active, TRUE);
  3411. KA_TRACE(20, ("__kmp_allocate_task_team: T#%d exiting; task_team = %p "
  3412. "unfinished_threads init'd to %d\n",
  3413. (thread ? __kmp_gtid_from_thread(thread) : -1), task_team,
  3414. KMP_ATOMIC_LD_RLX(&task_team->tt.tt_unfinished_threads)));
  3415. return task_team;
  3416. }
  3417. // __kmp_free_task_team:
  3418. // Frees the task team associated with a specific thread, and adds it
  3419. // to the global task team free list.
  3420. void __kmp_free_task_team(kmp_info_t *thread, kmp_task_team_t *task_team) {
  3421. KA_TRACE(20, ("__kmp_free_task_team: T#%d task_team = %p\n",
  3422. thread ? __kmp_gtid_from_thread(thread) : -1, task_team));
  3423. // Put task team back on free list
  3424. __kmp_acquire_bootstrap_lock(&__kmp_task_team_lock);
  3425. KMP_DEBUG_ASSERT(task_team->tt.tt_next == NULL);
  3426. task_team->tt.tt_next = __kmp_free_task_teams;
  3427. TCW_PTR(__kmp_free_task_teams, task_team);
  3428. __kmp_release_bootstrap_lock(&__kmp_task_team_lock);
  3429. }
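/* __kmp_allocate_task_team and __kmp_free_task_team together implement a
   lock-protected LIFO free list rooted at __kmp_free_task_teams. A minimal
   sketch of that recycling scheme, with hypothetical toy_* names and
   std::mutex standing in for the bootstrap lock:

     #include <mutex>

     struct toy_node {
       toy_node *next = nullptr;
     };

     struct toy_free_list {
       std::mutex lock;
       toy_node *head = nullptr;

       toy_node *get() {            // reuse a cached node if possible
         {
           std::lock_guard<std::mutex> g(lock);
           if (head != nullptr) {
             toy_node *n = head;
             head = n->next;
             n->next = nullptr;
             return n;
           }
         }
         return new toy_node();     // otherwise allocate a fresh one
       }

       void put(toy_node *n) {      // push back for later reuse
         std::lock_guard<std::mutex> g(lock);
         n->next = head;
         head = n;
       }
     };
*/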
  3430. // __kmp_reap_task_teams:
  3431. // Free all the task teams on the task team free list.
  3432. // Should only be done during library shutdown.
  3433. // Cannot do anything that needs a thread structure or gtid since they are
  3434. // already gone.
  3435. void __kmp_reap_task_teams(void) {
  3436. kmp_task_team_t *task_team;
  3437. if (TCR_PTR(__kmp_free_task_teams) != NULL) {
  3438. // Free all task_teams on the free list
  3439. __kmp_acquire_bootstrap_lock(&__kmp_task_team_lock);
  3440. while ((task_team = __kmp_free_task_teams) != NULL) {
  3441. __kmp_free_task_teams = task_team->tt.tt_next;
  3442. task_team->tt.tt_next = NULL;
  3443. // Free threads_data if necessary
  3444. if (task_team->tt.tt_threads_data != NULL) {
  3445. __kmp_free_task_threads_data(task_team);
  3446. }
  3447. if (task_team->tt.tt_task_pri_list != NULL) {
  3448. __kmp_free_task_pri_list(task_team);
  3449. }
  3450. __kmp_free(task_team);
  3451. }
  3452. __kmp_release_bootstrap_lock(&__kmp_task_team_lock);
  3453. }
  3454. }
  3455. // __kmp_wait_to_unref_task_teams:
  3456. // Some threads could still be in the fork barrier release code, possibly
  3457. // trying to steal tasks. Wait for each thread to unreference its task team.
  3458. void __kmp_wait_to_unref_task_teams(void) {
  3459. kmp_info_t *thread;
  3460. kmp_uint32 spins;
  3461. kmp_uint64 time;
  3462. int done;
  3463. KMP_INIT_YIELD(spins);
  3464. KMP_INIT_BACKOFF(time);
  3465. for (;;) {
  3466. done = TRUE;
// TODO: GEH - this may be wrong because some sync would be necessary
  3468. // in case threads are added to the pool during the traversal. Need to
  3469. // verify that lock for thread pool is held when calling this routine.
  3470. for (thread = CCAST(kmp_info_t *, __kmp_thread_pool); thread != NULL;
  3471. thread = thread->th.th_next_pool) {
  3472. #if KMP_OS_WINDOWS
  3473. DWORD exit_val;
  3474. #endif
  3475. if (TCR_PTR(thread->th.th_task_team) == NULL) {
  3476. KA_TRACE(10, ("__kmp_wait_to_unref_task_team: T#%d task_team == NULL\n",
  3477. __kmp_gtid_from_thread(thread)));
  3478. continue;
  3479. }
  3480. #if KMP_OS_WINDOWS
  3481. // TODO: GEH - add this check for Linux* OS / OS X* as well?
  3482. if (!__kmp_is_thread_alive(thread, &exit_val)) {
  3483. thread->th.th_task_team = NULL;
  3484. continue;
  3485. }
  3486. #endif
  3487. done = FALSE; // Because th_task_team pointer is not NULL for this thread
  3488. KA_TRACE(10, ("__kmp_wait_to_unref_task_team: Waiting for T#%d to "
  3489. "unreference task_team\n",
  3490. __kmp_gtid_from_thread(thread)));
  3491. if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) {
  3492. void *sleep_loc;
  3493. // If the thread is sleeping, awaken it.
  3494. if ((sleep_loc = TCR_PTR(CCAST(void *, thread->th.th_sleep_loc))) !=
  3495. NULL) {
  3496. KA_TRACE(
  3497. 10,
  3498. ("__kmp_wait_to_unref_task_team: T#%d waking up thread T#%d\n",
  3499. __kmp_gtid_from_thread(thread), __kmp_gtid_from_thread(thread)));
  3500. __kmp_null_resume_wrapper(thread);
  3501. }
  3502. }
  3503. }
  3504. if (done) {
  3505. break;
  3506. }
  3507. // If oversubscribed or have waited a bit, yield.
  3508. KMP_YIELD_OVERSUB_ELSE_SPIN(spins, time);
  3509. }
  3510. }
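/* The wait loop above sweeps the thread pool repeatedly and yields between
   sweeps instead of blocking, since the threads being waited on may be
   sleeping or exiting. A condensed sketch of that poll-and-yield structure,
   with hypothetical names and std::this_thread::yield() in place of
   KMP_YIELD_OVERSUB_ELSE_SPIN:

     #include <atomic>
     #include <thread>

     inline void toy_wait_all_cleared(std::atomic<void *> *refs, int n) {
       for (;;) {
         bool done = true;
         for (int i = 0; i < n; ++i)
           if (refs[i].load(std::memory_order_acquire) != nullptr)
             done = false;          // this slot still holds a reference
         if (done)
           break;
         std::this_thread::yield(); // back off before the next sweep
       }
     }
*/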
  3511. // __kmp_task_team_setup: Create a task_team for the current team, but use
  3512. // an already created, unused one if it already exists.
  3513. void __kmp_task_team_setup(kmp_info_t *this_thr, kmp_team_t *team, int always) {
  3514. KMP_DEBUG_ASSERT(__kmp_tasking_mode != tskm_immediate_exec);
  3515. // If this task_team hasn't been created yet, allocate it. It will be used in
  3516. // the region after the next.
  3517. // If it exists, it is the current task team and shouldn't be touched yet as
  3518. // it may still be in use.
  3519. if (team->t.t_task_team[this_thr->th.th_task_state] == NULL &&
  3520. (always || team->t.t_nproc > 1)) {
  3521. team->t.t_task_team[this_thr->th.th_task_state] =
  3522. __kmp_allocate_task_team(this_thr, team);
  3523. KA_TRACE(20, ("__kmp_task_team_setup: Primary T#%d created new task_team %p"
  3524. " for team %d at parity=%d\n",
  3525. __kmp_gtid_from_thread(this_thr),
  3526. team->t.t_task_team[this_thr->th.th_task_state], team->t.t_id,
  3527. this_thr->th.th_task_state));
  3528. }
  3529. // After threads exit the release, they will call sync, and then point to this
  3530. // other task_team; make sure it is allocated and properly initialized. As
  3531. // threads spin in the barrier release phase, they will continue to use the
  3532. // previous task_team struct(above), until they receive the signal to stop
  3533. // checking for tasks (they can't safely reference the kmp_team_t struct,
  3534. // which could be reallocated by the primary thread). No task teams are formed
  3535. // for serialized teams.
  3536. if (team->t.t_nproc > 1) {
  3537. int other_team = 1 - this_thr->th.th_task_state;
  3538. KMP_DEBUG_ASSERT(other_team >= 0 && other_team < 2);
  3539. if (team->t.t_task_team[other_team] == NULL) { // setup other team as well
  3540. team->t.t_task_team[other_team] =
  3541. __kmp_allocate_task_team(this_thr, team);
  3542. KA_TRACE(20, ("__kmp_task_team_setup: Primary T#%d created second new "
  3543. "task_team %p for team %d at parity=%d\n",
  3544. __kmp_gtid_from_thread(this_thr),
  3545. team->t.t_task_team[other_team], team->t.t_id, other_team));
  3546. } else { // Leave the old task team struct in place for the upcoming region;
  3547. // adjust as needed
  3548. kmp_task_team_t *task_team = team->t.t_task_team[other_team];
  3549. if (!task_team->tt.tt_active ||
  3550. team->t.t_nproc != task_team->tt.tt_nproc) {
  3551. TCW_4(task_team->tt.tt_nproc, team->t.t_nproc);
  3552. TCW_4(task_team->tt.tt_found_tasks, FALSE);
  3553. TCW_4(task_team->tt.tt_found_proxy_tasks, FALSE);
  3554. TCW_4(task_team->tt.tt_hidden_helper_task_encountered, FALSE);
  3555. KMP_ATOMIC_ST_REL(&task_team->tt.tt_unfinished_threads,
  3556. team->t.t_nproc);
  3557. TCW_4(task_team->tt.tt_active, TRUE);
  3558. }
  3559. // if team size has changed, the first thread to enable tasking will
  3560. // realloc threads_data if necessary
  3561. KA_TRACE(20, ("__kmp_task_team_setup: Primary T#%d reset next task_team "
  3562. "%p for team %d at parity=%d\n",
  3563. __kmp_gtid_from_thread(this_thr),
  3564. team->t.t_task_team[other_team], team->t.t_id, other_team));
  3565. }
  3566. }
// For a regular thread, task enabling should be called when the task is going
// to be pushed to a deque. However, for the hidden helper thread, we need it
// ahead of time so that some operations can be performed without race
// conditions.
  3571. if (this_thr == __kmp_hidden_helper_main_thread) {
  3572. for (int i = 0; i < 2; ++i) {
  3573. kmp_task_team_t *task_team = team->t.t_task_team[i];
  3574. if (KMP_TASKING_ENABLED(task_team)) {
  3575. continue;
  3576. }
  3577. __kmp_enable_tasking(task_team, this_thr);
  3578. for (int j = 0; j < task_team->tt.tt_nproc; ++j) {
  3579. kmp_thread_data_t *thread_data = &task_team->tt.tt_threads_data[j];
  3580. if (thread_data->td.td_deque == NULL) {
  3581. __kmp_alloc_task_deque(__kmp_hidden_helper_threads[j], thread_data);
  3582. }
  3583. }
  3584. }
  3585. }
  3586. }
  3587. // __kmp_task_team_sync: Propagation of task team data from team to threads
  3588. // which happens just after the release phase of a team barrier. This may be
  3589. // called by any thread, but only for teams with # threads > 1.
  3590. void __kmp_task_team_sync(kmp_info_t *this_thr, kmp_team_t *team) {
  3591. KMP_DEBUG_ASSERT(__kmp_tasking_mode != tskm_immediate_exec);
  3592. // Toggle the th_task_state field, to switch which task_team this thread
  3593. // refers to
  3594. this_thr->th.th_task_state = (kmp_uint8)(1 - this_thr->th.th_task_state);
  3595. // It is now safe to propagate the task team pointer from the team struct to
  3596. // the current thread.
  3597. TCW_PTR(this_thr->th.th_task_team,
  3598. team->t.t_task_team[this_thr->th.th_task_state]);
  3599. KA_TRACE(20,
  3600. ("__kmp_task_team_sync: Thread T#%d task team switched to task_team "
  3601. "%p from Team #%d (parity=%d)\n",
  3602. __kmp_gtid_from_thread(this_thr), this_thr->th.th_task_team,
  3603. team->t.t_id, this_thr->th.th_task_state));
  3604. }
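/* The sync step above is just a parity flip: across consecutive barriers each
   thread alternates between the two task-team slots of its team. A tiny
   sketch of the toggle, with hypothetical toy_* names:

     struct toy_thread {
       unsigned task_state = 0;     // 0 or 1
       void *task_team = nullptr;
     };

     inline void toy_sync(toy_thread &thr, void *slots[2]) {
       thr.task_state = 1u - thr.task_state;  // toggle the parity
       thr.task_team = slots[thr.task_state]; // adopt the other slot
     }
*/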
  3605. // __kmp_task_team_wait: Primary thread waits for outstanding tasks after the
  3606. // barrier gather phase. Only called by primary thread if #threads in team > 1
  3607. // or if proxy tasks were created.
  3608. //
  3609. // wait is a flag that defaults to 1 (see kmp.h), but waiting can be turned off
  3610. // by passing in 0 optionally as the last argument. When wait is zero, primary
  3611. // thread does not wait for unfinished_threads to reach 0.
  3612. void __kmp_task_team_wait(
  3613. kmp_info_t *this_thr,
  3614. kmp_team_t *team USE_ITT_BUILD_ARG(void *itt_sync_obj), int wait) {
  3615. kmp_task_team_t *task_team = team->t.t_task_team[this_thr->th.th_task_state];
  3616. KMP_DEBUG_ASSERT(__kmp_tasking_mode != tskm_immediate_exec);
  3617. KMP_DEBUG_ASSERT(task_team == this_thr->th.th_task_team);
  3618. if ((task_team != NULL) && KMP_TASKING_ENABLED(task_team)) {
  3619. if (wait) {
  3620. KA_TRACE(20, ("__kmp_task_team_wait: Primary T#%d waiting for all tasks "
  3621. "(for unfinished_threads to reach 0) on task_team = %p\n",
  3622. __kmp_gtid_from_thread(this_thr), task_team));
  3623. // Worker threads may have dropped through to release phase, but could
  3624. // still be executing tasks. Wait here for tasks to complete. To avoid
  3625. // memory contention, only primary thread checks termination condition.
  3626. kmp_flag_32<false, false> flag(
  3627. RCAST(std::atomic<kmp_uint32> *,
  3628. &task_team->tt.tt_unfinished_threads),
  3629. 0U);
  3630. flag.wait(this_thr, TRUE USE_ITT_BUILD_ARG(itt_sync_obj));
  3631. }
  3632. // Deactivate the old task team, so that the worker threads will stop
  3633. // referencing it while spinning.
  3634. KA_TRACE(
  3635. 20,
  3636. ("__kmp_task_team_wait: Primary T#%d deactivating task_team %p: "
  3637. "setting active to false, setting local and team's pointer to NULL\n",
  3638. __kmp_gtid_from_thread(this_thr), task_team));
  3639. KMP_DEBUG_ASSERT(task_team->tt.tt_nproc > 1 ||
  3640. task_team->tt.tt_found_proxy_tasks == TRUE ||
  3641. task_team->tt.tt_hidden_helper_task_encountered == TRUE);
  3642. TCW_SYNC_4(task_team->tt.tt_found_proxy_tasks, FALSE);
  3643. TCW_SYNC_4(task_team->tt.tt_hidden_helper_task_encountered, FALSE);
  3644. KMP_CHECK_UPDATE(task_team->tt.tt_untied_task_encountered, 0);
  3645. TCW_SYNC_4(task_team->tt.tt_active, FALSE);
  3646. KMP_MB();
  3647. TCW_PTR(this_thr->th.th_task_team, NULL);
  3648. }
  3649. }
  3650. // __kmp_tasking_barrier:
  3651. // This routine is called only when __kmp_tasking_mode == tskm_extra_barrier.
  3652. // Internal function to execute all tasks prior to a regular barrier or a join
  3653. // barrier. It is a full barrier itself, which unfortunately turns regular
  3654. // barriers into double barriers and join barriers into 1 1/2 barriers.
  3655. void __kmp_tasking_barrier(kmp_team_t *team, kmp_info_t *thread, int gtid) {
  3656. std::atomic<kmp_uint32> *spin = RCAST(
  3657. std::atomic<kmp_uint32> *,
  3658. &team->t.t_task_team[thread->th.th_task_state]->tt.tt_unfinished_threads);
  3659. int flag = FALSE;
  3660. KMP_DEBUG_ASSERT(__kmp_tasking_mode == tskm_extra_barrier);
  3661. #if USE_ITT_BUILD
  3662. KMP_FSYNC_SPIN_INIT(spin, NULL);
  3663. #endif /* USE_ITT_BUILD */
  3664. kmp_flag_32<false, false> spin_flag(spin, 0U);
  3665. while (!spin_flag.execute_tasks(thread, gtid, TRUE,
  3666. &flag USE_ITT_BUILD_ARG(NULL), 0)) {
  3667. #if USE_ITT_BUILD
  3668. // TODO: What about itt_sync_obj??
  3669. KMP_FSYNC_SPIN_PREPARE(RCAST(void *, spin));
  3670. #endif /* USE_ITT_BUILD */
  3671. if (TCR_4(__kmp_global.g.g_done)) {
  3672. if (__kmp_global.g.g_abort)
  3673. __kmp_abort_thread();
  3674. break;
  3675. }
  3676. KMP_YIELD(TRUE);
  3677. }
  3678. #if USE_ITT_BUILD
  3679. KMP_FSYNC_SPIN_ACQUIRED(RCAST(void *, spin));
  3680. #endif /* USE_ITT_BUILD */
  3681. }
  3682. // __kmp_give_task puts a task into a given thread queue if:
  3683. // - the queue for that thread was created
  3684. // - there's space in that queue
  3685. // Because of this, __kmp_push_task needs to check if there's space after
  3686. // getting the lock
  3687. static bool __kmp_give_task(kmp_info_t *thread, kmp_int32 tid, kmp_task_t *task,
  3688. kmp_int32 pass) {
  3689. kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task);
  3690. kmp_task_team_t *task_team = taskdata->td_task_team;
  3691. KA_TRACE(20, ("__kmp_give_task: trying to give task %p to thread %d.\n",
  3692. taskdata, tid));
  3693. // If task_team is NULL something went really bad...
  3694. KMP_DEBUG_ASSERT(task_team != NULL);
  3695. bool result = false;
  3696. kmp_thread_data_t *thread_data = &task_team->tt.tt_threads_data[tid];
  3697. if (thread_data->td.td_deque == NULL) {
  3698. // There's no queue in this thread, go find another one
  3699. // We're guaranteed that at least one thread has a queue
  3700. KA_TRACE(30,
  3701. ("__kmp_give_task: thread %d has no queue while giving task %p.\n",
  3702. tid, taskdata));
  3703. return result;
  3704. }
  3705. if (TCR_4(thread_data->td.td_deque_ntasks) >=
  3706. TASK_DEQUE_SIZE(thread_data->td)) {
  3707. KA_TRACE(
  3708. 30,
  3709. ("__kmp_give_task: queue is full while giving task %p to thread %d.\n",
  3710. taskdata, tid));
  3711. // if this deque is bigger than the pass ratio give a chance to another
  3712. // thread
  3713. if (TASK_DEQUE_SIZE(thread_data->td) / INITIAL_TASK_DEQUE_SIZE >= pass)
  3714. return result;
  3715. __kmp_acquire_bootstrap_lock(&thread_data->td.td_deque_lock);
  3716. if (TCR_4(thread_data->td.td_deque_ntasks) >=
  3717. TASK_DEQUE_SIZE(thread_data->td)) {
  3718. // expand deque to push the task which is not allowed to execute
  3719. __kmp_realloc_task_deque(thread, thread_data);
  3720. }
  3721. } else {
  3722. __kmp_acquire_bootstrap_lock(&thread_data->td.td_deque_lock);
  3723. if (TCR_4(thread_data->td.td_deque_ntasks) >=
  3724. TASK_DEQUE_SIZE(thread_data->td)) {
  3725. KA_TRACE(30, ("__kmp_give_task: queue is full while giving task %p to "
  3726. "thread %d.\n",
  3727. taskdata, tid));
  3728. // if this deque is bigger than the pass ratio give a chance to another
  3729. // thread
  3730. if (TASK_DEQUE_SIZE(thread_data->td) / INITIAL_TASK_DEQUE_SIZE >= pass)
  3731. goto release_and_exit;
  3732. __kmp_realloc_task_deque(thread, thread_data);
  3733. }
  3734. }
  3735. // lock is held here, and there is space in the deque
  3736. thread_data->td.td_deque[thread_data->td.td_deque_tail] = taskdata;
  3737. // Wrap index.
  3738. thread_data->td.td_deque_tail =
  3739. (thread_data->td.td_deque_tail + 1) & TASK_DEQUE_MASK(thread_data->td);
  3740. TCW_4(thread_data->td.td_deque_ntasks,
  3741. TCR_4(thread_data->td.td_deque_ntasks) + 1);
  3742. result = true;
  3743. KA_TRACE(30, ("__kmp_give_task: successfully gave task %p to thread %d.\n",
  3744. taskdata, tid));
  3745. release_and_exit:
  3746. __kmp_release_bootstrap_lock(&thread_data->td.td_deque_lock);
  3747. return result;
  3748. }
  3749. #define PROXY_TASK_FLAG 0x40000000
/* The finish of the proxy tasks is divided into two pieces:
   - the top half is the one that can be done from a thread outside the team
   - the bottom half must be run from a thread within the team

   In order to run the bottom half the task gets queued back into one of the
   threads of the team. Once the td_incomplete_child_tasks counter of the
   parent is decremented the threads can leave the barriers. So, the bottom
   half needs to be queued before the counter is decremented. The top half is
   therefore divided into two parts:
   - things that can be run before queuing the bottom half
   - things that must be run after queuing the bottom half

   This creates a second race as the bottom half can free the task before the
   second top half is executed. To avoid this we use the
   td_incomplete_child_tasks counter of the proxy task to synchronize the top
   and bottom half. */
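/* A stand-alone sketch of the flag-bit handshake just described (illustrative
   only; the toy_* names and constant are hypothetical): the first top half
   sets a reserved bit in the incomplete-children counter, the second top half
   clears it, and the bottom half spins until the bit is gone before freeing
   anything.

     #include <atomic>

     constexpr int kToyProxyFlag = 0x40000000;

     inline void toy_first_top_half(std::atomic<int> &children) {
       children.fetch_or(kToyProxyFlag);   // "imaginary child": blocks freeing
     }

     inline void toy_second_top_half(std::atomic<int> &children) {
       children.fetch_and(~kToyProxyFlag); // lets the bottom half finish
     }

     inline void toy_bottom_half(std::atomic<int> &children) {
       while (children.load(std::memory_order_acquire) & kToyProxyFlag)
         ;                                 // wait for the second top half
       // ... release dependences and free the task here ...
     }
*/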
  3764. static void __kmp_first_top_half_finish_proxy(kmp_taskdata_t *taskdata) {
  3765. KMP_DEBUG_ASSERT(taskdata->td_flags.tasktype == TASK_EXPLICIT);
  3766. KMP_DEBUG_ASSERT(taskdata->td_flags.proxy == TASK_PROXY);
  3767. KMP_DEBUG_ASSERT(taskdata->td_flags.complete == 0);
  3768. KMP_DEBUG_ASSERT(taskdata->td_flags.freed == 0);
  3769. taskdata->td_flags.complete = 1; // mark the task as completed
  3770. if (taskdata->td_taskgroup)
  3771. KMP_ATOMIC_DEC(&taskdata->td_taskgroup->count);
// Create an imaginary child for this task so the bottom half cannot
// release the task before we have completed the second top half
  3774. KMP_ATOMIC_OR(&taskdata->td_incomplete_child_tasks, PROXY_TASK_FLAG);
  3775. }
  3776. static void __kmp_second_top_half_finish_proxy(kmp_taskdata_t *taskdata) {
  3777. #if KMP_DEBUG
  3778. kmp_int32 children = 0;
  3779. // Predecrement simulated by "- 1" calculation
  3780. children = -1 +
  3781. #endif
  3782. KMP_ATOMIC_DEC(&taskdata->td_parent->td_incomplete_child_tasks);
  3783. KMP_DEBUG_ASSERT(children >= 0);
// Remove the imaginary child
  3785. KMP_ATOMIC_AND(&taskdata->td_incomplete_child_tasks, ~PROXY_TASK_FLAG);
  3786. }
  3787. static void __kmp_bottom_half_finish_proxy(kmp_int32 gtid, kmp_task_t *ptask) {
  3788. kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(ptask);
  3789. kmp_info_t *thread = __kmp_threads[gtid];
  3790. KMP_DEBUG_ASSERT(taskdata->td_flags.proxy == TASK_PROXY);
  3791. KMP_DEBUG_ASSERT(taskdata->td_flags.complete ==
  3792. 1); // top half must run before bottom half
  3793. // We need to wait to make sure the top half is finished
  3794. // Spinning here should be ok as this should happen quickly
  3795. while ((KMP_ATOMIC_LD_ACQ(&taskdata->td_incomplete_child_tasks) &
  3796. PROXY_TASK_FLAG) > 0)
  3797. ;
  3798. __kmp_release_deps(gtid, taskdata);
  3799. __kmp_free_task_and_ancestors(gtid, taskdata, thread);
  3800. }
  3801. /*!
  3802. @ingroup TASKING
  3803. @param gtid Global Thread ID of encountering thread
  3804. @param ptask Task which execution is completed
Execute the completion of a proxy task from a thread that is part of the
team. Run first and bottom halves directly.
  3807. */
  3808. void __kmpc_proxy_task_completed(kmp_int32 gtid, kmp_task_t *ptask) {
  3809. KMP_DEBUG_ASSERT(ptask != NULL);
  3810. kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(ptask);
  3811. KA_TRACE(
  3812. 10, ("__kmp_proxy_task_completed(enter): T#%d proxy task %p completing\n",
  3813. gtid, taskdata));
  3814. __kmp_assert_valid_gtid(gtid);
  3815. KMP_DEBUG_ASSERT(taskdata->td_flags.proxy == TASK_PROXY);
  3816. __kmp_first_top_half_finish_proxy(taskdata);
  3817. __kmp_second_top_half_finish_proxy(taskdata);
  3818. __kmp_bottom_half_finish_proxy(gtid, ptask);
  3819. KA_TRACE(10,
  3820. ("__kmp_proxy_task_completed(exit): T#%d proxy task %p completing\n",
  3821. gtid, taskdata));
  3822. }
  3823. void __kmpc_give_task(kmp_task_t *ptask, kmp_int32 start = 0) {
  3824. KMP_DEBUG_ASSERT(ptask != NULL);
  3825. kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(ptask);
  3826. // Enqueue task to complete bottom half completion from a thread within the
  3827. // corresponding team
  3828. kmp_team_t *team = taskdata->td_team;
  3829. kmp_int32 nthreads = team->t.t_nproc;
  3830. kmp_info_t *thread;
  3831. // This should be similar to start_k = __kmp_get_random( thread ) % nthreads
  3832. // but we cannot use __kmp_get_random here
  3833. kmp_int32 start_k = start % nthreads;
  3834. kmp_int32 pass = 1;
  3835. kmp_int32 k = start_k;
  3836. do {
  3837. // For now we're just linearly trying to find a thread
  3838. thread = team->t.t_threads[k];
  3839. k = (k + 1) % nthreads;
  3840. // we did a full pass through all the threads
  3841. if (k == start_k)
  3842. pass = pass << 1;
  3843. } while (!__kmp_give_task(thread, k, ptask, pass));
  3844. if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME && __kmp_wpolicy_passive) {
  3845. // awake at least one thread to execute given task
  3846. for (int i = 0; i < nthreads; ++i) {
  3847. thread = team->t.t_threads[i];
  3848. if (thread->th.th_sleep_loc != NULL) {
  3849. __kmp_null_resume_wrapper(thread);
  3850. break;
  3851. }
  3852. }
  3853. }
  3854. }
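/* __kmpc_give_task walks the team round-robin and doubles "pass" after every
   full sweep, while __kmp_give_task only accepts a deque whose size is still
   below pass * INITIAL_TASK_DEQUE_SIZE; queues are therefore grown only after
   every thread has been offered the task. A condensed sketch of that back-off
   loop, with hypothetical names (accept() stands in for __kmp_give_task):

     #include <functional>

     inline void toy_distribute(int nthreads, int start,
                                const std::function<bool(int, int)> &accept) {
       int pass = 1;
       int start_k = start % nthreads;
       int k = start_k;
       int target;
       do {
         target = k;                // candidate thread for this attempt
         k = (k + 1) % nthreads;
         if (k == start_k)
           pass <<= 1;              // full sweep done: relax the threshold
       } while (!accept(target, pass));
     }
*/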
  3855. /*!
  3856. @ingroup TASKING
  3857. @param ptask Task which execution is completed
Execute the completion of a proxy task from a thread that need not belong to
the team.
  3860. */
  3861. void __kmpc_proxy_task_completed_ooo(kmp_task_t *ptask) {
  3862. KMP_DEBUG_ASSERT(ptask != NULL);
  3863. kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(ptask);
  3864. KA_TRACE(
  3865. 10,
  3866. ("__kmp_proxy_task_completed_ooo(enter): proxy task completing ooo %p\n",
  3867. taskdata));
  3868. KMP_DEBUG_ASSERT(taskdata->td_flags.proxy == TASK_PROXY);
  3869. __kmp_first_top_half_finish_proxy(taskdata);
  3870. __kmpc_give_task(ptask);
  3871. __kmp_second_top_half_finish_proxy(taskdata);
  3872. KA_TRACE(
  3873. 10,
  3874. ("__kmp_proxy_task_completed_ooo(exit): proxy task completing ooo %p\n",
  3875. taskdata));
  3876. }
  3877. kmp_event_t *__kmpc_task_allow_completion_event(ident_t *loc_ref, int gtid,
  3878. kmp_task_t *task) {
  3879. kmp_taskdata_t *td = KMP_TASK_TO_TASKDATA(task);
  3880. if (td->td_allow_completion_event.type == KMP_EVENT_UNINITIALIZED) {
  3881. td->td_allow_completion_event.type = KMP_EVENT_ALLOW_COMPLETION;
  3882. td->td_allow_completion_event.ed.task = task;
  3883. __kmp_init_tas_lock(&td->td_allow_completion_event.lock);
  3884. }
  3885. return &td->td_allow_completion_event;
  3886. }
  3887. void __kmp_fulfill_event(kmp_event_t *event) {
  3888. if (event->type == KMP_EVENT_ALLOW_COMPLETION) {
  3889. kmp_task_t *ptask = event->ed.task;
  3890. kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(ptask);
  3891. bool detached = false;
  3892. int gtid = __kmp_get_gtid();
  3893. // The associated task might have completed or could be completing at this
  3894. // point.
  3895. // We need to take the lock to avoid races
  3896. __kmp_acquire_tas_lock(&event->lock, gtid);
  3897. if (taskdata->td_flags.proxy == TASK_PROXY) {
  3898. detached = true;
  3899. } else {
  3900. #if OMPT_SUPPORT
  3901. // The OMPT event must occur under mutual exclusion,
  3902. // otherwise the tool might access ptask after free
  3903. if (UNLIKELY(ompt_enabled.enabled))
  3904. __ompt_task_finish(ptask, NULL, ompt_task_early_fulfill);
  3905. #endif
  3906. }
  3907. event->type = KMP_EVENT_UNINITIALIZED;
  3908. __kmp_release_tas_lock(&event->lock, gtid);
  3909. if (detached) {
  3910. #if OMPT_SUPPORT
  3911. // We free ptask afterwards and know the task is finished,
  3912. // so locking is not necessary
  3913. if (UNLIKELY(ompt_enabled.enabled))
  3914. __ompt_task_finish(ptask, NULL, ompt_task_late_fulfill);
  3915. #endif
  3916. // If the task detached complete the proxy task
  3917. if (gtid >= 0) {
  3918. kmp_team_t *team = taskdata->td_team;
  3919. kmp_info_t *thread = __kmp_get_thread();
  3920. if (thread->th.th_team == team) {
  3921. __kmpc_proxy_task_completed(gtid, ptask);
  3922. return;
  3923. }
  3924. }
  3925. // fallback
  3926. __kmpc_proxy_task_completed_ooo(ptask);
  3927. }
  3928. }
  3929. }
  3930. // __kmp_task_dup_alloc: Allocate the taskdata and make a copy of source task
  3931. // for taskloop
  3932. //
  3933. // thread: allocating thread
  3934. // task_src: pointer to source task to be duplicated
  3935. // returns: a pointer to the allocated kmp_task_t structure (task).
  3936. kmp_task_t *__kmp_task_dup_alloc(kmp_info_t *thread, kmp_task_t *task_src) {
  3937. kmp_task_t *task;
  3938. kmp_taskdata_t *taskdata;
  3939. kmp_taskdata_t *taskdata_src = KMP_TASK_TO_TASKDATA(task_src);
  3940. kmp_taskdata_t *parent_task = taskdata_src->td_parent; // same parent task
  3941. size_t shareds_offset;
  3942. size_t task_size;
  3943. KA_TRACE(10, ("__kmp_task_dup_alloc(enter): Th %p, source task %p\n", thread,
  3944. task_src));
  3945. KMP_DEBUG_ASSERT(taskdata_src->td_flags.proxy ==
  3946. TASK_FULL); // it should not be proxy task
  3947. KMP_DEBUG_ASSERT(taskdata_src->td_flags.tasktype == TASK_EXPLICIT);
  3948. task_size = taskdata_src->td_size_alloc;
  3949. // Allocate a kmp_taskdata_t block and a kmp_task_t block.
  3950. KA_TRACE(30, ("__kmp_task_dup_alloc: Th %p, malloc size %ld\n", thread,
  3951. task_size));
  3952. #if USE_FAST_MEMORY
  3953. taskdata = (kmp_taskdata_t *)__kmp_fast_allocate(thread, task_size);
  3954. #else
  3955. taskdata = (kmp_taskdata_t *)__kmp_thread_malloc(thread, task_size);
  3956. #endif /* USE_FAST_MEMORY */
  3957. KMP_MEMCPY(taskdata, taskdata_src, task_size);
  3958. task = KMP_TASKDATA_TO_TASK(taskdata);
  3959. // Initialize new task (only specific fields not affected by memcpy)
  3960. taskdata->td_task_id = KMP_GEN_TASK_ID();
  3961. if (task->shareds != NULL) { // need setup shareds pointer
  3962. shareds_offset = (char *)task_src->shareds - (char *)taskdata_src;
  3963. task->shareds = &((char *)taskdata)[shareds_offset];
  3964. KMP_DEBUG_ASSERT((((kmp_uintptr_t)task->shareds) & (sizeof(void *) - 1)) ==
  3965. 0);
  3966. }
  3967. taskdata->td_alloc_thread = thread;
  3968. taskdata->td_parent = parent_task;
  3969. // task inherits the taskgroup from the parent task
  3970. taskdata->td_taskgroup = parent_task->td_taskgroup;
  3971. // tied task needs to initialize the td_last_tied at creation,
  3972. // untied one does this when it is scheduled for execution
  3973. if (taskdata->td_flags.tiedness == TASK_TIED)
  3974. taskdata->td_last_tied = taskdata;
  3975. // Only need to keep track of child task counts if team parallel and tasking
  3976. // not serialized
  3977. if (!(taskdata->td_flags.team_serial || taskdata->td_flags.tasking_ser)) {
  3978. KMP_ATOMIC_INC(&parent_task->td_incomplete_child_tasks);
  3979. if (parent_task->td_taskgroup)
  3980. KMP_ATOMIC_INC(&parent_task->td_taskgroup->count);
// Only need to keep track of allocated child tasks for explicit tasks since
// implicit tasks are not deallocated
  3983. if (taskdata->td_parent->td_flags.tasktype == TASK_EXPLICIT)
  3984. KMP_ATOMIC_INC(&taskdata->td_parent->td_allocated_child_tasks);
  3985. }
  3986. KA_TRACE(20,
  3987. ("__kmp_task_dup_alloc(exit): Th %p, created task %p, parent=%p\n",
  3988. thread, taskdata, taskdata->td_parent));
  3989. #if OMPT_SUPPORT
  3990. if (UNLIKELY(ompt_enabled.enabled))
  3991. __ompt_task_init(taskdata, thread->th.th_info.ds.ds_gtid);
  3992. #endif
  3993. return task;
  3994. }
  3995. // Routine optionally generated by the compiler for setting the lastprivate flag
  3996. // and calling needed constructors for private/firstprivate objects
  3997. // (used to form taskloop tasks from pattern task)
  3998. // Parameters: dest task, src task, lastprivate flag.
  3999. typedef void (*p_task_dup_t)(kmp_task_t *, kmp_task_t *, kmp_int32);
  4000. KMP_BUILD_ASSERT(sizeof(long) == 4 || sizeof(long) == 8);
  4001. // class to encapsulate manipulating loop bounds in a taskloop task.
  4002. // this abstracts away the Intel vs GOMP taskloop interface for setting/getting
  4003. // the loop bound variables.
  4004. class kmp_taskloop_bounds_t {
  4005. kmp_task_t *task;
  4006. const kmp_taskdata_t *taskdata;
  4007. size_t lower_offset;
  4008. size_t upper_offset;
  4009. public:
  4010. kmp_taskloop_bounds_t(kmp_task_t *_task, kmp_uint64 *lb, kmp_uint64 *ub)
  4011. : task(_task), taskdata(KMP_TASK_TO_TASKDATA(task)),
  4012. lower_offset((char *)lb - (char *)task),
  4013. upper_offset((char *)ub - (char *)task) {
  4014. KMP_DEBUG_ASSERT((char *)lb > (char *)_task);
  4015. KMP_DEBUG_ASSERT((char *)ub > (char *)_task);
  4016. }
  4017. kmp_taskloop_bounds_t(kmp_task_t *_task, const kmp_taskloop_bounds_t &bounds)
  4018. : task(_task), taskdata(KMP_TASK_TO_TASKDATA(_task)),
  4019. lower_offset(bounds.lower_offset), upper_offset(bounds.upper_offset) {}
  4020. size_t get_lower_offset() const { return lower_offset; }
  4021. size_t get_upper_offset() const { return upper_offset; }
  4022. kmp_uint64 get_lb() const {
  4023. kmp_int64 retval;
  4024. #if defined(KMP_GOMP_COMPAT)
  4025. // Intel task just returns the lower bound normally
  4026. if (!taskdata->td_flags.native) {
  4027. retval = *(kmp_int64 *)((char *)task + lower_offset);
  4028. } else {
  4029. // GOMP task has to take into account the sizeof(long)
  4030. if (taskdata->td_size_loop_bounds == 4) {
  4031. kmp_int32 *lb = RCAST(kmp_int32 *, task->shareds);
  4032. retval = (kmp_int64)*lb;
  4033. } else {
  4034. kmp_int64 *lb = RCAST(kmp_int64 *, task->shareds);
  4035. retval = (kmp_int64)*lb;
  4036. }
  4037. }
  4038. #else
  4039. (void)taskdata;
  4040. retval = *(kmp_int64 *)((char *)task + lower_offset);
  4041. #endif // defined(KMP_GOMP_COMPAT)
  4042. return retval;
  4043. }
  4044. kmp_uint64 get_ub() const {
  4045. kmp_int64 retval;
  4046. #if defined(KMP_GOMP_COMPAT)
  4047. // Intel task just returns the upper bound normally
  4048. if (!taskdata->td_flags.native) {
  4049. retval = *(kmp_int64 *)((char *)task + upper_offset);
  4050. } else {
  4051. // GOMP task has to take into account the sizeof(long)
  4052. if (taskdata->td_size_loop_bounds == 4) {
  4053. kmp_int32 *ub = RCAST(kmp_int32 *, task->shareds) + 1;
  4054. retval = (kmp_int64)*ub;
  4055. } else {
  4056. kmp_int64 *ub = RCAST(kmp_int64 *, task->shareds) + 1;
  4057. retval = (kmp_int64)*ub;
  4058. }
  4059. }
  4060. #else
  4061. retval = *(kmp_int64 *)((char *)task + upper_offset);
  4062. #endif // defined(KMP_GOMP_COMPAT)
  4063. return retval;
  4064. }
  4065. void set_lb(kmp_uint64 lb) {
  4066. #if defined(KMP_GOMP_COMPAT)
  4067. // Intel task just sets the lower bound normally
  4068. if (!taskdata->td_flags.native) {
  4069. *(kmp_uint64 *)((char *)task + lower_offset) = lb;
  4070. } else {
  4071. // GOMP task has to take into account the sizeof(long)
  4072. if (taskdata->td_size_loop_bounds == 4) {
  4073. kmp_uint32 *lower = RCAST(kmp_uint32 *, task->shareds);
  4074. *lower = (kmp_uint32)lb;
  4075. } else {
  4076. kmp_uint64 *lower = RCAST(kmp_uint64 *, task->shareds);
  4077. *lower = (kmp_uint64)lb;
  4078. }
  4079. }
  4080. #else
  4081. *(kmp_uint64 *)((char *)task + lower_offset) = lb;
  4082. #endif // defined(KMP_GOMP_COMPAT)
  4083. }
  4084. void set_ub(kmp_uint64 ub) {
  4085. #if defined(KMP_GOMP_COMPAT)
  4086. // Intel task just sets the upper bound normally
  4087. if (!taskdata->td_flags.native) {
  4088. *(kmp_uint64 *)((char *)task + upper_offset) = ub;
  4089. } else {
  4090. // GOMP task has to take into account the sizeof(long)
  4091. if (taskdata->td_size_loop_bounds == 4) {
  4092. kmp_uint32 *upper = RCAST(kmp_uint32 *, task->shareds) + 1;
  4093. *upper = (kmp_uint32)ub;
  4094. } else {
  4095. kmp_uint64 *upper = RCAST(kmp_uint64 *, task->shareds) + 1;
  4096. *upper = (kmp_uint64)ub;
  4097. }
  4098. }
  4099. #else
  4100. *(kmp_uint64 *)((char *)task + upper_offset) = ub;
  4101. #endif // defined(KMP_GOMP_COMPAT)
  4102. }
  4103. };
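/* The bounds class above records the byte offsets of lb/ub inside the task
   and accesses them through char-pointer arithmetic, which is what lets one
   helper serve both the Intel and GOMP layouts. A self-contained sketch of
   that offset technique, with a hypothetical toy_task struct:

     #include <cstddef>
     #include <cstdint>

     struct toy_task {
       void *shareds;
       std::uint64_t lb;
       std::uint64_t ub;
     };

     inline std::uint64_t toy_read_at(const toy_task *t, std::size_t offset) {
       // Reads the 64-bit field located 'offset' bytes into the task, the
       // same way get_lb()/get_ub() do above.
       return *reinterpret_cast<const std::uint64_t *>(
           reinterpret_cast<const char *>(t) + offset);
     }

     // usage: toy_read_at(task, offsetof(toy_task, ub)) yields task->ub
*/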
  4104. // __kmp_taskloop_linear: Start tasks of the taskloop linearly
  4105. //
  4106. // loc Source location information
  4107. // gtid Global thread ID
  4108. // task Pattern task, exposes the loop iteration range
  4109. // lb Pointer to loop lower bound in task structure
  4110. // ub Pointer to loop upper bound in task structure
  4111. // st Loop stride
  4112. // ub_glob Global upper bound (used for lastprivate check)
  4113. // num_tasks Number of tasks to execute
  4114. // grainsize Number of loop iterations per task
  4115. // extras Number of chunks with grainsize+1 iterations
  4116. // last_chunk Reduction of grainsize for last task
  4117. // tc Iterations count
  4118. // task_dup Tasks duplication routine
  4119. // codeptr_ra Return address for OMPT events
  4120. void __kmp_taskloop_linear(ident_t *loc, int gtid, kmp_task_t *task,
  4121. kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st,
  4122. kmp_uint64 ub_glob, kmp_uint64 num_tasks,
  4123. kmp_uint64 grainsize, kmp_uint64 extras,
  4124. kmp_int64 last_chunk, kmp_uint64 tc,
  4125. #if OMPT_SUPPORT
  4126. void *codeptr_ra,
  4127. #endif
  4128. void *task_dup) {
  4129. KMP_COUNT_BLOCK(OMP_TASKLOOP);
  4130. KMP_TIME_PARTITIONED_BLOCK(OMP_taskloop_scheduling);
  4131. p_task_dup_t ptask_dup = (p_task_dup_t)task_dup;
  4132. // compiler provides global bounds here
  4133. kmp_taskloop_bounds_t task_bounds(task, lb, ub);
  4134. kmp_uint64 lower = task_bounds.get_lb();
  4135. kmp_uint64 upper = task_bounds.get_ub();
  4136. kmp_uint64 i;
  4137. kmp_info_t *thread = __kmp_threads[gtid];
  4138. kmp_taskdata_t *current_task = thread->th.th_current_task;
  4139. kmp_task_t *next_task;
  4140. kmp_int32 lastpriv = 0;
  4141. KMP_DEBUG_ASSERT(tc == num_tasks * grainsize +
  4142. (last_chunk < 0 ? last_chunk : extras));
  4143. KMP_DEBUG_ASSERT(num_tasks > extras);
  4144. KMP_DEBUG_ASSERT(num_tasks > 0);
  4145. KA_TRACE(20, ("__kmp_taskloop_linear: T#%d: %lld tasks, grainsize %lld, "
  4146. "extras %lld, last_chunk %lld, i=%lld,%lld(%d)%lld, dup %p\n",
  4147. gtid, num_tasks, grainsize, extras, last_chunk, lower, upper,
  4148. ub_glob, st, task_dup));
  4149. // Launch num_tasks tasks, assign grainsize iterations each task
  4150. for (i = 0; i < num_tasks; ++i) {
  4151. kmp_uint64 chunk_minus_1;
  4152. if (extras == 0) {
  4153. chunk_minus_1 = grainsize - 1;
  4154. } else {
  4155. chunk_minus_1 = grainsize;
  4156. --extras; // first extras iterations get bigger chunk (grainsize+1)
  4157. }
  4158. upper = lower + st * chunk_minus_1;
  4159. if (upper > *ub) {
  4160. upper = *ub;
  4161. }
  4162. if (i == num_tasks - 1) {
  4163. // schedule the last task, set lastprivate flag if needed
  4164. if (st == 1) { // most common case
  4165. KMP_DEBUG_ASSERT(upper == *ub);
  4166. if (upper == ub_glob)
  4167. lastpriv = 1;
  4168. } else if (st > 0) { // positive loop stride
  4169. KMP_DEBUG_ASSERT((kmp_uint64)st > *ub - upper);
  4170. if ((kmp_uint64)st > ub_glob - upper)
  4171. lastpriv = 1;
  4172. } else { // negative loop stride
  4173. KMP_DEBUG_ASSERT(upper + st < *ub);
  4174. if (upper - ub_glob < (kmp_uint64)(-st))
  4175. lastpriv = 1;
  4176. }
  4177. }
  4178. next_task = __kmp_task_dup_alloc(thread, task); // allocate new task
  4179. kmp_taskdata_t *next_taskdata = KMP_TASK_TO_TASKDATA(next_task);
  4180. kmp_taskloop_bounds_t next_task_bounds =
  4181. kmp_taskloop_bounds_t(next_task, task_bounds);
  4182. // adjust task-specific bounds
  4183. next_task_bounds.set_lb(lower);
  4184. if (next_taskdata->td_flags.native) {
  4185. next_task_bounds.set_ub(upper + (st > 0 ? 1 : -1));
  4186. } else {
  4187. next_task_bounds.set_ub(upper);
  4188. }
  4189. if (ptask_dup != NULL) // set lastprivate flag, construct firstprivates,
  4190. // etc.
  4191. ptask_dup(next_task, task, lastpriv);
  4192. KA_TRACE(40,
  4193. ("__kmp_taskloop_linear: T#%d; task #%llu: task %p: lower %lld, "
  4194. "upper %lld stride %lld, (offsets %p %p)\n",
  4195. gtid, i, next_task, lower, upper, st,
  4196. next_task_bounds.get_lower_offset(),
  4197. next_task_bounds.get_upper_offset()));
  4198. #if OMPT_SUPPORT
  4199. __kmp_omp_taskloop_task(NULL, gtid, next_task,
  4200. codeptr_ra); // schedule new task
  4201. #if OMPT_OPTIONAL
  4202. if (ompt_enabled.ompt_callback_dispatch) {
  4203. OMPT_GET_DISPATCH_CHUNK(next_taskdata->ompt_task_info.dispatch_chunk,
  4204. lower, upper, st);
  4205. }
  4206. #endif // OMPT_OPTIONAL
  4207. #else
  4208. __kmp_omp_task(gtid, next_task, true); // schedule new task
  4209. #endif
  4210. lower = upper + st; // adjust lower bound for the next iteration
  4211. }
  4212. // free the pattern task and exit
  4213. __kmp_task_start(gtid, task, current_task); // make internal bookkeeping
  4214. // do not execute the pattern task, just do internal bookkeeping
  4215. __kmp_task_finish<false>(gtid, task, current_task);
  4216. }
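/* The loop above hands out chunks of 'grainsize' iterations, with the first
   'extras' chunks getting one extra iteration, so tc == num_tasks * grainsize
   + extras holds (in the non-strict case). A small stand-alone illustration
   of that arithmetic, with a hypothetical helper:

     #include <cstdint>

     // Number of iterations assigned to chunk i (0-based).
     inline std::uint64_t toy_chunk_size(std::uint64_t grainsize,
                                         std::uint64_t extras,
                                         std::uint64_t i) {
       return grainsize + (i < extras ? 1 : 0);
     }

     // e.g. tc = 10, num_tasks = 4  =>  grainsize = 2, extras = 2, and the
     // chunk sizes 3, 3, 2, 2 sum back to 10.
*/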
  4217. // Structure to keep taskloop parameters for auxiliary task
  4218. // kept in the shareds of the task structure.
  4219. typedef struct __taskloop_params {
  4220. kmp_task_t *task;
  4221. kmp_uint64 *lb;
  4222. kmp_uint64 *ub;
  4223. void *task_dup;
  4224. kmp_int64 st;
  4225. kmp_uint64 ub_glob;
  4226. kmp_uint64 num_tasks;
  4227. kmp_uint64 grainsize;
  4228. kmp_uint64 extras;
  4229. kmp_int64 last_chunk;
  4230. kmp_uint64 tc;
  4231. kmp_uint64 num_t_min;
  4232. #if OMPT_SUPPORT
  4233. void *codeptr_ra;
  4234. #endif
  4235. } __taskloop_params_t;
  4236. void __kmp_taskloop_recur(ident_t *, int, kmp_task_t *, kmp_uint64 *,
  4237. kmp_uint64 *, kmp_int64, kmp_uint64, kmp_uint64,
  4238. kmp_uint64, kmp_uint64, kmp_int64, kmp_uint64,
  4239. kmp_uint64,
  4240. #if OMPT_SUPPORT
  4241. void *,
  4242. #endif
  4243. void *);
  4244. // Execute part of the taskloop submitted as a task.
  4245. int __kmp_taskloop_task(int gtid, void *ptask) {
  4246. __taskloop_params_t *p =
  4247. (__taskloop_params_t *)((kmp_task_t *)ptask)->shareds;
  4248. kmp_task_t *task = p->task;
  4249. kmp_uint64 *lb = p->lb;
  4250. kmp_uint64 *ub = p->ub;
  4251. void *task_dup = p->task_dup;
  4252. // p_task_dup_t ptask_dup = (p_task_dup_t)task_dup;
  4253. kmp_int64 st = p->st;
  4254. kmp_uint64 ub_glob = p->ub_glob;
  4255. kmp_uint64 num_tasks = p->num_tasks;
  4256. kmp_uint64 grainsize = p->grainsize;
  4257. kmp_uint64 extras = p->extras;
  4258. kmp_int64 last_chunk = p->last_chunk;
  4259. kmp_uint64 tc = p->tc;
  4260. kmp_uint64 num_t_min = p->num_t_min;
  4261. #if OMPT_SUPPORT
  4262. void *codeptr_ra = p->codeptr_ra;
  4263. #endif
  4264. #if KMP_DEBUG
  4265. kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task);
  4266. KMP_DEBUG_ASSERT(task != NULL);
  4267. KA_TRACE(20,
  4268. ("__kmp_taskloop_task: T#%d, task %p: %lld tasks, grainsize"
  4269. " %lld, extras %lld, last_chunk %lld, i=%lld,%lld(%d), dup %p\n",
  4270. gtid, taskdata, num_tasks, grainsize, extras, last_chunk, *lb, *ub,
  4271. st, task_dup));
  4272. #endif
  4273. KMP_DEBUG_ASSERT(num_tasks * 2 + 1 > num_t_min);
  4274. if (num_tasks > num_t_min)
  4275. __kmp_taskloop_recur(NULL, gtid, task, lb, ub, st, ub_glob, num_tasks,
  4276. grainsize, extras, last_chunk, tc, num_t_min,
  4277. #if OMPT_SUPPORT
  4278. codeptr_ra,
  4279. #endif
  4280. task_dup);
  4281. else
  4282. __kmp_taskloop_linear(NULL, gtid, task, lb, ub, st, ub_glob, num_tasks,
  4283. grainsize, extras, last_chunk, tc,
  4284. #if OMPT_SUPPORT
  4285. codeptr_ra,
  4286. #endif
  4287. task_dup);
  4288. KA_TRACE(40, ("__kmp_taskloop_task(exit): T#%d\n", gtid));
  4289. return 0;
  4290. }
  4291. // Schedule part of the taskloop as a task,
  4292. // execute the rest of the taskloop.
  4293. //
  4294. // loc Source location information
  4295. // gtid Global thread ID
  4296. // task Pattern task, exposes the loop iteration range
  4297. // lb Pointer to loop lower bound in task structure
  4298. // ub Pointer to loop upper bound in task structure
  4299. // st Loop stride
  4300. // ub_glob Global upper bound (used for lastprivate check)
  4301. // num_tasks Number of tasks to execute
  4302. // grainsize Number of loop iterations per task
  4303. // extras Number of chunks with grainsize+1 iterations
  4304. // last_chunk Reduction of grainsize for last task
  4305. // tc Iterations count
  4306. // num_t_min Threshold to launch tasks recursively
  4307. // task_dup Tasks duplication routine
  4308. // codeptr_ra Return address for OMPT events
  4309. void __kmp_taskloop_recur(ident_t *loc, int gtid, kmp_task_t *task,
  4310. kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st,
  4311. kmp_uint64 ub_glob, kmp_uint64 num_tasks,
  4312. kmp_uint64 grainsize, kmp_uint64 extras,
  4313. kmp_int64 last_chunk, kmp_uint64 tc,
  4314. kmp_uint64 num_t_min,
  4315. #if OMPT_SUPPORT
  4316. void *codeptr_ra,
  4317. #endif
  4318. void *task_dup) {
  4319. kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task);
  4320. KMP_DEBUG_ASSERT(task != NULL);
  4321. KMP_DEBUG_ASSERT(num_tasks > num_t_min);
  4322. KA_TRACE(20,
  4323. ("__kmp_taskloop_recur: T#%d, task %p: %lld tasks, grainsize"
  4324. " %lld, extras %lld, last_chunk %lld, i=%lld,%lld(%d), dup %p\n",
  4325. gtid, taskdata, num_tasks, grainsize, extras, last_chunk, *lb, *ub,
  4326. st, task_dup));
  4327. p_task_dup_t ptask_dup = (p_task_dup_t)task_dup;
  4328. kmp_uint64 lower = *lb;
  4329. kmp_info_t *thread = __kmp_threads[gtid];
  4330. // kmp_taskdata_t *current_task = thread->th.th_current_task;
  4331. kmp_task_t *next_task;
  4332. size_t lower_offset =
  4333. (char *)lb - (char *)task; // remember offset of lb in the task structure
  4334. size_t upper_offset =
  4335. (char *)ub - (char *)task; // remember offset of ub in the task structure
  4336. KMP_DEBUG_ASSERT(tc == num_tasks * grainsize +
  4337. (last_chunk < 0 ? last_chunk : extras));
  4338. KMP_DEBUG_ASSERT(num_tasks > extras);
  4339. KMP_DEBUG_ASSERT(num_tasks > 0);
  4340. // split the loop in two halves
  4341. kmp_uint64 lb1, ub0, tc0, tc1, ext0, ext1;
  4342. kmp_int64 last_chunk0 = 0, last_chunk1 = 0;
  4343. kmp_uint64 gr_size0 = grainsize;
  4344. kmp_uint64 n_tsk0 = num_tasks >> 1; // num_tasks/2 to execute
  4345. kmp_uint64 n_tsk1 = num_tasks - n_tsk0; // to schedule as a task
  4346. if (last_chunk < 0) {
  4347. ext0 = ext1 = 0;
  4348. last_chunk1 = last_chunk;
  4349. tc0 = grainsize * n_tsk0;
  4350. tc1 = tc - tc0;
  4351. } else if (n_tsk0 <= extras) {
  4352. gr_size0++; // integrate extras into grainsize
  4353. ext0 = 0; // no extra iters in 1st half
  4354. ext1 = extras - n_tsk0; // remaining extras
  4355. tc0 = gr_size0 * n_tsk0;
  4356. tc1 = tc - tc0;
  4357. } else { // n_tsk0 > extras
  4358. ext1 = 0; // no extra iters in 2nd half
  4359. ext0 = extras;
  4360. tc1 = grainsize * n_tsk1;
  4361. tc0 = tc - tc1;
  4362. }
  4363. ub0 = lower + st * (tc0 - 1);
  4364. lb1 = ub0 + st;
  4365. // create pattern task for 2nd half of the loop
  4366. next_task = __kmp_task_dup_alloc(thread, task); // duplicate the task
  4367. // adjust lower bound (upper bound is not changed) for the 2nd half
  4368. *(kmp_uint64 *)((char *)next_task + lower_offset) = lb1;
  4369. if (ptask_dup != NULL) // construct firstprivates, etc.
  4370. ptask_dup(next_task, task, 0);
  4371. *ub = ub0; // adjust upper bound for the 1st half
  4372. // create auxiliary task for 2nd half of the loop
  4373. // make sure new task has same parent task as the pattern task
  4374. kmp_taskdata_t *current_task = thread->th.th_current_task;
  4375. thread->th.th_current_task = taskdata->td_parent;
  4376. kmp_task_t *new_task =
  4377. __kmpc_omp_task_alloc(loc, gtid, 1, 3 * sizeof(void *),
  4378. sizeof(__taskloop_params_t), &__kmp_taskloop_task);
  4379. // restore current task
  4380. thread->th.th_current_task = current_task;
  4381. __taskloop_params_t *p = (__taskloop_params_t *)new_task->shareds;
  4382. p->task = next_task;
  4383. p->lb = (kmp_uint64 *)((char *)next_task + lower_offset);
  4384. p->ub = (kmp_uint64 *)((char *)next_task + upper_offset);
  4385. p->task_dup = task_dup;
  4386. p->st = st;
  4387. p->ub_glob = ub_glob;
  4388. p->num_tasks = n_tsk1;
  4389. p->grainsize = grainsize;
  4390. p->extras = ext1;
  4391. p->last_chunk = last_chunk1;
  4392. p->tc = tc1;
  4393. p->num_t_min = num_t_min;
  4394. #if OMPT_SUPPORT
  4395. p->codeptr_ra = codeptr_ra;
  4396. #endif
  4397. #if OMPT_SUPPORT
  4398. // schedule new task with correct return address for OMPT events
  4399. __kmp_omp_taskloop_task(NULL, gtid, new_task, codeptr_ra);
  4400. #else
  4401. __kmp_omp_task(gtid, new_task, true); // schedule new task
  4402. #endif
  4403. // execute the 1st half of current subrange
  4404. if (n_tsk0 > num_t_min)
  4405. __kmp_taskloop_recur(loc, gtid, task, lb, ub, st, ub_glob, n_tsk0, gr_size0,
  4406. ext0, last_chunk0, tc0, num_t_min,
  4407. #if OMPT_SUPPORT
  4408. codeptr_ra,
  4409. #endif
  4410. task_dup);
  4411. else
  4412. __kmp_taskloop_linear(loc, gtid, task, lb, ub, st, ub_glob, n_tsk0,
  4413. gr_size0, ext0, last_chunk0, tc0,
  4414. #if OMPT_SUPPORT
  4415. codeptr_ra,
  4416. #endif
  4417. task_dup);
  4418. KA_TRACE(40, ("__kmp_taskloop_recur(exit): T#%d\n", gtid));
  4419. }
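/* The recursive splitter above halves the task count, ships the second half
   off as an auxiliary task and keeps the first half, recursing until the
   count drops to num_t_min. A compact sketch of that divide-and-conquer
   shape, with hypothetical names (run_linear and spawn stand in for
   __kmp_taskloop_linear and the auxiliary-task scheduling):

     #include <cstdint>
     #include <functional>

     inline void
     toy_recur(std::uint64_t num_tasks, std::uint64_t num_t_min,
               const std::function<void(std::uint64_t)> &run_linear,
               const std::function<void(std::uint64_t)> &spawn) {
       while (num_tasks > num_t_min) {
         std::uint64_t n0 = num_tasks >> 1; // executed by this thread
         std::uint64_t n1 = num_tasks - n0; // scheduled as a task
         spawn(n1);
         num_tasks = n0;                    // tail-recurse on the first half
       }
       run_linear(num_tasks);
     }
*/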
  4420. static void __kmp_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int if_val,
  4421. kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st,
  4422. int nogroup, int sched, kmp_uint64 grainsize,
  4423. int modifier, void *task_dup) {
  4424. kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task);
  4425. KMP_DEBUG_ASSERT(task != NULL);
  4426. if (nogroup == 0) {
  4427. #if OMPT_SUPPORT && OMPT_OPTIONAL
  4428. OMPT_STORE_RETURN_ADDRESS(gtid);
  4429. #endif
  4430. __kmpc_taskgroup(loc, gtid);
  4431. }
  // =========================================================================
  // calculate loop parameters
  kmp_taskloop_bounds_t task_bounds(task, lb, ub);
  kmp_uint64 tc;
  // compiler provides global bounds here
  kmp_uint64 lower = task_bounds.get_lb();
  kmp_uint64 upper = task_bounds.get_ub();
  kmp_uint64 ub_glob = upper; // global upper used to calc lastprivate flag
  kmp_uint64 num_tasks = 0, extras = 0;
  kmp_int64 last_chunk =
      0; // reduce grainsize of last task by last_chunk in strict mode
  kmp_uint64 num_tasks_min = __kmp_taskloop_min_tasks;
  kmp_info_t *thread = __kmp_threads[gtid];
  kmp_taskdata_t *current_task = thread->th.th_current_task;

  KA_TRACE(20, ("__kmp_taskloop: T#%d, task %p, lb %lld, ub %lld, st %lld, "
                "grain %llu(%d, %d), dup %p\n",
                gtid, taskdata, lower, upper, st, grainsize, sched, modifier,
                task_dup));

  // compute trip count
  if (st == 1) { // most common case
    tc = upper - lower + 1;
  } else if (st < 0) {
    tc = (lower - upper) / (-st) + 1;
  } else { // st > 0
    tc = (upper - lower) / st + 1;
  }
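  // For example, with lb = 0, ub = 9, st = 3 the loop covers iterations
  // 0, 3, 6, 9, so the formula above gives tc = (9 - 0) / 3 + 1 = 4.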
  if (tc == 0) {
    KA_TRACE(20, ("__kmp_taskloop(exit): T#%d zero-trip loop\n", gtid));
    // free the pattern task and exit
    __kmp_task_start(gtid, task, current_task);
    // do not execute anything for zero-trip loop
    __kmp_task_finish<false>(gtid, task, current_task);
    return;
  }
#if OMPT_SUPPORT && OMPT_OPTIONAL
  ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL);
  ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
  if (ompt_enabled.ompt_callback_work) {
    ompt_callbacks.ompt_callback(ompt_callback_work)(
        ompt_work_taskloop, ompt_scope_begin, &(team_info->parallel_data),
        &(task_info->task_data), tc, OMPT_GET_RETURN_ADDRESS(0));
  }
#endif
  if (num_tasks_min == 0)
    // TODO: can we choose a better default heuristic?
    num_tasks_min =
        KMP_MIN(thread->th.th_team_nproc * 10, INITIAL_TASK_DEQUE_SIZE);
  // compute num_tasks/grainsize based on the input provided
  switch (sched) {
  case 0: // no schedule clause specified, we can choose the default
    // let's try to schedule (team_size*10) tasks
    grainsize = thread->th.th_team_nproc * 10;
    KMP_FALLTHROUGH();
  case 2: // num_tasks provided
    if (grainsize > tc) {
      num_tasks = tc; // too big num_tasks requested, adjust values
      grainsize = 1;
      extras = 0;
    } else {
      num_tasks = grainsize;
      grainsize = tc / num_tasks;
      extras = tc % num_tasks;
    }
    break;
  case 1: // grainsize provided
    if (grainsize > tc) {
      num_tasks = 1;
      grainsize = tc; // too big grainsize requested, adjust values
      extras = 0;
    } else {
      if (modifier) {
        num_tasks = (tc + grainsize - 1) / grainsize;
        last_chunk = tc - (num_tasks * grainsize);
        extras = 0;
      } else {
        num_tasks = tc / grainsize;
        // adjust grainsize for balanced distribution of iterations
        grainsize = tc / num_tasks;
        extras = tc % num_tasks;
      }
    }
    break;
  default:
    KMP_ASSERT2(0, "unknown scheduling of taskloop");
  }
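  // Worked example for the grainsize case (sched == 1) with tc = 10,
  // grainsize = 3:
  //   strict modifier:    num_tasks = (10 + 3 - 1) / 3 = 4,
  //                       last_chunk = 10 - 4 * 3 = -2, extras = 0
  //   no strict modifier: num_tasks = 10 / 3 = 3, grainsize = 10 / 3 = 3,
  //                       extras = 10 % 3 = 1
  // Both satisfy the invariant checked by the first assertion below:
  //   tc == num_tasks * grainsize + (last_chunk < 0 ? last_chunk : extras).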
  KMP_DEBUG_ASSERT(tc == num_tasks * grainsize +
                             (last_chunk < 0 ? last_chunk : extras));
  KMP_DEBUG_ASSERT(num_tasks > extras);
  KMP_DEBUG_ASSERT(num_tasks > 0);
  // =========================================================================

  // check if clause value first
  // Also require GOMP_taskloop to reduce to linear (taskdata->td_flags.native)
  if (if_val == 0) { // if(0) specified, mark task as serial
    taskdata->td_flags.task_serial = 1;
    taskdata->td_flags.tiedness = TASK_TIED; // AC: serial task cannot be untied
    // always start serial tasks linearly
    __kmp_taskloop_linear(loc, gtid, task, lb, ub, st, ub_glob, num_tasks,
                          grainsize, extras, last_chunk, tc,
#if OMPT_SUPPORT
                          OMPT_GET_RETURN_ADDRESS(0),
#endif
                          task_dup);
    // !taskdata->td_flags.native => currently force linear spawning of tasks
    // for GOMP_taskloop
  } else if (num_tasks > num_tasks_min && !taskdata->td_flags.native) {
    KA_TRACE(20, ("__kmp_taskloop: T#%d, go recursive: tc %llu, #tasks %llu"
                  "(%lld), grain %llu, extras %llu, last_chunk %lld\n",
                  gtid, tc, num_tasks, num_tasks_min, grainsize, extras,
                  last_chunk));
    __kmp_taskloop_recur(loc, gtid, task, lb, ub, st, ub_glob, num_tasks,
                         grainsize, extras, last_chunk, tc, num_tasks_min,
#if OMPT_SUPPORT
                         OMPT_GET_RETURN_ADDRESS(0),
#endif
                         task_dup);
  } else {
    KA_TRACE(20, ("__kmp_taskloop: T#%d, go linear: tc %llu, #tasks %llu"
                  "(%lld), grain %llu, extras %llu, last_chunk %lld\n",
                  gtid, tc, num_tasks, num_tasks_min, grainsize, extras,
                  last_chunk));
    __kmp_taskloop_linear(loc, gtid, task, lb, ub, st, ub_glob, num_tasks,
                          grainsize, extras, last_chunk, tc,
#if OMPT_SUPPORT
                          OMPT_GET_RETURN_ADDRESS(0),
#endif
                          task_dup);
  }
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.ompt_callback_work) {
    ompt_callbacks.ompt_callback(ompt_callback_work)(
        ompt_work_taskloop, ompt_scope_end, &(team_info->parallel_data),
        &(task_info->task_data), tc, OMPT_GET_RETURN_ADDRESS(0));
  }
#endif

  if (nogroup == 0) {
#if OMPT_SUPPORT && OMPT_OPTIONAL
    OMPT_STORE_RETURN_ADDRESS(gtid);
#endif
    __kmpc_end_taskgroup(loc, gtid);
  }
  KA_TRACE(20, ("__kmp_taskloop(exit): T#%d\n", gtid));
}
/*!
@ingroup TASKING
@param loc Source location information
@param gtid Global thread ID
@param task Task structure
@param if_val Value of the if clause
@param lb Pointer to loop lower bound in task structure
@param ub Pointer to loop upper bound in task structure
@param st Loop stride
@param nogroup Flag, 1 if nogroup clause specified, 0 otherwise
@param sched Schedule specified 0/1/2 for none/grainsize/num_tasks
@param grainsize Schedule value if specified
@param task_dup Task duplication routine

Execute the taskloop construct.
*/
void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int if_val,
                     kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup,
                     int sched, kmp_uint64 grainsize, void *task_dup) {
  __kmp_assert_valid_gtid(gtid);
  KA_TRACE(20, ("__kmpc_taskloop(enter): T#%d\n", gtid));
  __kmp_taskloop(loc, gtid, task, if_val, lb, ub, st, nogroup, sched, grainsize,
                 0, task_dup);
  KA_TRACE(20, ("__kmpc_taskloop(exit): T#%d\n", gtid));
}
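
// Illustrative sketch only (names such as pattern_task, lb_ref, ub_ref and
// task_dup_fn are hypothetical; actual compiler codegen may differ): for a
// loop like
//   #pragma omp taskloop num_tasks(8)
//   for (int i = 0; i <= n; ++i) body(i);
// a compiler would typically allocate a pattern task via
// __kmpc_omp_task_alloc() that stores the loop bounds in the task structure
// (see kmp_taskloop_bounds_t) and then call this entry point roughly as
//   __kmpc_taskloop(&loc, gtid, pattern_task, /*if_val=*/1, &lb_ref, &ub_ref,
//                   /*st=*/1, /*nogroup=*/0, /*sched=*/2, /*grainsize=*/8,
//                   task_dup_fn);
// where sched == 2 signals "num_tasks provided" and the grainsize argument
// carries the requested number of tasks.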
/*!
@ingroup TASKING
@param loc Source location information
@param gtid Global thread ID
@param task Task structure
@param if_val Value of the if clause
@param lb Pointer to loop lower bound in task structure
@param ub Pointer to loop upper bound in task structure
@param st Loop stride
@param nogroup Flag, 1 if nogroup clause specified, 0 otherwise
@param sched Schedule specified 0/1/2 for none/grainsize/num_tasks
@param grainsize Schedule value if specified
@param modifier Modifier 'strict' for sched, 1 if present, 0 otherwise
@param task_dup Task duplication routine

Execute the taskloop construct.
*/
void __kmpc_taskloop_5(ident_t *loc, int gtid, kmp_task_t *task, int if_val,
                       kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st,
                       int nogroup, int sched, kmp_uint64 grainsize,
                       int modifier, void *task_dup) {
  __kmp_assert_valid_gtid(gtid);
  KA_TRACE(20, ("__kmpc_taskloop_5(enter): T#%d\n", gtid));
  __kmp_taskloop(loc, gtid, task, if_val, lb, ub, st, nogroup, sched, grainsize,
                 modifier, task_dup);
  KA_TRACE(20, ("__kmpc_taskloop_5(exit): T#%d\n", gtid));
}
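
// Illustrative sketch only (hypothetical values; actual compiler codegen may
// differ): for a clause such as
//   #pragma omp taskloop grainsize(strict: 4)
// a compiler would be expected to pass sched == 1 (grainsize provided),
// grainsize == 4 and modifier == 1, so that __kmp_taskloop computes
// num_tasks = ceil(tc / 4) and shrinks only the last chunk via last_chunk.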