12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
7227822792280228122822283228422852286228722882289229022912292229322942295229622972298229923002301230223032304230523062307230823092310231123122313231423152316231723182319232023212322232323242325232623272328232923302331233223332334233523362337233823392340234123422343234423452346234723482349235023512352235323542355235623572358235923602361236223632364236523662367236823692370237123722373237423752376237723782379238023812382238323842385238623872388238923902391239223932394239523962397239823992400240124022403240424052406240724082409241024112412241324142415241624172418241924202421242224232424242524262427242824292430243124322433243424352436243724382439244024412442244324442445244624472448244924502451245224532454245524562457245824592460246124622463246424652466246724682469247024712472247324742475247624772478247924802481248224832484248524862487248824892490249124922493249424952496249724982499250025012502250325042505250625072508250925102511251225132514251525162517251825192520252125222523252425252526252725282529253025312532253325342535253625372538253925402541254225432544254525462547254825492550255125522553255425552556255725582559256025612562256325642565256625672568256925702571257225732574257525762577257825792580258125822583258425852586258725882589259025912592259325942595259625972598259926002601260226032604260526062607260826092610261126122613261426152616261726182619262026212622262326242625262626272628262926302631263226332634263526362637263826392640264126422643264426452646264726482649265026512652265326542655265626572658265926602661266226632664266526662667266826692670 |
- #include "kmp_wait_release.h"
- #include "kmp_barrier.h"
- #include "kmp_itt.h"
- #include "kmp_os.h"
- #include "kmp_stats.h"
- #include "ompt-specific.h"
- #include "kmp_affinity.h"
- #if KMP_MIC
- #include <immintrin.h>
- #define USE_NGO_STORES 1
- #endif
- #if KMP_MIC && USE_NGO_STORES
- #define ngo_load(src) __m512d Vt = _mm512_load_pd((void *)(src))
- #define ngo_store_icvs(dst, src) _mm512_storenrngo_pd((void *)(dst), Vt)
- #define ngo_store_go(dst, src) _mm512_storenrngo_pd((void *)(dst), Vt)
- #define ngo_sync() __asm__ volatile("lock; addl $0,0(%%rsp)" ::: "memory")
- #else
- #define ngo_load(src) ((void)0)
- #define ngo_store_icvs(dst, src) copy_icvs((dst), (src))
- #define ngo_store_go(dst, src) KMP_MEMCPY((dst), (src), CACHE_LINE)
- #define ngo_sync() ((void)0)
- #endif
- void __kmp_print_structure(void);
// Compute the barrier layout for a team of n threads: threads_per_go,
// num_gos, num_groups, gos_per_group and threads_per_group.  When hardware
// topology is available, the go size is derived from cores-per-socket and
// groups follow socket boundaries; otherwise threads_per_go is taken as-is
// and the gos are split into two groups.
void distributedBarrier::computeVarsForN(size_t n) {
  int nsockets = 1;
  if (__kmp_topology) {
    int socket_level = __kmp_topology->get_level(KMP_HW_SOCKET);
    int core_level = __kmp_topology->get_level(KMP_HW_CORE);
    int ncores_per_socket =
        __kmp_topology->calculate_ratio(core_level, socket_level);
    nsockets = __kmp_topology->get_count(socket_level);
    // Guard against missing or malformed topology levels.
    if (nsockets <= 0)
      nsockets = 1;
    if (ncores_per_socket <= 0)
      ncores_per_socket = 1;
    // Start from half the cores of one socket per go.
    threads_per_go = ncores_per_socket >> 1;
    if (!fix_threads_per_go) {
      // Heuristic shrink of the go size; only applied until the size is
      // fixed (fix_threads_per_go is set below on first computation).
      if (threads_per_go > 4) {
        if (KMP_OPTIMIZE_FOR_REDUCTIONS) {
          threads_per_go = threads_per_go >> 1;
        }
        if (threads_per_go > 4 && nsockets == 1)
          threads_per_go = threads_per_go >> 1;
      }
    }
    if (threads_per_go == 0)
      threads_per_go = 1;
    fix_threads_per_go = true;
    // num_gos = ceil(n / threads_per_go)
    num_gos = n / threads_per_go;
    if (n % threads_per_go)
      num_gos++;
    if (nsockets == 1 || num_gos == 1)
      num_groups = 1;
    else {
      // One group per socket: num_groups = ceil(num_gos / nsockets)
      num_groups = num_gos / nsockets;
      if (num_gos % nsockets)
        num_groups++;
    }
    if (num_groups <= 0)
      num_groups = 1;
    // gos_per_group = ceil(num_gos / num_groups)
    gos_per_group = num_gos / num_groups;
    if (num_gos % num_groups)
      gos_per_group++;
    threads_per_group = threads_per_go * gos_per_group;
  } else {
    // No topology: keep the current threads_per_go and split the gos into
    // (at most) two groups.
    num_gos = n / threads_per_go;
    if (n % threads_per_go)
      num_gos++;
    if (num_gos == 1)
      num_groups = 1;
    else {
      num_groups = num_gos / 2;
      if (num_gos % 2)
        num_groups++;
    }
    gos_per_group = num_gos / num_groups;
    if (num_gos % num_groups)
      gos_per_group++;
    threads_per_group = threads_per_go * gos_per_group;
  }
}
- void distributedBarrier::computeGo(size_t n) {
-
- for (num_gos = 1;; num_gos++)
- if (IDEAL_CONTENTION * num_gos >= n)
- break;
- threads_per_go = n / num_gos;
- if (n % num_gos)
- threads_per_go++;
- while (num_gos > MAX_GOS) {
- threads_per_go++;
- num_gos = n / threads_per_go;
- if (n % threads_per_go)
- num_gos++;
- }
- computeVarsForN(n);
- }
// Grow the per-thread barrier arrays (flags, go, iter, sleep) to hold at
// least nthr threads.  Capacity is doubled relative to the request to
// amortize future growth.  Existing contents are preserved via realloc.
void distributedBarrier::resize(size_t nthr) {
  KMP_DEBUG_ASSERT(nthr > max_threads);
  // Double the requested size to avoid frequent resizing.
  max_threads = nthr * 2;
  // NOTE(review): results of KMP_INTERNAL_REALLOC/MALLOC are used unchecked;
  // presumably the KMP allocators abort on failure — confirm against
  // their definitions.
  for (int i = 0; i < MAX_ITERS; ++i) {
    if (flags[i])
      flags[i] = (flags_s *)KMP_INTERNAL_REALLOC(flags[i],
                                                 max_threads * sizeof(flags_s));
    else
      flags[i] = (flags_s *)KMP_INTERNAL_MALLOC(max_threads * sizeof(flags_s));
  }
  if (go)
    go = (go_s *)KMP_INTERNAL_REALLOC(go, max_threads * sizeof(go_s));
  else
    go = (go_s *)KMP_INTERNAL_MALLOC(max_threads * sizeof(go_s));
  if (iter)
    iter = (iter_s *)KMP_INTERNAL_REALLOC(iter, max_threads * sizeof(iter_s));
  else
    iter = (iter_s *)KMP_INTERNAL_MALLOC(max_threads * sizeof(iter_s));
  if (sleep)
    sleep =
        (sleep_s *)KMP_INTERNAL_REALLOC(sleep, max_threads * sizeof(sleep_s));
  else
    sleep = (sleep_s *)KMP_INTERNAL_MALLOC(max_threads * sizeof(sleep_s));
}
- kmp_uint64 distributedBarrier::go_release() {
- kmp_uint64 next_go = iter[0].iter + distributedBarrier::MAX_ITERS;
- for (size_t j = 0; j < num_gos; j++) {
- go[j].go.store(next_go);
- }
- return next_go;
- }
- void distributedBarrier::go_reset() {
- for (size_t j = 0; j < max_threads; ++j) {
- for (size_t i = 0; i < distributedBarrier::MAX_ITERS; ++i) {
- flags[i][j].stillNeed = 1;
- }
- go[j].go.store(0);
- iter[j].iter = 0;
- }
- }
// Initialize (or re-initialize) the barrier for nthr threads: grow storage
// if needed, reset per-slot state, recompute the layout, and lazily
// allocate the team ICV buffer.
void distributedBarrier::init(size_t nthr) {
  size_t old_max = max_threads; // capacity before any resize
  if (nthr > max_threads) {
    resize(nthr); // sets max_threads = nthr * 2
  }
  for (size_t i = 0; i < max_threads; i++) {
    for (size_t j = 0; j < distributedBarrier::MAX_ITERS; j++) {
      flags[j][i].stillNeed = 1;
    }
    go[i].go.store(0);
    iter[i].iter = 0;
    // Only newly-added slots get sleep cleared; pre-existing slots keep
    // their sleep state (a thread may legitimately still be asleep there).
    if (i >= old_max)
      sleep[i].sleep = false;
  }
  // Recompute layout variables for the new thread count.
  computeVarsForN(nthr);
  num_threads = nthr;
  if (team_icvs == NULL)
    team_icvs = __kmp_allocate(sizeof(kmp_internal_control_t));
}
- void __kmp_dist_barrier_wakeup(enum barrier_type bt, kmp_team_t *team,
- size_t start, size_t stop, size_t inc,
- size_t tid) {
- KMP_DEBUG_ASSERT(__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME);
- if (bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done))
- return;
- kmp_info_t **other_threads = team->t.t_threads;
- for (size_t thr = start; thr < stop; thr += inc) {
- KMP_DEBUG_ASSERT(other_threads[thr]);
- int gtid = other_threads[thr]->th.th_info.ds.ds_gtid;
-
- __kmp_atomic_resume_64(gtid, (kmp_atomic_flag_64<> *)NULL);
- }
- }
// Gather phase of the distributed barrier.  Threads signal arrival by
// clearing their stillNeed flag for the current iteration; group leaders
// spin on their group members' flags, then the primary thread spins on the
// group leaders' flags.  While spinning, arriving threads may execute
// tasks.  An optional reduction is applied hierarchically: each leader
// folds in its group, then the primary folds in the other leaders.
static void __kmp_dist_barrier_gather(
    enum barrier_type bt, kmp_info_t *this_thr, int gtid, int tid,
    void (*reduce)(void *, void *) USE_ITT_BUILD_ARG(void *itt_sync_obj)) {
  KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_dist_gather);
  kmp_team_t *team;
  distributedBarrier *b;
  kmp_info_t **other_threads;
  kmp_uint64 my_current_iter, my_next_iter;
  kmp_uint32 nproc;
  bool group_leader;
  team = this_thr->th.th_team;
  nproc = this_thr->th.th_team_nproc;
  other_threads = team->t.t_threads;
  b = team->t.b;
  my_current_iter = b->iter[tid].iter;
  // Iterations are used round-robin modulo MAX_ITERS.
  my_next_iter = (my_current_iter + 1) % distributedBarrier::MAX_ITERS;
  group_leader = ((tid % b->threads_per_group) == 0);
  KA_TRACE(20,
           ("__kmp_dist_barrier_gather: T#%d(%d:%d) enter; barrier type %d\n",
            gtid, team->t.t_id, tid, bt));
#if USE_ITT_BUILD && USE_ITT_NOTIFY
  // Record arrival time for ITT frame reporting (modes 2 and 3).
  if (__kmp_forkjoin_frames_mode == 3 || __kmp_forkjoin_frames_mode == 2) {
    this_thr->th.th_bar_arrive_time = this_thr->th.th_bar_min_time =
        __itt_get_timestamp();
  }
#endif
  if (group_leader) {
    // Wait for all the threads of my group to signal arrival by clearing
    // their stillNeed flag for the current iteration.
    size_t group_start = tid + 1;
    size_t group_end = tid + b->threads_per_group;
    size_t threads_pending = 0;
    if (group_end > nproc)
      group_end = nproc; // last group may be partial
    do {
      threads_pending = 0;
      // Sum outstanding stillNeed flags; zero means everyone arrived.
      for (size_t thr = group_start; thr < group_end; thr++) {
        threads_pending += b->flags[my_current_iter][thr].stillNeed;
      }
      // While waiting, help execute available tasks.
      if (__kmp_tasking_mode != tskm_immediate_exec) {
        kmp_task_team_t *task_team = this_thr->th.th_task_team;
        if (task_team != NULL) {
          if (TCR_SYNC_4(task_team->tt.tt_active)) {
            if (KMP_TASKING_ENABLED(task_team)) {
              int tasks_completed = FALSE;
              __kmp_atomic_execute_tasks_64(
                  this_thr, gtid, (kmp_atomic_flag_64<> *)NULL, FALSE,
                  &tasks_completed USE_ITT_BUILD_ARG(itt_sync_obj), 0);
            } else
              this_thr->th.th_reap_state = KMP_SAFE_TO_REAP;
          }
        } else {
          // No task team: this thread can be reaped while it waits.
          this_thr->th.th_reap_state = KMP_SAFE_TO_REAP;
        }
      }
      // Bail out on library shutdown/abort.
      if (TCR_4(__kmp_global.g.g_done)) {
        if (__kmp_global.g.g_abort)
          __kmp_abort_thread();
        break;
      } else if (__kmp_tasking_mode != tskm_immediate_exec &&
                 this_thr->th.th_reap_state == KMP_SAFE_TO_REAP) {
        this_thr->th.th_reap_state = KMP_NOT_SAFE_TO_REAP;
      }
    } while (threads_pending > 0);
    if (reduce) {
      // Fold the group members' partial results into the leader's data.
      OMPT_REDUCTION_DECL(this_thr, gtid);
      OMPT_REDUCTION_BEGIN;
      for (size_t thr = group_start; thr < group_end; thr++) {
        (*reduce)(this_thr->th.th_local.reduce_data,
                  other_threads[thr]->th.th_local.reduce_data);
      }
      OMPT_REDUCTION_END;
    }
    // Arm the flag for the NEXT iteration before clearing the current one,
    // so a fast thread entering the next barrier never sees it unset.
    b->flags[my_next_iter][tid].stillNeed = 1;
    // Signal this leader's own arrival (primary thread spins on this).
    b->flags[my_current_iter][tid].stillNeed = 0;
    do {
      // Primary thread (tid 0, itself a group leader) also waits here for
      // the other group leaders; non-primary leaders exit on first pass
      // since their own flag is already clear and they only re-check
      // leaders' flags, which include their own cleared one plus others.
      threads_pending = 0;
      for (size_t thr = 0; thr < nproc; thr += b->threads_per_group) {
        threads_pending += b->flags[my_current_iter][thr].stillNeed;
      }
      // Same task-execution / reap / shutdown handling as above.
      if (__kmp_tasking_mode != tskm_immediate_exec) {
        kmp_task_team_t *task_team = this_thr->th.th_task_team;
        if (task_team != NULL) {
          if (TCR_SYNC_4(task_team->tt.tt_active)) {
            if (KMP_TASKING_ENABLED(task_team)) {
              int tasks_completed = FALSE;
              __kmp_atomic_execute_tasks_64(
                  this_thr, gtid, (kmp_atomic_flag_64<> *)NULL, FALSE,
                  &tasks_completed USE_ITT_BUILD_ARG(itt_sync_obj), 0);
            } else
              this_thr->th.th_reap_state = KMP_SAFE_TO_REAP;
          }
        } else {
          this_thr->th.th_reap_state = KMP_SAFE_TO_REAP;
        }
      }
      if (TCR_4(__kmp_global.g.g_done)) {
        if (__kmp_global.g.g_abort)
          __kmp_abort_thread();
        break;
      } else if (__kmp_tasking_mode != tskm_immediate_exec &&
                 this_thr->th.th_reap_state == KMP_SAFE_TO_REAP) {
        this_thr->th.th_reap_state = KMP_NOT_SAFE_TO_REAP;
      }
    } while (threads_pending > 0);
    if (reduce) {
      // Only the primary thread folds in the other group leaders' results.
      if (KMP_MASTER_TID(tid)) {
        OMPT_REDUCTION_DECL(this_thr, gtid);
        OMPT_REDUCTION_BEGIN;
        for (size_t thr = b->threads_per_group; thr < nproc;
             thr += b->threads_per_group) {
          (*reduce)(this_thr->th.th_local.reduce_data,
                    other_threads[thr]->th.th_local.reduce_data);
        }
        OMPT_REDUCTION_END;
      }
    }
  } else {
    // Non-leader: arm next iteration's flag first, then signal arrival by
    // clearing the current one (order matters — see leader path above).
    b->flags[my_next_iter][tid].stillNeed = 1;
    b->flags[my_current_iter][tid].stillNeed = 0;
  }
  KMP_MFENCE();
  KA_TRACE(20,
           ("__kmp_dist_barrier_gather: T#%d(%d:%d) exit for barrier type %d\n",
            gtid, team->t.t_id, tid, bt));
}
// Release phase of the distributed barrier.  Non-primary threads first wait
// (via th_used_in_team) until they are confirmed as team members — this
// handles threads being moved in/out of teams — then wait on their go flag.
// The primary thread stores the release value into the first go flag of
// each group and wakes sleepers; group leaders then fan the release out to
// the remaining go flags of their group.  ICVs are optionally propagated.
static void __kmp_dist_barrier_release(
    enum barrier_type bt, kmp_info_t *this_thr, int gtid, int tid,
    int propagate_icvs USE_ITT_BUILD_ARG(void *itt_sync_obj)) {
  KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_dist_release);
  kmp_team_t *team;
  distributedBarrier *b;
  kmp_bstate_t *thr_bar;
  kmp_uint64 my_current_iter, next_go;
  size_t my_go_index;
  bool group_leader;
  KA_TRACE(20, ("__kmp_dist_barrier_release: T#%d(%d) enter; barrier type %d\n",
                gtid, tid, bt));
  thr_bar = &this_thr->th.th_bar[bt].bb;
  if (!KMP_MASTER_TID(tid)) {
    // Worker: loop until released as a confirmed member of the team.
    do {
      // th_used_in_team values (from usage here): 1 = in team,
      // 3 = transitioning into team, 2 -> 0 = leaving/parked.
      if (this_thr->th.th_used_in_team.load() != 1 &&
          this_thr->th.th_used_in_team.load() != 3) {
        // Not (yet) part of a team: park until th_used_in_team becomes 3.
        kmp_flag_32<false, false> my_flag(&(this_thr->th.th_used_in_team), 3);
        // CAS 2->0 marks us fully parked; wait if parked (0) either way.
        if (KMP_COMPARE_AND_STORE_ACQ32(&(this_thr->th.th_used_in_team), 2,
                                        0) ||
            this_thr->th.th_used_in_team.load() == 0) {
          my_flag.wait(this_thr, true USE_ITT_BUILD_ARG(itt_sync_obj));
        }
#if USE_ITT_BUILD && USE_ITT_NOTIFY
        if ((__itt_sync_create_ptr && itt_sync_obj == NULL) || KMP_ITT_DEBUG) {
          // Mark the ITT fork/join barrier object around task execution.
          itt_sync_obj =
              __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier, 0, -1);
          __kmp_itt_task_starting(itt_sync_obj);
          if (bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done))
            return;
          itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier);
          if (itt_sync_obj != NULL)
            __kmp_itt_task_finished(itt_sync_obj);
        } else
#endif
            // Early exit on library shutdown at the fork/join barrier.
            if (bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done))
          return;
      }
      // Re-check membership; the state can have changed while parked.
      if (this_thr->th.th_used_in_team.load() != 1 &&
          this_thr->th.th_used_in_team.load() != 3)
        continue;
      if (bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done))
        return;
      // The team (and therefore tid/barrier pointers) may have changed
      // while this thread was parked — refresh everything from gtid.
      tid = __kmp_tid_from_gtid(gtid);
      team = this_thr->th.th_team;
      KMP_DEBUG_ASSERT(tid >= 0);
      KMP_DEBUG_ASSERT(team);
      b = team->t.b;
      my_current_iter = b->iter[tid].iter;
      next_go = my_current_iter + distributedBarrier::MAX_ITERS;
      my_go_index = tid / b->threads_per_go;
      // Finish the 3 -> 1 transition into the team.
      if (this_thr->th.th_used_in_team.load() == 3) {
        KMP_COMPARE_AND_STORE_ACQ32(&(this_thr->th.th_used_in_team), 3, 1);
      }
      // Wait for my go flag to reach the release value (may sleep).
      if (b->go[my_go_index].go.load() != next_go) {
        kmp_atomic_flag_64<false, true> my_flag(
            &(b->go[my_go_index].go), next_go, &(b->sleep[tid].sleep));
        my_flag.wait(this_thr, true USE_ITT_BUILD_ARG(itt_sync_obj));
        KMP_DEBUG_ASSERT(my_current_iter == b->iter[tid].iter ||
                         b->iter[tid].iter == 0);
        KMP_DEBUG_ASSERT(b->sleep[tid].sleep == false);
      }
      if (bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done))
        return;
      // Only a confirmed member (state 1) may proceed past the release.
      if (this_thr->th.th_used_in_team.load() == 1)
        break;
    } while (1);
    if (bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done))
      return;
    group_leader = ((tid % b->threads_per_group) == 0);
    if (group_leader) {
      // Fan the release out to the remaining go flags of my group.
      for (size_t go_idx = my_go_index + 1;
           go_idx < my_go_index + b->gos_per_group; go_idx++) {
        b->go[go_idx].go.store(next_go);
      }
      // Ensure the stores are visible before waking anyone.
      KMP_MFENCE();
    }
#if KMP_BARRIER_ICV_PUSH
    if (propagate_icvs) {
      // Copy team ICVs into this thread's implicit task and fixed ICVs.
      __kmp_init_implicit_task(team->t.t_ident, team->t.t_threads[tid], team,
                               tid, FALSE);
      copy_icvs(&team->t.t_implicit_task_taskdata[tid].td_icvs,
                (kmp_internal_control_t *)team->t.b->team_icvs);
      copy_icvs(&thr_bar->th_fixed_icvs,
                &team->t.t_implicit_task_taskdata[tid].td_icvs);
    }
#endif
    if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME && group_leader) {
      // Finite blocktime: group members may be asleep — wake them.
      size_t nproc = this_thr->th.th_team_nproc;
      size_t group_end = tid + b->threads_per_group;
      if (nproc < group_end)
        group_end = nproc;
      __kmp_dist_barrier_wakeup(bt, team, tid + 1, group_end, 1, tid);
    }
  } else {
    // Primary thread: initiate the release.
    team = this_thr->th.th_team;
    b = team->t.b;
    my_current_iter = b->iter[tid].iter;
    next_go = my_current_iter + distributedBarrier::MAX_ITERS;
#if KMP_BARRIER_ICV_PUSH
    if (propagate_icvs) {
      // Primary already has current ICVs in its implicit task; publish
      // them into its fixed ICV slot.
      copy_icvs(&thr_bar->th_fixed_icvs,
                &team->t.t_implicit_task_taskdata[tid].td_icvs);
    }
#endif
    // Release the first go flag of every group (wakes group leaders).
    for (size_t go_idx = 0; go_idx < b->num_gos; go_idx += b->gos_per_group) {
      b->go[go_idx].go.store(next_go);
    }
    if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) {
      // Wake the (possibly sleeping) leaders of the other groups.
      size_t nproc = this_thr->th.th_team_nproc;
      __kmp_dist_barrier_wakeup(bt, team, tid + b->threads_per_group, nproc,
                                b->threads_per_group, tid);
    }
    // Primary is also the leader of group 0: release its remaining gos.
    for (size_t go_idx = 1; go_idx < b->gos_per_group; go_idx++) {
      b->go[go_idx].go.store(next_go);
    }
    // Make the go stores visible before waking group members.
    KMP_MFENCE();
    if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) {
      // Wake the members of the primary's own group.
      size_t nproc = this_thr->th.th_team_nproc;
      size_t group_end = tid + b->threads_per_group;
      if (nproc < group_end)
        group_end = nproc;
      __kmp_dist_barrier_wakeup(bt, team, tid + 1, group_end, 1, tid);
    }
  }
  // Advance this thread's iteration (round-robin over MAX_ITERS).
  KMP_ASSERT(my_current_iter == b->iter[tid].iter);
  b->iter[tid].iter = (b->iter[tid].iter + 1) % distributedBarrier::MAX_ITERS;
  KA_TRACE(
      20, ("__kmp_dist_barrier_release: T#%d(%d:%d) exit for barrier type %d\n",
           gtid, team->t.t_id, tid, bt));
}
// Linear barrier gather: every worker releases its b_arrived flag to the
// primary; the primary waits on each worker's flag in turn, optionally
// performing the reduction as it goes.  With cancellable=true, a cancelled
// wait returns true to the caller; otherwise the return is always false.
template <bool cancellable = false>
static bool __kmp_linear_barrier_gather_template(
    enum barrier_type bt, kmp_info_t *this_thr, int gtid, int tid,
    void (*reduce)(void *, void *) USE_ITT_BUILD_ARG(void *itt_sync_obj)) {
  KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_linear_gather);
  kmp_team_t *team = this_thr->th.th_team;
  kmp_bstate_t *thr_bar = &this_thr->th.th_bar[bt].bb;
  kmp_info_t **other_threads = team->t.t_threads;
  KA_TRACE(
      20,
      ("__kmp_linear_barrier_gather: T#%d(%d:%d) enter for barrier type %d\n",
       gtid, team->t.t_id, tid, bt));
  KMP_DEBUG_ASSERT(this_thr == other_threads[this_thr->th.th_info.ds.ds_tid]);
#if USE_ITT_BUILD && USE_ITT_NOTIFY
  // Record arrival time for ITT frame reporting (modes 2 and 3).
  if (__kmp_forkjoin_frames_mode == 3 || __kmp_forkjoin_frames_mode == 2) {
    this_thr->th.th_bar_arrive_time = this_thr->th.th_bar_min_time =
        __itt_get_timestamp();
  }
#endif
  // Workers signal arrival; the primary collects all arrivals.
  if (!KMP_MASTER_TID(tid)) {
    KA_TRACE(20,
             ("__kmp_linear_barrier_gather: T#%d(%d:%d) releasing T#%d(%d:%d)"
              "arrived(%p): %llu => %llu\n",
              gtid, team->t.t_id, tid, __kmp_gtid_from_tid(0, team),
              team->t.t_id, 0, &thr_bar->b_arrived, thr_bar->b_arrived,
              thr_bar->b_arrived + KMP_BARRIER_STATE_BUMP));
    // Mark arrival; the primary (other_threads[0]) is woken if sleeping.
    kmp_flag_64<> flag(&thr_bar->b_arrived, other_threads[0]);
    flag.release();
  } else {
    kmp_balign_team_t *team_bar = &team->t.t_bar[bt];
    int nproc = this_thr->th.th_team_nproc;
    int i;
    // The state all workers must reach for this barrier instance.
    kmp_uint64 new_state = team_bar->b_arrived + KMP_BARRIER_STATE_BUMP;
    // Wait for each worker in turn.
    for (i = 1; i < nproc; ++i) {
#if KMP_CACHE_MANAGE
      // Prefetch the next worker's arrived flag while waiting on this one.
      if (i + 1 < nproc)
        KMP_CACHE_PREFETCH(&other_threads[i + 1]->th.th_bar[bt].bb.b_arrived);
#endif
      KA_TRACE(20, ("__kmp_linear_barrier_gather: T#%d(%d:%d) wait T#%d(%d:%d) "
                    "arrived(%p) == %llu\n",
                    gtid, team->t.t_id, tid, __kmp_gtid_from_tid(i, team),
                    team->t.t_id, i,
                    &other_threads[i]->th.th_bar[bt].bb.b_arrived, new_state));
      // Cancellable waits can return early; propagate the cancellation.
      if (cancellable) {
        kmp_flag_64<true, false> flag(
            &other_threads[i]->th.th_bar[bt].bb.b_arrived, new_state);
        if (flag.wait(this_thr, FALSE USE_ITT_BUILD_ARG(itt_sync_obj)))
          return true;
      } else {
        kmp_flag_64<> flag(&other_threads[i]->th.th_bar[bt].bb.b_arrived,
                           new_state);
        flag.wait(this_thr, FALSE USE_ITT_BUILD_ARG(itt_sync_obj));
      }
#if USE_ITT_BUILD && USE_ITT_NOTIFY
      // In frame mode 2, track the earliest arrival across the team.
      if (__kmp_forkjoin_frames_mode == 2) {
        this_thr->th.th_bar_min_time = KMP_MIN(
            this_thr->th.th_bar_min_time, other_threads[i]->th.th_bar_min_time);
      }
#endif
      if (reduce) {
        KA_TRACE(100,
                 ("__kmp_linear_barrier_gather: T#%d(%d:%d) += T#%d(%d:%d)\n",
                  gtid, team->t.t_id, tid, __kmp_gtid_from_tid(i, team),
                  team->t.t_id, i));
        OMPT_REDUCTION_DECL(this_thr, gtid);
        OMPT_REDUCTION_BEGIN;
        (*reduce)(this_thr->th.th_local.reduce_data,
                  other_threads[i]->th.th_local.reduce_data);
        OMPT_REDUCTION_END;
      }
    }
    // Publish the team-level arrived state for this barrier instance.
    team_bar->b_arrived = new_state;
    KA_TRACE(20, ("__kmp_linear_barrier_gather: T#%d(%d:%d) set team %d "
                  "arrived(%p) = %llu\n",
                  gtid, team->t.t_id, tid, team->t.t_id, &team_bar->b_arrived,
                  new_state));
  }
  KA_TRACE(
      20,
      ("__kmp_linear_barrier_gather: T#%d(%d:%d) exit for barrier type %d\n",
       gtid, team->t.t_id, tid, bt));
  return false;
}
// Linear barrier release: the primary (optionally after pushing ICVs to
// every worker's implicit task) releases each worker's b_go flag; workers
// wait on their own b_go and reset it afterward.  With cancellable=true, a
// cancelled wait returns true; otherwise the return is always false.
template <bool cancellable = false>
static bool __kmp_linear_barrier_release_template(
    enum barrier_type bt, kmp_info_t *this_thr, int gtid, int tid,
    int propagate_icvs USE_ITT_BUILD_ARG(void *itt_sync_obj)) {
  KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_linear_release);
  kmp_bstate_t *thr_bar = &this_thr->th.th_bar[bt].bb;
  kmp_team_t *team;
  if (KMP_MASTER_TID(tid)) {
    unsigned int i;
    kmp_uint32 nproc = this_thr->th.th_team_nproc;
    kmp_info_t **other_threads;
    team = __kmp_threads[gtid]->th.th_team;
    KMP_DEBUG_ASSERT(team != NULL);
    other_threads = team->t.t_threads;
    KA_TRACE(20, ("__kmp_linear_barrier_release: T#%d(%d:%d) primary enter for "
                  "barrier type %d\n",
                  gtid, team->t.t_id, tid, bt));
    if (nproc > 1) {
#if KMP_BARRIER_ICV_PUSH
      {
        KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(USER_icv_copy);
        if (propagate_icvs) {
          // Copy the primary's ICVs into every worker's implicit task
          // (ngo_* are non-globally-ordered stores on KMP_MIC).
          ngo_load(&team->t.t_implicit_task_taskdata[0].td_icvs);
          for (i = 1; i < nproc; ++i) {
            __kmp_init_implicit_task(team->t.t_ident, team->t.t_threads[i],
                                     team, i, FALSE);
            ngo_store_icvs(&team->t.t_implicit_task_taskdata[i].td_icvs,
                           &team->t.t_implicit_task_taskdata[0].td_icvs);
          }
          ngo_sync();
        }
      }
#endif
      // Release each worker by bumping its b_go flag.
      for (i = 1; i < nproc; ++i) {
#if KMP_CACHE_MANAGE
        // Prefetch the next worker's go flag while releasing this one.
        if (i + 1 < nproc)
          KMP_CACHE_PREFETCH(&other_threads[i + 1]->th.th_bar[bt].bb.b_go);
#endif
        KA_TRACE(
            20,
            ("__kmp_linear_barrier_release: T#%d(%d:%d) releasing T#%d(%d:%d) "
             "go(%p): %u => %u\n",
             gtid, team->t.t_id, tid, other_threads[i]->th.th_info.ds.ds_gtid,
             team->t.t_id, i, &other_threads[i]->th.th_bar[bt].bb.b_go,
             other_threads[i]->th.th_bar[bt].bb.b_go,
             other_threads[i]->th.th_bar[bt].bb.b_go + KMP_BARRIER_STATE_BUMP));
        kmp_flag_64<> flag(&other_threads[i]->th.th_bar[bt].bb.b_go,
                           other_threads[i]);
        flag.release();
      }
    }
  } else {
    // Worker: wait for the primary to bump my go flag.
    KA_TRACE(20, ("__kmp_linear_barrier_release: T#%d wait go(%p) == %u\n",
                  gtid, &thr_bar->b_go, KMP_BARRIER_STATE_BUMP));
    if (cancellable) {
      kmp_flag_64<true, false> flag(&thr_bar->b_go, KMP_BARRIER_STATE_BUMP);
      if (flag.wait(this_thr, TRUE USE_ITT_BUILD_ARG(itt_sync_obj)))
        return true; // barrier was cancelled
    } else {
      kmp_flag_64<> flag(&thr_bar->b_go, KMP_BARRIER_STATE_BUMP);
      flag.wait(this_thr, TRUE USE_ITT_BUILD_ARG(itt_sync_obj));
    }
#if USE_ITT_BUILD && USE_ITT_NOTIFY
    if ((__itt_sync_create_ptr && itt_sync_obj == NULL) || KMP_ITT_DEBUG) {
      // Bracket task execution during the wait with ITT events.
      itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier, 0, -1);
      __kmp_itt_task_starting(itt_sync_obj);
      if (bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done))
        return false;
      itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier);
      if (itt_sync_obj != NULL)
        __kmp_itt_task_finished(itt_sync_obj);
    } else
#endif
        // Early exit on library shutdown at the fork/join barrier.
        if (bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done))
      return false;
#ifdef KMP_DEBUG
    // tid/team may be stale after waiting; refresh for the trace below.
    tid = __kmp_tid_from_gtid(gtid);
    team = __kmp_threads[gtid]->th.th_team;
#endif
    KMP_DEBUG_ASSERT(team != NULL);
    // Reset my go flag for the next barrier instance.
    TCW_4(thr_bar->b_go, KMP_INIT_BARRIER_STATE);
    KA_TRACE(20,
             ("__kmp_linear_barrier_release: T#%d(%d:%d) set go(%p) = %u\n",
              gtid, team->t.t_id, tid, &thr_bar->b_go, KMP_INIT_BARRIER_STATE));
    KMP_MB();
  }
  KA_TRACE(
      20,
      ("__kmp_linear_barrier_release: T#%d(%d:%d) exit for barrier type %d\n",
       gtid, team->t.t_id, tid, bt));
  return false;
}
// Non-cancellable linear gather: thin wrapper over the template.
static void __kmp_linear_barrier_gather(
    enum barrier_type bt, kmp_info_t *this_thr, int gtid, int tid,
    void (*reduce)(void *, void *) USE_ITT_BUILD_ARG(void *itt_sync_obj)) {
  __kmp_linear_barrier_gather_template<false>(
      bt, this_thr, gtid, tid, reduce USE_ITT_BUILD_ARG(itt_sync_obj));
}
// Cancellable linear gather: returns true if the barrier was cancelled.
static bool __kmp_linear_barrier_gather_cancellable(
    enum barrier_type bt, kmp_info_t *this_thr, int gtid, int tid,
    void (*reduce)(void *, void *) USE_ITT_BUILD_ARG(void *itt_sync_obj)) {
  return __kmp_linear_barrier_gather_template<true>(
      bt, this_thr, gtid, tid, reduce USE_ITT_BUILD_ARG(itt_sync_obj));
}
// Non-cancellable linear release: thin wrapper over the template.
static void __kmp_linear_barrier_release(
    enum barrier_type bt, kmp_info_t *this_thr, int gtid, int tid,
    int propagate_icvs USE_ITT_BUILD_ARG(void *itt_sync_obj)) {
  __kmp_linear_barrier_release_template<false>(
      bt, this_thr, gtid, tid, propagate_icvs USE_ITT_BUILD_ARG(itt_sync_obj));
}
// Cancellable linear release: returns true if the barrier was cancelled.
static bool __kmp_linear_barrier_release_cancellable(
    enum barrier_type bt, kmp_info_t *this_thr, int gtid, int tid,
    int propagate_icvs USE_ITT_BUILD_ARG(void *itt_sync_obj)) {
  return __kmp_linear_barrier_release_template<true>(
      bt, this_thr, gtid, tid, propagate_icvs USE_ITT_BUILD_ARG(itt_sync_obj));
}
// Gather phase of the tree barrier. Threads form a branch_factor-ary tree
// (branch_factor = 1 << branch_bits, keyed on tid). Each node with children
// waits for every child's b_arrived flag to reach the new epoch (running the
// optional reduction as each child arrives), then signals its own parent via
// its b_arrived flag; the primary thread (tid 0) finally publishes the new
// epoch on the team-wide b_arrived counter.
static void __kmp_tree_barrier_gather(
    enum barrier_type bt, kmp_info_t *this_thr, int gtid, int tid,
    void (*reduce)(void *, void *) USE_ITT_BUILD_ARG(void *itt_sync_obj)) {
  KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_tree_gather);
  kmp_team_t *team = this_thr->th.th_team;
  kmp_bstate_t *thr_bar = &this_thr->th.th_bar[bt].bb;
  kmp_info_t **other_threads = team->t.t_threads;
  kmp_uint32 nproc = this_thr->th.th_team_nproc;
  kmp_uint32 branch_bits = __kmp_barrier_gather_branch_bits[bt];
  kmp_uint32 branch_factor = 1 << branch_bits;
  kmp_uint32 child;
  kmp_uint32 child_tid;
  kmp_uint64 new_state = 0; // stays 0 unless this thread has children

  KA_TRACE(
      20, ("__kmp_tree_barrier_gather: T#%d(%d:%d) enter for barrier type %d\n",
           gtid, team->t.t_id, tid, bt));
  KMP_DEBUG_ASSERT(this_thr == other_threads[this_thr->th.th_info.ds.ds_tid]);

#if USE_ITT_BUILD && USE_ITT_NOTIFY
  // Record arrival timestamp for ITT barrier-imbalance reporting.
  if (__kmp_forkjoin_frames_mode == 3 || __kmp_forkjoin_frames_mode == 2) {
    this_thr->th.th_bar_arrive_time = this_thr->th.th_bar_min_time =
        __itt_get_timestamp();
  }
#endif
  // First child of this node (if any) is at tid*branch_factor + 1.
  child_tid = (tid << branch_bits) + 1;
  if (child_tid < nproc) {
    // This thread has children: compute the epoch the children must reach,
    // then wait for each of them in turn.
    new_state = team->t.t_bar[bt].b_arrived + KMP_BARRIER_STATE_BUMP;
    child = 1;
    do {
      kmp_info_t *child_thr = other_threads[child_tid];
      kmp_bstate_t *child_bar = &child_thr->th.th_bar[bt].bb;
#if KMP_CACHE_MANAGE
      // Prefetch the next child's arrived flag while waiting on this one.
      if (child + 1 <= branch_factor && child_tid + 1 < nproc)
        KMP_CACHE_PREFETCH(
            &other_threads[child_tid + 1]->th.th_bar[bt].bb.b_arrived);
#endif
      KA_TRACE(20,
               ("__kmp_tree_barrier_gather: T#%d(%d:%d) wait T#%d(%d:%u) "
                "arrived(%p) == %llu\n",
                gtid, team->t.t_id, tid, __kmp_gtid_from_tid(child_tid, team),
                team->t.t_id, child_tid, &child_bar->b_arrived, new_state));
      // Block until the child bumps its b_arrived flag to the new epoch.
      kmp_flag_64<> flag(&child_bar->b_arrived, new_state);
      flag.wait(this_thr, FALSE USE_ITT_BUILD_ARG(itt_sync_obj));
#if USE_ITT_BUILD && USE_ITT_NOTIFY
      // Fold the child's minimum arrival time into ours (frames mode 2).
      if (__kmp_forkjoin_frames_mode == 2) {
        this_thr->th.th_bar_min_time = KMP_MIN(this_thr->th.th_bar_min_time,
                                               child_thr->th.th_bar_min_time);
      }
#endif
      if (reduce) {
        // Accumulate the child's reduction data into this thread's.
        KA_TRACE(100,
                 ("__kmp_tree_barrier_gather: T#%d(%d:%d) += T#%d(%d:%u)\n",
                  gtid, team->t.t_id, tid, __kmp_gtid_from_tid(child_tid, team),
                  team->t.t_id, child_tid));
        OMPT_REDUCTION_DECL(this_thr, gtid);
        OMPT_REDUCTION_BEGIN;
        (*reduce)(this_thr->th.th_local.reduce_data,
                  child_thr->th.th_local.reduce_data);
        OMPT_REDUCTION_END;
      }
      child++;
      child_tid++;
    } while (child <= branch_factor && child_tid < nproc);
  }

  if (!KMP_MASTER_TID(tid)) {
    // Non-primary: after all children arrived, signal our parent.
    kmp_int32 parent_tid = (tid - 1) >> branch_bits;
    KA_TRACE(20,
             ("__kmp_tree_barrier_gather: T#%d(%d:%d) releasing T#%d(%d:%d) "
              "arrived(%p): %llu => %llu\n",
              gtid, team->t.t_id, tid, __kmp_gtid_from_tid(parent_tid, team),
              team->t.t_id, parent_tid, &thr_bar->b_arrived, thr_bar->b_arrived,
              thr_bar->b_arrived + KMP_BARRIER_STATE_BUMP));
    // Bump our own b_arrived flag; the parent may be sleeping on it, so the
    // flag carries the parent thread as the waiter to wake.
    kmp_flag_64<> flag(&thr_bar->b_arrived, other_threads[parent_tid]);
    flag.release();
  } else {
    // Primary thread: publish the new epoch team-wide. When nproc == 1 no
    // child loop ran, so new_state was never computed — bump directly.
    if (nproc > 1)
      team->t.t_bar[bt].b_arrived = new_state;
    else
      team->t.t_bar[bt].b_arrived += KMP_BARRIER_STATE_BUMP;
    KA_TRACE(20, ("__kmp_tree_barrier_gather: T#%d(%d:%d) set team %d "
                  "arrived(%p) = %llu\n",
                  gtid, team->t.t_id, tid, team->t.t_id,
                  &team->t.t_bar[bt].b_arrived, team->t.t_bar[bt].b_arrived));
  }
  KA_TRACE(20,
           ("__kmp_tree_barrier_gather: T#%d(%d:%d) exit for barrier type %d\n",
            gtid, team->t.t_id, tid, bt));
}
// Release phase of the tree barrier. Workers first wait on their own b_go
// flag; once released, each thread releases its children in the same
// branch_factor-ary tree used by the gather, optionally pushing ICVs down to
// each child's implicit task first.
static void __kmp_tree_barrier_release(
    enum barrier_type bt, kmp_info_t *this_thr, int gtid, int tid,
    int propagate_icvs USE_ITT_BUILD_ARG(void *itt_sync_obj)) {
  KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_tree_release);
  kmp_team_t *team;
  kmp_bstate_t *thr_bar = &this_thr->th.th_bar[bt].bb;
  kmp_uint32 nproc;
  kmp_uint32 branch_bits = __kmp_barrier_release_branch_bits[bt];
  kmp_uint32 branch_factor = 1 << branch_bits;
  kmp_uint32 child;
  kmp_uint32 child_tid;

  if (!KMP_MASTER_TID(tid)) {
    // Worker: wait for the parent to bump our b_go flag.
    KA_TRACE(20, ("__kmp_tree_barrier_release: T#%d wait go(%p) == %u\n", gtid,
                  &thr_bar->b_go, KMP_BARRIER_STATE_BUMP));
    kmp_flag_64<> flag(&thr_bar->b_go, KMP_BARRIER_STATE_BUMP);
    flag.wait(this_thr, TRUE USE_ITT_BUILD_ARG(itt_sync_obj));
#if USE_ITT_BUILD && USE_ITT_NOTIFY
    if ((__itt_sync_create_ptr && itt_sync_obj == NULL) || KMP_ITT_DEBUG) {
      // Fork/join barrier: the sync object could not be obtained before the
      // wait, so fetch it now and report the task transition to ITT.
      itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier, 0, -1);
      __kmp_itt_task_starting(itt_sync_obj);

      // Early exit path for threads being reaped at shutdown.
      if (bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done))
        return;

      itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier);
      if (itt_sync_obj != NULL)
        __kmp_itt_task_finished(itt_sync_obj);
    } else
#endif
      // Early exit for reaping threads releasing the fork/join barrier.
      // NOTE: this `if` intentionally pairs with the `else` above when ITT
      // notify is compiled in.
      if (bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done))
        return;

    // Only after being released may the worker assume the team is valid;
    // re-read team and tid (tid can change across fork/join).
    team = __kmp_threads[gtid]->th.th_team;
    KMP_DEBUG_ASSERT(team != NULL);
    tid = __kmp_tid_from_gtid(gtid);

    // Reset our b_go for the next barrier episode.
    TCW_4(thr_bar->b_go, KMP_INIT_BARRIER_STATE);
    KA_TRACE(20,
             ("__kmp_tree_barrier_release: T#%d(%d:%d) set go(%p) = %u\n", gtid,
              team->t.t_id, tid, &thr_bar->b_go, KMP_INIT_BARRIER_STATE));
    KMP_MB(); // Flush pending memory writes before touching children.
  } else {
    team = __kmp_threads[gtid]->th.th_team;
    KMP_DEBUG_ASSERT(team != NULL);
    KA_TRACE(20, ("__kmp_tree_barrier_release: T#%d(%d:%d) primary enter for "
                  "barrier type %d\n",
                  gtid, team->t.t_id, tid, bt));
  }
  nproc = this_thr->th.th_team_nproc;
  // First child of this node is at tid*branch_factor + 1.
  child_tid = (tid << branch_bits) + 1;
  if (child_tid < nproc) {
    kmp_info_t **other_threads = team->t.t_threads;
    child = 1;
    // Release each child in turn.
    do {
      kmp_info_t *child_thr = other_threads[child_tid];
      kmp_bstate_t *child_bar = &child_thr->th.th_bar[bt].bb;
#if KMP_CACHE_MANAGE
      // Prefetch the next child's go flag while releasing this one.
      if (child + 1 <= branch_factor && child_tid + 1 < nproc)
        KMP_CACHE_PREFETCH(
            &other_threads[child_tid + 1]->th.th_bar[bt].bb.b_go);
#endif
#if KMP_BARRIER_ICV_PUSH
      {
        KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(USER_icv_copy);
        if (propagate_icvs) {
          // Push the primary's ICVs into the child's implicit task before
          // the child is allowed to run.
          __kmp_init_implicit_task(team->t.t_ident,
                                   team->t.t_threads[child_tid], team,
                                   child_tid, FALSE);
          copy_icvs(&team->t.t_implicit_task_taskdata[child_tid].td_icvs,
                    &team->t.t_implicit_task_taskdata[0].td_icvs);
        }
      }
#endif
      KA_TRACE(20,
               ("__kmp_tree_barrier_release: T#%d(%d:%d) releasing T#%d(%d:%u)"
                "go(%p): %u => %u\n",
                gtid, team->t.t_id, tid, __kmp_gtid_from_tid(child_tid, team),
                team->t.t_id, child_tid, &child_bar->b_go, child_bar->b_go,
                child_bar->b_go + KMP_BARRIER_STATE_BUMP));
      // Release the child from the barrier (wakes it if sleeping).
      kmp_flag_64<> flag(&child_bar->b_go, child_thr);
      flag.release();
      child++;
      child_tid++;
    } while (child <= branch_factor && child_tid < nproc);
  }
  KA_TRACE(
      20, ("__kmp_tree_barrier_release: T#%d(%d:%d) exit for barrier type %d\n",
           gtid, team->t.t_id, tid, bt));
}
// Gather phase of the hypercube-style barrier. At each level, threads whose
// low bits (at that level) are non-zero signal a parent and stop; the rest
// gather their children at distance (1 << level) and proceed to the next
// level. The primary thread ends up having gathered everyone and publishes
// the new epoch on the team-wide b_arrived counter.
static void __kmp_hyper_barrier_gather(
    enum barrier_type bt, kmp_info_t *this_thr, int gtid, int tid,
    void (*reduce)(void *, void *) USE_ITT_BUILD_ARG(void *itt_sync_obj)) {
  KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_hyper_gather);
  kmp_team_t *team = this_thr->th.th_team;
  kmp_bstate_t *thr_bar = &this_thr->th.th_bar[bt].bb;
  kmp_info_t **other_threads = team->t.t_threads;
  // Sentinel: the target epoch is computed lazily, only once this thread
  // actually has children to wait for.
  kmp_uint64 new_state = KMP_BARRIER_UNUSED_STATE;
  kmp_uint32 num_threads = this_thr->th.th_team_nproc;
  kmp_uint32 branch_bits = __kmp_barrier_gather_branch_bits[bt];
  kmp_uint32 branch_factor = 1 << branch_bits;
  kmp_uint32 offset;
  kmp_uint32 level;

  KA_TRACE(
      20,
      ("__kmp_hyper_barrier_gather: T#%d(%d:%d) enter for barrier type %d\n",
       gtid, team->t.t_id, tid, bt));
  KMP_DEBUG_ASSERT(this_thr == other_threads[this_thr->th.th_info.ds.ds_tid]);

#if USE_ITT_BUILD && USE_ITT_NOTIFY
  // Record arrival timestamp for ITT barrier-imbalance reporting.
  if (__kmp_forkjoin_frames_mode == 3 || __kmp_forkjoin_frames_mode == 2) {
    this_thr->th.th_bar_arrive_time = this_thr->th.th_bar_min_time =
        __itt_get_timestamp();
  }
#endif
  // Reusable flag on our own b_arrived; the waiter is set right before the
  // release to the parent.
  kmp_flag_64<> p_flag(&thr_bar->b_arrived);
  for (level = 0, offset = 1; offset < num_threads;
       level += branch_bits, offset <<= branch_bits) {
    kmp_uint32 child;
    kmp_uint32 child_tid;

    if (((tid >> level) & (branch_factor - 1)) != 0) {
      // Not a subtree root at this level: signal the parent and stop.
      kmp_int32 parent_tid = tid & ~((1 << (level + branch_bits)) - 1);
      KMP_MB(); // make local writes visible before releasing the parent
      KA_TRACE(20,
               ("__kmp_hyper_barrier_gather: T#%d(%d:%d) releasing T#%d(%d:%d) "
                "arrived(%p): %llu => %llu\n",
                gtid, team->t.t_id, tid, __kmp_gtid_from_tid(parent_tid, team),
                team->t.t_id, parent_tid, &thr_bar->b_arrived,
                thr_bar->b_arrived,
                thr_bar->b_arrived + KMP_BARRIER_STATE_BUMP));
      // Bump our own b_arrived; the parent may be sleeping on it.
      p_flag.set_waiter(other_threads[parent_tid]);
      p_flag.release();
      break;
    }

    // Subtree root: wait for children at this level to check in.
    if (new_state == KMP_BARRIER_UNUSED_STATE)
      new_state = team->t.t_bar[bt].b_arrived + KMP_BARRIER_STATE_BUMP;
    for (child = 1, child_tid = tid + (1 << level);
         child < branch_factor && child_tid < num_threads;
         child++, child_tid += (1 << level)) {
      kmp_info_t *child_thr = other_threads[child_tid];
      kmp_bstate_t *child_bar = &child_thr->th.th_bar[bt].bb;
#if KMP_CACHE_MANAGE
      kmp_uint32 next_child_tid = child_tid + (1 << level);
      // Prefetch the next child's arrived flag while waiting on this one.
      if (child + 1 < branch_factor && next_child_tid < num_threads)
        KMP_CACHE_PREFETCH(
            &other_threads[next_child_tid]->th.th_bar[bt].bb.b_arrived);
#endif
      KA_TRACE(20,
               ("__kmp_hyper_barrier_gather: T#%d(%d:%d) wait T#%d(%d:%u) "
                "arrived(%p) == %llu\n",
                gtid, team->t.t_id, tid, __kmp_gtid_from_tid(child_tid, team),
                team->t.t_id, child_tid, &child_bar->b_arrived, new_state));
      // Block until the child reaches the new epoch.
      kmp_flag_64<> c_flag(&child_bar->b_arrived, new_state);
      c_flag.wait(this_thr, FALSE USE_ITT_BUILD_ARG(itt_sync_obj));
      KMP_MB(); // ensure the child's data is visible before reducing
#if USE_ITT_BUILD && USE_ITT_NOTIFY
      // Fold the child's minimum arrival time into ours (frames mode 2).
      if (__kmp_forkjoin_frames_mode == 2) {
        this_thr->th.th_bar_min_time = KMP_MIN(this_thr->th.th_bar_min_time,
                                               child_thr->th.th_bar_min_time);
      }
#endif
      if (reduce) {
        // Accumulate the child's reduction data into this thread's.
        KA_TRACE(100,
                 ("__kmp_hyper_barrier_gather: T#%d(%d:%d) += T#%d(%d:%u)\n",
                  gtid, team->t.t_id, tid, __kmp_gtid_from_tid(child_tid, team),
                  team->t.t_id, child_tid));
        OMPT_REDUCTION_DECL(this_thr, gtid);
        OMPT_REDUCTION_BEGIN;
        (*reduce)(this_thr->th.th_local.reduce_data,
                  child_thr->th.th_local.reduce_data);
        OMPT_REDUCTION_END;
      }
    }
  }

  if (KMP_MASTER_TID(tid)) {
    // Primary thread publishes the new epoch team-wide. If it never had a
    // child to wait on, new_state was never computed — bump directly.
    if (new_state == KMP_BARRIER_UNUSED_STATE)
      team->t.t_bar[bt].b_arrived += KMP_BARRIER_STATE_BUMP;
    else
      team->t.t_bar[bt].b_arrived = new_state;
    KA_TRACE(20, ("__kmp_hyper_barrier_gather: T#%d(%d:%d) set team %d "
                  "arrived(%p) = %llu\n",
                  gtid, team->t.t_id, tid, team->t.t_id,
                  &team->t.t_bar[bt].b_arrived, team->t.t_bar[bt].b_arrived));
  }
  KA_TRACE(
      20, ("__kmp_hyper_barrier_gather: T#%d(%d:%d) exit for barrier type %d\n",
           gtid, team->t.t_id, tid, bt));
}
// When defined, the release walks the hypercube levels top-down, i.e. in the
// reverse order of the corresponding gather; otherwise bottom-up.
#define KMP_REVERSE_HYPER_BAR
// Release phase of the hypercube-style barrier. Workers wait on their own
// b_go flag (fork/join workers may not yet belong to a valid team until
// released), then every thread releases the children it gathered, pushing
// fixed ICVs down the tree when propagate_icvs is set.
static void __kmp_hyper_barrier_release(
    enum barrier_type bt, kmp_info_t *this_thr, int gtid, int tid,
    int propagate_icvs USE_ITT_BUILD_ARG(void *itt_sync_obj)) {
  KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_hyper_release);
  kmp_team_t *team;
  kmp_bstate_t *thr_bar = &this_thr->th.th_bar[bt].bb;
  kmp_info_t **other_threads;
  kmp_uint32 num_threads;
  kmp_uint32 branch_bits = __kmp_barrier_release_branch_bits[bt];
  kmp_uint32 branch_factor = 1 << branch_bits;
  kmp_uint32 child;
  kmp_uint32 child_tid;
  kmp_uint32 offset;
  kmp_uint32 level;

  if (KMP_MASTER_TID(tid)) {
    team = __kmp_threads[gtid]->th.th_team;
    KMP_DEBUG_ASSERT(team != NULL);
    KA_TRACE(20, ("__kmp_hyper_barrier_release: T#%d(%d:%d) primary enter for "
                  "barrier type %d\n",
                  gtid, team->t.t_id, tid, bt));
#if KMP_BARRIER_ICV_PUSH
    if (propagate_icvs) {
      // Stage the primary's ICVs in its fixed-ICV slot so children can copy
      // them during the release sweep below.
      copy_icvs(&thr_bar->th_fixed_icvs,
                &team->t.t_implicit_task_taskdata[tid].td_icvs);
    }
#endif
  } else {
    // Worker: wait for the parent to bump our b_go flag.
    KA_TRACE(20, ("__kmp_hyper_barrier_release: T#%d wait go(%p) == %u\n", gtid,
                  &thr_bar->b_go, KMP_BARRIER_STATE_BUMP));
    kmp_flag_64<> flag(&thr_bar->b_go, KMP_BARRIER_STATE_BUMP);
    flag.wait(this_thr, TRUE USE_ITT_BUILD_ARG(itt_sync_obj));
#if USE_ITT_BUILD && USE_ITT_NOTIFY
    if ((__itt_sync_create_ptr && itt_sync_obj == NULL) || KMP_ITT_DEBUG) {
      // Fork/join barrier: the sync object could not be obtained before the
      // wait, so fetch it now and report the task transition to ITT.
      itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier, 0, -1);
      __kmp_itt_task_starting(itt_sync_obj);

      // Early exit path for threads being reaped at shutdown.
      if (bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done))
        return;

      itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier);
      if (itt_sync_obj != NULL)
        __kmp_itt_task_finished(itt_sync_obj);
    } else
#endif
      // Early exit for reaping threads releasing the fork/join barrier.
      // NOTE: this `if` intentionally pairs with the `else` above when ITT
      // notify is compiled in.
      if (bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done))
        return;

    // Only after being released may the worker assume the team is valid;
    // re-read team and tid (tid can change across fork/join).
    team = __kmp_threads[gtid]->th.th_team;
    KMP_DEBUG_ASSERT(team != NULL);
    tid = __kmp_tid_from_gtid(gtid);

    // Reset our b_go for the next barrier episode.
    TCW_4(thr_bar->b_go, KMP_INIT_BARRIER_STATE);
    KA_TRACE(20,
             ("__kmp_hyper_barrier_release: T#%d(%d:%d) set go(%p) = %u\n",
              gtid, team->t.t_id, tid, &thr_bar->b_go, KMP_INIT_BARRIER_STATE));
    KMP_MB(); // Flush pending memory writes before touching children.
  }
  num_threads = this_thr->th.th_team_nproc;
  other_threads = team->t.t_threads;

#ifdef KMP_REVERSE_HYPER_BAR
  // Climb to the highest level at which this thread is a subtree root...
  for (level = 0, offset = 1;
       offset < num_threads && (((tid >> level) & (branch_factor - 1)) == 0);
       level += branch_bits, offset <<= branch_bits)
    ;

  // ...then walk back down, releasing children level by level.
  for (level -= branch_bits, offset >>= branch_bits; offset != 0;
       level -= branch_bits, offset >>= branch_bits)
#else
  // Walk the levels bottom-up, same order as the gather.
  for (level = 0, offset = 1; offset < num_threads;
       level += branch_bits, offset <<= branch_bits)
#endif // KMP_REVERSE_HYPER_BAR
  {
#ifdef KMP_REVERSE_HYPER_BAR
    // Release children at this level from highest tid to lowest. The initial
    // child count is a conservative bound, clamped to branch_factor - 1.
    child = num_threads >> ((level == 0) ? level : level - 1);
    for (child = (child < branch_factor - 1) ? child : branch_factor - 1,
        child_tid = tid + (child << level);
         child >= 1; child--, child_tid -= (1 << level))
#else
    if (((tid >> level) & (branch_factor - 1)) != 0)
      // Not a subtree root above this level: our own parent handles the rest.
      break;
    for (child = 1, child_tid = tid + (1 << level);
         child < branch_factor && child_tid < num_threads;
         child++, child_tid += (1 << level))
#endif // KMP_REVERSE_HYPER_BAR
    {
      if (child_tid >= num_threads)
        continue; // child slot beyond team size — skip
      else {
        kmp_info_t *child_thr = other_threads[child_tid];
        kmp_bstate_t *child_bar = &child_thr->th.th_bar[bt].bb;
#if KMP_CACHE_MANAGE
        kmp_uint32 next_child_tid = child_tid - (1 << level);
// Prefetch the next child's go flag while releasing this one.
#ifdef KMP_REVERSE_HYPER_BAR
        if (child - 1 >= 1 && next_child_tid < num_threads)
#else
        if (child + 1 < branch_factor && next_child_tid < num_threads)
#endif
          KMP_CACHE_PREFETCH(
              &other_threads[next_child_tid]->th.th_bar[bt].bb.b_go);
#endif
#if KMP_BARRIER_ICV_PUSH
        if (propagate_icvs) // push staged ICVs to the child
          copy_icvs(&child_bar->th_fixed_icvs, &thr_bar->th_fixed_icvs);
#endif
        KA_TRACE(
            20,
            ("__kmp_hyper_barrier_release: T#%d(%d:%d) releasing T#%d(%d:%u)"
             "go(%p): %u => %u\n",
             gtid, team->t.t_id, tid, __kmp_gtid_from_tid(child_tid, team),
             team->t.t_id, child_tid, &child_bar->b_go, child_bar->b_go,
             child_bar->b_go + KMP_BARRIER_STATE_BUMP));
        // Release the child from the barrier (wakes it if sleeping).
        kmp_flag_64<> flag(&child_bar->b_go, child_thr);
        flag.release();
      }
    }
  }
#if KMP_BARRIER_ICV_PUSH
  if (propagate_icvs &&
      !KMP_MASTER_TID(tid)) {
    // Worker: install the ICVs received in th_fixed_icvs into this thread's
    // implicit task (the primary already has them in place).
    __kmp_init_implicit_task(team->t.t_ident, team->t.t_threads[tid], team, tid,
                             FALSE);
    copy_icvs(&team->t.t_implicit_task_taskdata[tid].td_icvs,
              &thr_bar->th_fixed_icvs);
  }
#endif
  KA_TRACE(
      20,
      ("__kmp_hyper_barrier_release: T#%d(%d:%d) exit for barrier type %d\n",
       gtid, team->t.t_id, tid, bt));
}
// (Re-)initialize this thread's hierarchical-barrier bookkeeping (level,
// parent, leaf children, oncore flag offset) when the team, team size, or
// this thread's tid has changed since the last barrier.
// Returns true when team or tid changed (callers use this to disable reuse
// of old on-core state).
static bool __kmp_init_hierarchical_barrier_thread(enum barrier_type bt,
                                                   kmp_bstate_t *thr_bar,
                                                   kmp_uint32 nproc, int gtid,
                                                   int tid, kmp_team_t *team) {
  // Determine which aspects of the cached state are stale.
  bool uninitialized = thr_bar->team == NULL;
  bool team_changed = team != thr_bar->team;
  bool team_sz_changed = nproc != thr_bar->nproc;
  bool tid_changed = tid != thr_bar->old_tid;
  bool retval = false;

  if (uninitialized || team_sz_changed) {
    __kmp_get_hierarchy(nproc, thr_bar);
  }

  if (uninitialized || team_sz_changed || tid_changed) {
    thr_bar->my_level = thr_bar->depth - 1; // default: primary thread's level
    thr_bar->parent_tid = -1; // default: primary thread has no parent
    if (!KMP_MASTER_TID(tid)) {
      // Walk up the hierarchy levels to find this thread's parent and level.
      kmp_uint32 d = 0;
      while (d < thr_bar->depth) {
        kmp_uint32 rem;
        if (d == thr_bar->depth - 2) {
          // Reached the level just below the primary: parent is tid 0.
          thr_bar->parent_tid = 0;
          thr_bar->my_level = d;
          break;
        } else if ((rem = tid % thr_bar->skip_per_level[d + 1]) != 0) {
          // Not a subtree root at the next level, so this is our level; the
          // parent is the subtree root we hang off of at level d+1.
          thr_bar->parent_tid = tid - rem;
          thr_bar->my_level = d;
          break;
        }
        ++d;
      }
    }
    // Byte offset of this thread's slot within the parent's 8-byte flag
    // (slots are filled from the high byte down; see leaf_state below).
    __kmp_type_convert(7 - ((tid - thr_bar->parent_tid) /
                            (thr_bar->skip_per_level[thr_bar->my_level])),
                       &(thr_bar->offset));
    thr_bar->old_tid = tid;
    thr_bar->wait_flag = KMP_BARRIER_NOT_WAITING;
    thr_bar->team = team;
    thr_bar->parent_bar =
        &team->t.t_threads[thr_bar->parent_tid]->th.th_bar[bt].bb;
  }
  if (uninitialized || team_changed || tid_changed) {
    thr_bar->team = team;
    thr_bar->parent_bar =
        &team->t.t_threads[thr_bar->parent_tid]->th.th_bar[bt].bb;
    retval = true;
  }
  if (uninitialized || team_sz_changed || tid_changed) {
    thr_bar->nproc = nproc;
    thr_bar->leaf_kids = thr_bar->base_leaf_kids;
    if (thr_bar->my_level == 0)
      thr_bar->leaf_kids = 0; // leaves have no leaf children
    // Clamp leaf_kids so children never run past the end of the team.
    if (thr_bar->leaf_kids && (kmp_uint32)tid + thr_bar->leaf_kids + 1 > nproc)
      __kmp_type_convert(nproc - tid - 1, &(thr_bar->leaf_kids));
    // Build the leaf_state mask: one non-zero byte per leaf child, filled
    // from byte 7 downward to mirror the offset computation above.
    thr_bar->leaf_state = 0;
    for (int i = 0; i < thr_bar->leaf_kids; ++i)
      ((char *)&(thr_bar->leaf_state))[7 - i] = 1;
  }
  return retval;
}
// Gather phase of the hierarchical (machine-topology-aware) barrier. Leaf
// children check in on byte slices of their parent's b_arrived flag when the
// on-core fast path is usable (infinite blocktime, non-nested); otherwise
// each child is waited on via its own 64-bit b_arrived flag, level by level.
static void __kmp_hierarchical_barrier_gather(
    enum barrier_type bt, kmp_info_t *this_thr, int gtid, int tid,
    void (*reduce)(void *, void *) USE_ITT_BUILD_ARG(void *itt_sync_obj)) {
  KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_hier_gather);
  kmp_team_t *team = this_thr->th.th_team;
  kmp_bstate_t *thr_bar = &this_thr->th.th_bar[bt].bb;
  kmp_uint32 nproc = this_thr->th.th_team_nproc;
  kmp_info_t **other_threads = team->t.t_threads;
  kmp_uint64 new_state = 0;

  int level = team->t.t_level;
  if (other_threads[0]
          ->th.th_teams_microtask) // presumably inside a teams construct
    if (this_thr->th.th_teams_size.nteams > 1)
      ++level; // level was not increased in teams construct for team_of_masters
  // The on-core fast path is only used at nesting level 1.
  if (level == 1)
    thr_bar->use_oncore_barrier = 1;
  else
    thr_bar->use_oncore_barrier = 0;

  KA_TRACE(20, ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) enter for "
                "barrier type %d\n",
                gtid, team->t.t_id, tid, bt));
  KMP_DEBUG_ASSERT(this_thr == other_threads[this_thr->th.th_info.ds.ds_tid]);

#if USE_ITT_BUILD && USE_ITT_NOTIFY
  // Record arrival timestamp for ITT barrier-imbalance reporting.
  if (__kmp_forkjoin_frames_mode == 3 || __kmp_forkjoin_frames_mode == 2) {
    this_thr->th.th_bar_arrive_time = __itt_get_timestamp();
  }
#endif

  (void)__kmp_init_hierarchical_barrier_thread(bt, thr_bar, nproc, gtid, tid,
                                               team);

  if (thr_bar->my_level) { // this thread has children to gather
    kmp_int32 child_tid;
    new_state =
        (kmp_uint64)team->t.t_bar[bt].b_arrived + KMP_BARRIER_STATE_BUMP;
    if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME &&
        thr_bar->use_oncore_barrier) {
      if (thr_bar->leaf_kids) {
        // On-core path: each leaf child sets its own byte inside this
        // thread's b_arrived flag; wait until all leaf bytes are set.
        kmp_uint64 leaf_state =
            KMP_MASTER_TID(tid)
                ? thr_bar->b_arrived | thr_bar->leaf_state
                : team->t.t_bar[bt].b_arrived | thr_bar->leaf_state;
        KA_TRACE(20, ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) waiting "
                      "for leaf kids\n",
                      gtid, team->t.t_id, tid));
        kmp_flag_64<> flag(&thr_bar->b_arrived, leaf_state);
        flag.wait(this_thr, FALSE USE_ITT_BUILD_ARG(itt_sync_obj));
        if (reduce) {
          // Reduce data from all leaf children (tid+1 .. tid+leaf_kids).
          OMPT_REDUCTION_DECL(this_thr, gtid);
          OMPT_REDUCTION_BEGIN;
          for (child_tid = tid + 1; child_tid <= tid + thr_bar->leaf_kids;
               ++child_tid) {
            KA_TRACE(100, ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) += "
                           "T#%d(%d:%d)\n",
                           gtid, team->t.t_id, tid,
                           __kmp_gtid_from_tid(child_tid, team), team->t.t_id,
                           child_tid));
            (*reduce)(this_thr->th.th_local.reduce_data,
                      other_threads[child_tid]->th.th_local.reduce_data);
          }
          OMPT_REDUCTION_END;
        }
        // Atomically clear the leaf-state bytes for the next episode.
        KMP_TEST_THEN_AND64(&thr_bar->b_arrived, ~(thr_bar->leaf_state));
      }
      // Wait for non-leaf children (levels 1..my_level-1) on their own flags.
      for (kmp_uint32 d = 1; d < thr_bar->my_level;
           ++d) {
        kmp_uint32 last = tid + thr_bar->skip_per_level[d + 1],
                   skip = thr_bar->skip_per_level[d];
        if (last > nproc)
          last = nproc;
        for (child_tid = tid + skip; child_tid < (int)last; child_tid += skip) {
          kmp_info_t *child_thr = other_threads[child_tid];
          kmp_bstate_t *child_bar = &child_thr->th.th_bar[bt].bb;
          KA_TRACE(20, ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) wait "
                        "T#%d(%d:%d) "
                        "arrived(%p) == %llu\n",
                        gtid, team->t.t_id, tid,
                        __kmp_gtid_from_tid(child_tid, team), team->t.t_id,
                        child_tid, &child_bar->b_arrived, new_state));
          kmp_flag_64<> flag(&child_bar->b_arrived, new_state);
          flag.wait(this_thr, FALSE USE_ITT_BUILD_ARG(itt_sync_obj));
          if (reduce) {
            KA_TRACE(100, ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) += "
                           "T#%d(%d:%d)\n",
                           gtid, team->t.t_id, tid,
                           __kmp_gtid_from_tid(child_tid, team), team->t.t_id,
                           child_tid));
            (*reduce)(this_thr->th.th_local.reduce_data,
                      child_thr->th.th_local.reduce_data);
          }
        }
      }
    } else { // No on-core fast path: wait on every child's own flag,
      // lowest level first (d starts at 0 here, unlike above).
      for (kmp_uint32 d = 0; d < thr_bar->my_level;
           ++d) {
        kmp_uint32 last = tid + thr_bar->skip_per_level[d + 1],
                   skip = thr_bar->skip_per_level[d];
        if (last > nproc)
          last = nproc;
        for (child_tid = tid + skip; child_tid < (int)last; child_tid += skip) {
          kmp_info_t *child_thr = other_threads[child_tid];
          kmp_bstate_t *child_bar = &child_thr->th.th_bar[bt].bb;
          KA_TRACE(20, ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) wait "
                        "T#%d(%d:%d) "
                        "arrived(%p) == %llu\n",
                        gtid, team->t.t_id, tid,
                        __kmp_gtid_from_tid(child_tid, team), team->t.t_id,
                        child_tid, &child_bar->b_arrived, new_state));
          kmp_flag_64<> flag(&child_bar->b_arrived, new_state);
          flag.wait(this_thr, FALSE USE_ITT_BUILD_ARG(itt_sync_obj));
          if (reduce) {
            KA_TRACE(100, ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) += "
                           "T#%d(%d:%d)\n",
                           gtid, team->t.t_id, tid,
                           __kmp_gtid_from_tid(child_tid, team), team->t.t_id,
                           child_tid));
            (*reduce)(this_thr->th.th_local.reduce_data,
                      child_thr->th.th_local.reduce_data);
          }
        }
      }
    }
  }
  // All children gathered; now notify the parent (or publish team state).
  if (!KMP_MASTER_TID(tid)) {
    KA_TRACE(20, ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) releasing"
                  " T#%d(%d:%d) arrived(%p): %llu => %llu\n",
                  gtid, team->t.t_id, tid,
                  __kmp_gtid_from_tid(thr_bar->parent_tid, team), team->t.t_id,
                  thr_bar->parent_tid, &thr_bar->b_arrived, thr_bar->b_arrived,
                  thr_bar->b_arrived + KMP_BARRIER_STATE_BUMP));

    if (thr_bar->my_level || __kmp_dflt_blocktime != KMP_MAX_BLOCKTIME ||
        !thr_bar->use_oncore_barrier) {
      // Parent waits on this thread's full 64-bit b_arrived flag.
      kmp_flag_64<> flag(&thr_bar->b_arrived,
                         other_threads[thr_bar->parent_tid]);
      flag.release();
    } else {
      // On-core leaf: set our byte slot (offset+1) in the parent's b_arrived.
      thr_bar->b_arrived = team->t.t_bar[bt].b_arrived + KMP_BARRIER_STATE_BUMP;
      kmp_flag_oncore flag(&thr_bar->parent_bar->b_arrived,
                           thr_bar->offset + 1);
      flag.set_waiter(other_threads[thr_bar->parent_tid]);
      flag.release();
    }
  } else {
    // Primary thread publishes the new epoch team-wide.
    team->t.t_bar[bt].b_arrived = new_state;
    KA_TRACE(20, ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) set team %d "
                  "arrived(%p) = %llu\n",
                  gtid, team->t.t_id, tid, team->t.t_id,
                  &team->t.t_bar[bt].b_arrived, team->t.t_bar[bt].b_arrived));
  }
  KA_TRACE(20, ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) exit for "
                "barrier type %d\n",
                gtid, team->t.t_id, tid, bt));
}
// Release phase of the hierarchical barrier. Workers wait either on their own
// b_go flag or, on the on-core fast path, on a byte slice of their parent's
// b_go; internal nodes then release their subtrees (via NGO cache-line stores
// piggybacking b_go with fixed ICVs where available), and ICVs are propagated
// down when propagate_icvs is set.
static void __kmp_hierarchical_barrier_release(
    enum barrier_type bt, kmp_info_t *this_thr, int gtid, int tid,
    int propagate_icvs USE_ITT_BUILD_ARG(void *itt_sync_obj)) {
  KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_hier_release);
  kmp_team_t *team;
  kmp_bstate_t *thr_bar = &this_thr->th.th_bar[bt].bb;
  kmp_uint32 nproc;
  bool team_change = false; // true when thread state was reinitialized below
  if (KMP_MASTER_TID(tid)) {
    team = __kmp_threads[gtid]->th.th_team;
    KMP_DEBUG_ASSERT(team != NULL);
    KA_TRACE(20, ("__kmp_hierarchical_barrier_release: T#%d(%d:%d) primary "
                  "entered barrier type %d\n",
                  gtid, team->t.t_id, tid, bt));
  } else {
    // Worker: choose how to wait for release.
    if (!thr_bar->use_oncore_barrier ||
        __kmp_dflt_blocktime != KMP_MAX_BLOCKTIME || thr_bar->my_level != 0 ||
        thr_bar->team == NULL) {
      // Traditional path: wait on our own 64-bit b_go flag.
      thr_bar->wait_flag = KMP_BARRIER_OWN_FLAG;
      kmp_flag_64<> flag(&thr_bar->b_go, KMP_BARRIER_STATE_BUMP);
      flag.wait(this_thr, TRUE USE_ITT_BUILD_ARG(itt_sync_obj));
      TCW_8(thr_bar->b_go,
            KMP_INIT_BARRIER_STATE); // reset for the next episode
    } else {
      // On-core path: state is initialized, we are a leaf, blocktime is
      // infinite, not nested — wait on our byte (offset+1) of the parent's
      // b_go flag.
      thr_bar->wait_flag = KMP_BARRIER_PARENT_FLAG;
      kmp_flag_oncore flag(&thr_bar->parent_bar->b_go, KMP_BARRIER_STATE_BUMP,
                           thr_bar->offset + 1, bt,
                           this_thr USE_ITT_BUILD_ARG(itt_sync_obj));
      flag.wait(this_thr, TRUE);
      if (thr_bar->wait_flag ==
          KMP_BARRIER_SWITCHING) {
        // The wait was switched over to our own b_go flag mid-wait; reset it.
        TCW_8(thr_bar->b_go,
              KMP_INIT_BARRIER_STATE);
      } else {
        // Clear only our byte slot on the parent's b_go flag.
        (RCAST(volatile char *,
               &(thr_bar->parent_bar->b_go)))[thr_bar->offset + 1] = 0;
      }
    }
    thr_bar->wait_flag = KMP_BARRIER_NOT_WAITING;
    // Early exit for reaping threads releasing the fork/join barrier.
    if (bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done))
      return;
    // Only after being released may the worker assume the team is valid;
    // re-read team and tid (tid can change across fork/join).
    team = __kmp_threads[gtid]->th.th_team;
    KMP_DEBUG_ASSERT(team != NULL);
    tid = __kmp_tid_from_gtid(gtid);
    KA_TRACE(
        20,
        ("__kmp_hierarchical_barrier_release: T#%d(%d:%d) set go(%p) = %u\n",
         gtid, team->t.t_id, tid, &thr_bar->b_go, KMP_INIT_BARRIER_STATE));
    KMP_MB(); // Flush pending memory writes before touching children.
  }
  nproc = this_thr->th.th_team_nproc;
  int level = team->t.t_level;
  if (team->t.t_threads[0]
          ->th.th_teams_microtask) { // presumably inside a teams construct
    if (team->t.t_pkfn != (microtask_t)__kmp_teams_master &&
        this_thr->th.th_teams_level == level)
      ++level; // level was not increased in teams construct for team_of_workers
    if (this_thr->th.th_teams_size.nteams > 1)
      ++level; // level was not increased in teams construct for team_of_masters
  }
  // The on-core fast path is only used at nesting level 1.
  if (level == 1)
    thr_bar->use_oncore_barrier = 1;
  else
    thr_bar->use_oncore_barrier = 0;

  // Snapshot old leaf bookkeeping before reinitialization so newly added
  // leaves can be released individually below.
  unsigned short int old_leaf_kids = thr_bar->leaf_kids;
  kmp_uint64 old_leaf_state = thr_bar->leaf_state;
  team_change = __kmp_init_hierarchical_barrier_thread(bt, thr_bar, nproc, gtid,
                                                       tid, team);
  // If the team itself changed, old leaf state is meaningless.
  if (team_change)
    old_leaf_kids = 0;
#if KMP_BARRIER_ICV_PUSH
  if (propagate_icvs) {
    __kmp_init_implicit_task(team->t.t_ident, team->t.t_threads[tid], team, tid,
                             FALSE);
    if (KMP_MASTER_TID(
            tid)) {
      // Primary: stage ICVs into th_fixed_icvs for children to pick up.
      copy_icvs(&thr_bar->th_fixed_icvs,
                &team->t.t_implicit_task_taskdata[tid].td_icvs);
    } else if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME &&
               thr_bar->use_oncore_barrier) {
      if (!thr_bar->my_level)
        // On-core leaf: pull the parent's staged ICVs straight into our
        // implicit task (non-leaves get them piggybacked via NGO store).
        copy_icvs(&team->t.t_implicit_task_taskdata[tid].td_icvs,
                  &thr_bar->parent_bar->th_fixed_icvs);
    } else {
      if (thr_bar->my_level)
        // Internal node: re-stage parent's ICVs for our own children.
        copy_icvs(&thr_bar->th_fixed_icvs, &thr_bar->parent_bar->th_fixed_icvs);
      else
        // Leaf: pull parent's staged ICVs into our implicit task.
        copy_icvs(&team->t.t_implicit_task_taskdata[tid].td_icvs,
                  &thr_bar->parent_bar->th_fixed_icvs);
    }
  }
#endif // KMP_BARRIER_ICV_PUSH
  // Now release this thread's children, if any.
  if (thr_bar->my_level) { // not a leaf
    kmp_int32 child_tid;
    kmp_uint32 last;
    if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME &&
        thr_bar->use_oncore_barrier) {
      if (KMP_MASTER_TID(tid)) {
        // Primary does a flat release of all non-leaf subtree roots: set our
        // own b_go, then NGO-store the fixed-ICV cache line (which carries
        // b_go with it) to each subtree root.
        thr_bar->b_go = KMP_BARRIER_STATE_BUMP;

        ngo_load(&thr_bar->th_fixed_icvs);
        // Visit every level-1 subtree root (stride skip_per_level[1]),
        // skipping the leaves.
        for (child_tid = thr_bar->skip_per_level[1]; child_tid < (int)nproc;
             child_tid += thr_bar->skip_per_level[1]) {
          kmp_bstate_t *child_bar =
              &team->t.t_threads[child_tid]->th.th_bar[bt].bb;
          KA_TRACE(20, ("__kmp_hierarchical_barrier_release: T#%d(%d:%d) "
                        "releasing T#%d(%d:%d)"
                        " go(%p): %u => %u\n",
                        gtid, team->t.t_id, tid,
                        __kmp_gtid_from_tid(child_tid, team), team->t.t_id,
                        child_tid, &child_bar->b_go, child_bar->b_go,
                        child_bar->b_go + KMP_BARRIER_STATE_BUMP));
          // ICVs and b_go land in one NGO store.
          ngo_store_go(&child_bar->th_fixed_icvs, &thr_bar->th_fixed_icvs);
        }
        ngo_sync();
      }
      TCW_8(thr_bar->b_go,
            KMP_INIT_BARRIER_STATE); // reset our b_go for next episode
      // Release leaf children.
      if (thr_bar->leaf_kids) {
        if (team_change ||
            old_leaf_kids < thr_bar->leaf_kids) { // some new leaves appeared
          if (old_leaf_kids) {
            // Release the pre-existing leaves via their bytes on our b_go.
            thr_bar->b_go |= old_leaf_state;
          }
          // Release the newly added leaves individually via their own b_go.
          last = tid + thr_bar->skip_per_level[1];
          if (last > nproc)
            last = nproc;
          for (child_tid = tid + 1 + old_leaf_kids; child_tid < (int)last;
               ++child_tid) {
            kmp_info_t *child_thr = team->t.t_threads[child_tid];
            kmp_bstate_t *child_bar = &child_thr->th.th_bar[bt].bb;
            KA_TRACE(
                20,
                ("__kmp_hierarchical_barrier_release: T#%d(%d:%d) releasing"
                 " T#%d(%d:%d) go(%p): %u => %u\n",
                 gtid, team->t.t_id, tid, __kmp_gtid_from_tid(child_tid, team),
                 team->t.t_id, child_tid, &child_bar->b_go, child_bar->b_go,
                 child_bar->b_go + KMP_BARRIER_STATE_BUMP));
            kmp_flag_64<> flag(&child_bar->b_go, child_thr);
            flag.release();
          }
        } else {
          // All leaves are old: release them all at once by setting their
          // byte slots on our own b_go flag.
          thr_bar->b_go |= thr_bar->leaf_state;
        }
      }
    } else {
      // No on-core fast path: hierarchical release, highest level first.
      for (int d = thr_bar->my_level - 1; d >= 0;
           --d) {
        last = tid + thr_bar->skip_per_level[d + 1];
        kmp_uint32 skip = thr_bar->skip_per_level[d];
        if (last > nproc)
          last = nproc;
        for (child_tid = tid + skip; child_tid < (int)last; child_tid += skip) {
          kmp_info_t *child_thr = team->t.t_threads[child_tid];
          kmp_bstate_t *child_bar = &child_thr->th.th_bar[bt].bb;
          KA_TRACE(20, ("__kmp_hierarchical_barrier_release: T#%d(%d:%d) "
                        "releasing T#%d(%d:%d) go(%p): %u => %u\n",
                        gtid, team->t.t_id, tid,
                        __kmp_gtid_from_tid(child_tid, team), team->t.t_id,
                        child_tid, &child_bar->b_go, child_bar->b_go,
                        child_bar->b_go + KMP_BARRIER_STATE_BUMP));
          // Release the child via its own b_go flag.
          kmp_flag_64<> flag(&child_bar->b_go, child_thr);
          flag.release();
        }
      }
    }
#if KMP_BARRIER_ICV_PUSH
    if (propagate_icvs && !KMP_MASTER_TID(tid))
      // Internal (non-primary) node: install the staged ICVs into our own
      // implicit task now that children have been released.
      copy_icvs(&team->t.t_implicit_task_taskdata[tid].td_icvs,
                &thr_bar->th_fixed_icvs);
#endif // KMP_BARRIER_ICV_PUSH
  }
  KA_TRACE(20, ("__kmp_hierarchical_barrier_release: T#%d(%d:%d) exit for "
                "barrier type %d\n",
                gtid, team->t.t_id, tid, bt));
}
// Compile-time switch for tracking barrier cancellation. The <true>
// specialization carries a real boolean flag; the <false> specialization is
// an empty type whose assignment is a no-op and which always converts to
// false, so non-cancellable barrier code pays no cost.
template <bool cancellable> struct is_cancellable {};

template <> struct is_cancellable<true> {
  bool value = false; // starts out "not cancelled"
  is_cancellable() = default;
  is_cancellable(bool b) : value(b) {}
  is_cancellable &operator=(bool b) {
    value = b;
    return *this;
  }
  operator bool() const { return value; }
};

template <> struct is_cancellable<false> {
  // Assignment is discarded; the flag can never become true.
  is_cancellable &operator=(bool) { return *this; }
  constexpr operator bool() const { return false; }
};
- template <bool cancellable = false>
- static int __kmp_barrier_template(enum barrier_type bt, int gtid, int is_split,
- size_t reduce_size, void *reduce_data,
- void (*reduce)(void *, void *)) {
- KMP_TIME_PARTITIONED_BLOCK(OMP_plain_barrier);
- KMP_SET_THREAD_STATE_BLOCK(PLAIN_BARRIER);
- int tid = __kmp_tid_from_gtid(gtid);
- kmp_info_t *this_thr = __kmp_threads[gtid];
- kmp_team_t *team = this_thr->th.th_team;
- int status = 0;
- is_cancellable<cancellable> cancelled;
- #if OMPT_SUPPORT && OMPT_OPTIONAL
- ompt_data_t *my_task_data;
- ompt_data_t *my_parallel_data;
- void *return_address;
- ompt_sync_region_t barrier_kind;
- #endif
- KA_TRACE(15, ("__kmp_barrier: T#%d(%d:%d) has arrived\n", gtid,
- __kmp_team_from_gtid(gtid)->t.t_id, __kmp_tid_from_gtid(gtid)));
- #if OMPT_SUPPORT
- if (ompt_enabled.enabled) {
- #if OMPT_OPTIONAL
- my_task_data = OMPT_CUR_TASK_DATA(this_thr);
- my_parallel_data = OMPT_CUR_TEAM_DATA(this_thr);
- return_address = OMPT_LOAD_RETURN_ADDRESS(gtid);
- barrier_kind = __ompt_get_barrier_kind(bt, this_thr);
- if (ompt_enabled.ompt_callback_sync_region) {
- ompt_callbacks.ompt_callback(ompt_callback_sync_region)(
- barrier_kind, ompt_scope_begin, my_parallel_data, my_task_data,
- return_address);
- }
- if (ompt_enabled.ompt_callback_sync_region_wait) {
- ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait)(
- barrier_kind, ompt_scope_begin, my_parallel_data, my_task_data,
- return_address);
- }
- #endif
-
-
-
- this_thr->th.ompt_thread_info.state = ompt_state_wait_barrier;
- }
- #endif
- if (!team->t.t_serialized) {
- #if USE_ITT_BUILD
-
- void *itt_sync_obj = NULL;
- #if USE_ITT_NOTIFY
- if (__itt_sync_create_ptr || KMP_ITT_DEBUG)
- itt_sync_obj = __kmp_itt_barrier_object(gtid, bt, 1);
- #endif
- #endif
- if (__kmp_tasking_mode == tskm_extra_barrier) {
- __kmp_tasking_barrier(team, this_thr, gtid);
- KA_TRACE(15,
- ("__kmp_barrier: T#%d(%d:%d) past tasking barrier\n", gtid,
- __kmp_team_from_gtid(gtid)->t.t_id, __kmp_tid_from_gtid(gtid)));
- }
-
-
-
- if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) {
- #if KMP_USE_MONITOR
- this_thr->th.th_team_bt_intervals =
- team->t.t_implicit_task_taskdata[tid].td_icvs.bt_intervals;
- this_thr->th.th_team_bt_set =
- team->t.t_implicit_task_taskdata[tid].td_icvs.bt_set;
- #else
- this_thr->th.th_team_bt_intervals = KMP_BLOCKTIME_INTERVAL(team, tid);
- #endif
- }
- #if USE_ITT_BUILD
- if (__itt_sync_create_ptr || KMP_ITT_DEBUG)
- __kmp_itt_barrier_starting(gtid, itt_sync_obj);
- #endif
- #if USE_DEBUGGER
-
- if (KMP_MASTER_TID(tid)) {
- team->t.t_bar[bt].b_master_arrived += 1;
- } else {
- this_thr->th.th_bar[bt].bb.b_worker_arrived += 1;
- }
- #endif
- if (reduce != NULL) {
-
- this_thr->th.th_local.reduce_data = reduce_data;
- }
- if (KMP_MASTER_TID(tid) && __kmp_tasking_mode != tskm_immediate_exec)
-
- __kmp_task_team_setup(this_thr, team, 0);
- if (cancellable) {
- cancelled = __kmp_linear_barrier_gather_cancellable(
- bt, this_thr, gtid, tid, reduce USE_ITT_BUILD_ARG(itt_sync_obj));
- } else {
- switch (__kmp_barrier_gather_pattern[bt]) {
- case bp_dist_bar: {
- __kmp_dist_barrier_gather(bt, this_thr, gtid, tid,
- reduce USE_ITT_BUILD_ARG(itt_sync_obj));
- break;
- }
- case bp_hyper_bar: {
-
- KMP_ASSERT(__kmp_barrier_gather_branch_bits[bt]);
- __kmp_hyper_barrier_gather(bt, this_thr, gtid, tid,
- reduce USE_ITT_BUILD_ARG(itt_sync_obj));
- break;
- }
- case bp_hierarchical_bar: {
- __kmp_hierarchical_barrier_gather(
- bt, this_thr, gtid, tid, reduce USE_ITT_BUILD_ARG(itt_sync_obj));
- break;
- }
- case bp_tree_bar: {
-
- KMP_ASSERT(__kmp_barrier_gather_branch_bits[bt]);
- __kmp_tree_barrier_gather(bt, this_thr, gtid, tid,
- reduce USE_ITT_BUILD_ARG(itt_sync_obj));
- break;
- }
- default: {
- __kmp_linear_barrier_gather(bt, this_thr, gtid, tid,
- reduce USE_ITT_BUILD_ARG(itt_sync_obj));
- }
- }
- }
- KMP_MB();
- if (KMP_MASTER_TID(tid)) {
- status = 0;
- if (__kmp_tasking_mode != tskm_immediate_exec && !cancelled) {
- __kmp_task_team_wait(this_thr, team USE_ITT_BUILD_ARG(itt_sync_obj));
- }
- #if USE_DEBUGGER
-
-
- team->t.t_bar[bt].b_team_arrived += 1;
- #endif
- if (__kmp_omp_cancellation) {
- kmp_int32 cancel_request = KMP_ATOMIC_LD_RLX(&team->t.t_cancel_request);
-
- if (cancel_request == cancel_loop ||
- cancel_request == cancel_sections) {
- KMP_ATOMIC_ST_RLX(&team->t.t_cancel_request, cancel_noreq);
- }
- }
- #if USE_ITT_BUILD
-
- if (__itt_sync_create_ptr || KMP_ITT_DEBUG)
- __kmp_itt_barrier_middle(gtid, itt_sync_obj);
- #endif
- #if USE_ITT_BUILD && USE_ITT_NOTIFY
-
- if ((__itt_frame_submit_v3_ptr || KMP_ITT_DEBUG) &&
- __kmp_forkjoin_frames_mode &&
- (this_thr->th.th_teams_microtask == NULL ||
- this_thr->th.th_teams_size.nteams == 1) &&
- team->t.t_active_level == 1) {
- ident_t *loc = __kmp_threads[gtid]->th.th_ident;
- kmp_uint64 cur_time = __itt_get_timestamp();
- kmp_info_t **other_threads = team->t.t_threads;
- int nproc = this_thr->th.th_team_nproc;
- int i;
- switch (__kmp_forkjoin_frames_mode) {
- case 1:
- __kmp_itt_frame_submit(gtid, this_thr->th.th_frame_time, cur_time, 0,
- loc, nproc);
- this_thr->th.th_frame_time = cur_time;
- break;
- case 2:
-
- __kmp_itt_frame_submit(gtid, this_thr->th.th_bar_min_time, cur_time,
- 1, loc, nproc);
- break;
- case 3:
- if (__itt_metadata_add_ptr) {
-
- kmp_uint64 delta = cur_time - this_thr->th.th_bar_arrive_time;
-
-
- this_thr->th.th_bar_arrive_time = 0;
- for (i = 1; i < nproc; ++i) {
- delta += (cur_time - other_threads[i]->th.th_bar_arrive_time);
- other_threads[i]->th.th_bar_arrive_time = 0;
- }
- __kmp_itt_metadata_imbalance(gtid, this_thr->th.th_frame_time,
- cur_time, delta,
- (kmp_uint64)(reduce != NULL));
- }
- __kmp_itt_frame_submit(gtid, this_thr->th.th_frame_time, cur_time, 0,
- loc, nproc);
- this_thr->th.th_frame_time = cur_time;
- break;
- }
- }
- #endif
- } else {
- status = 1;
- #if USE_ITT_BUILD
- if (__itt_sync_create_ptr || KMP_ITT_DEBUG)
- __kmp_itt_barrier_middle(gtid, itt_sync_obj);
- #endif
- }
- if ((status == 1 || !is_split) && !cancelled) {
- if (cancellable) {
- cancelled = __kmp_linear_barrier_release_cancellable(
- bt, this_thr, gtid, tid, FALSE USE_ITT_BUILD_ARG(itt_sync_obj));
- } else {
- switch (__kmp_barrier_release_pattern[bt]) {
- case bp_dist_bar: {
- KMP_ASSERT(__kmp_barrier_release_branch_bits[bt]);
- __kmp_dist_barrier_release(bt, this_thr, gtid, tid,
- FALSE USE_ITT_BUILD_ARG(itt_sync_obj));
- break;
- }
- case bp_hyper_bar: {
- KMP_ASSERT(__kmp_barrier_release_branch_bits[bt]);
- __kmp_hyper_barrier_release(bt, this_thr, gtid, tid,
- FALSE USE_ITT_BUILD_ARG(itt_sync_obj));
- break;
- }
- case bp_hierarchical_bar: {
- __kmp_hierarchical_barrier_release(
- bt, this_thr, gtid, tid, FALSE USE_ITT_BUILD_ARG(itt_sync_obj));
- break;
- }
- case bp_tree_bar: {
- KMP_ASSERT(__kmp_barrier_release_branch_bits[bt]);
- __kmp_tree_barrier_release(bt, this_thr, gtid, tid,
- FALSE USE_ITT_BUILD_ARG(itt_sync_obj));
- break;
- }
- default: {
- __kmp_linear_barrier_release(bt, this_thr, gtid, tid,
- FALSE USE_ITT_BUILD_ARG(itt_sync_obj));
- }
- }
- }
- if (__kmp_tasking_mode != tskm_immediate_exec && !cancelled) {
- __kmp_task_team_sync(this_thr, team);
- }
- }
- #if USE_ITT_BUILD
-
- if (__itt_sync_create_ptr || KMP_ITT_DEBUG)
- __kmp_itt_barrier_finished(gtid, itt_sync_obj);
- #endif
- } else {
- status = 0;
- if (__kmp_tasking_mode != tskm_immediate_exec) {
- if (this_thr->th.th_task_team != NULL) {
- #if USE_ITT_NOTIFY
- void *itt_sync_obj = NULL;
- if (__itt_sync_create_ptr || KMP_ITT_DEBUG) {
- itt_sync_obj = __kmp_itt_barrier_object(gtid, bt, 1);
- __kmp_itt_barrier_starting(gtid, itt_sync_obj);
- }
- #endif
- KMP_DEBUG_ASSERT(
- this_thr->th.th_task_team->tt.tt_found_proxy_tasks == TRUE ||
- this_thr->th.th_task_team->tt.tt_hidden_helper_task_encountered ==
- TRUE);
- __kmp_task_team_wait(this_thr, team USE_ITT_BUILD_ARG(itt_sync_obj));
- __kmp_task_team_setup(this_thr, team, 0);
- #if USE_ITT_BUILD
- if (__itt_sync_create_ptr || KMP_ITT_DEBUG)
- __kmp_itt_barrier_finished(gtid, itt_sync_obj);
- #endif
- }
- }
- }
- KA_TRACE(15, ("__kmp_barrier: T#%d(%d:%d) is leaving with return value %d\n",
- gtid, __kmp_team_from_gtid(gtid)->t.t_id,
- __kmp_tid_from_gtid(gtid), status));
- #if OMPT_SUPPORT
- if (ompt_enabled.enabled) {
- #if OMPT_OPTIONAL
- if (ompt_enabled.ompt_callback_sync_region_wait) {
- ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait)(
- barrier_kind, ompt_scope_end, my_parallel_data, my_task_data,
- return_address);
- }
- if (ompt_enabled.ompt_callback_sync_region) {
- ompt_callbacks.ompt_callback(ompt_callback_sync_region)(
- barrier_kind, ompt_scope_end, my_parallel_data, my_task_data,
- return_address);
- }
- #endif
- this_thr->th.ompt_thread_info.state = ompt_state_work_parallel;
- }
- #endif
- if (cancellable)
- return (int)cancelled;
- return status;
- }
- int __kmp_barrier(enum barrier_type bt, int gtid, int is_split,
- size_t reduce_size, void *reduce_data,
- void (*reduce)(void *, void *)) {
- return __kmp_barrier_template<>(bt, gtid, is_split, reduce_size, reduce_data,
- reduce);
- }
#if defined(KMP_GOMP_COMPAT)
// GOMP-compatibility entry: a plain barrier that observes cancellation.
// Returns TRUE when the barrier was cancelled, FALSE otherwise.
int __kmp_barrier_gomp_cancel(int gtid) {
  if (__kmp_omp_cancellation) {
    int cancelled = __kmp_barrier_template<true>(bs_plain_barrier, gtid, FALSE,
                                                 0, NULL, NULL);
    if (cancelled) {
      // The barrier was exited early.  Only workers must undo their arrival
      // bump so the barrier's arrived counter stays consistent; the primary
      // thread has nothing to revert.
      if (!KMP_MASTER_TID(__kmp_tid_from_gtid(gtid))) {
        kmp_info_t *this_thr = __kmp_threads[gtid];
        this_thr->th.th_bar[bs_plain_barrier].bb.b_arrived -=
            KMP_BARRIER_STATE_BUMP;
      }
    }
    return cancelled;
  }
  // Cancellation disabled: behave as an ordinary plain barrier.
  __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL);
  return FALSE;
}
#endif
- void __kmp_end_split_barrier(enum barrier_type bt, int gtid) {
- KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_end_split_barrier);
- KMP_SET_THREAD_STATE_BLOCK(PLAIN_BARRIER);
- KMP_DEBUG_ASSERT(bt < bs_last_barrier);
- int tid = __kmp_tid_from_gtid(gtid);
- kmp_info_t *this_thr = __kmp_threads[gtid];
- kmp_team_t *team = this_thr->th.th_team;
- if (!team->t.t_serialized) {
- if (KMP_MASTER_GTID(gtid)) {
- switch (__kmp_barrier_release_pattern[bt]) {
- case bp_dist_bar: {
- __kmp_dist_barrier_release(bt, this_thr, gtid, tid,
- FALSE USE_ITT_BUILD_ARG(NULL));
- break;
- }
- case bp_hyper_bar: {
- KMP_ASSERT(__kmp_barrier_release_branch_bits[bt]);
- __kmp_hyper_barrier_release(bt, this_thr, gtid, tid,
- FALSE USE_ITT_BUILD_ARG(NULL));
- break;
- }
- case bp_hierarchical_bar: {
- __kmp_hierarchical_barrier_release(bt, this_thr, gtid, tid,
- FALSE USE_ITT_BUILD_ARG(NULL));
- break;
- }
- case bp_tree_bar: {
- KMP_ASSERT(__kmp_barrier_release_branch_bits[bt]);
- __kmp_tree_barrier_release(bt, this_thr, gtid, tid,
- FALSE USE_ITT_BUILD_ARG(NULL));
- break;
- }
- default: {
- __kmp_linear_barrier_release(bt, this_thr, gtid, tid,
- FALSE USE_ITT_BUILD_ARG(NULL));
- }
- }
- if (__kmp_tasking_mode != tskm_immediate_exec) {
- __kmp_task_team_sync(this_thr, team);
- }
- }
- }
- }
// Implicit barrier at the end of a parallel region (the join).  All team
// threads gather here; the primary thread additionally drains the task team
// and reports ITT frame/imbalance data.  The matching release happens in
// __kmp_fork_barrier(), not here.
void __kmp_join_barrier(int gtid) {
  KMP_TIME_PARTITIONED_BLOCK(OMP_join_barrier);
  KMP_SET_THREAD_STATE_BLOCK(FORK_JOIN_BARRIER);
  KMP_DEBUG_ASSERT(__kmp_threads && __kmp_threads[gtid]);
  kmp_info_t *this_thr = __kmp_threads[gtid];
  kmp_team_t *team;
  int tid;
#ifdef KMP_DEBUG
  int team_id;
#endif
#if USE_ITT_BUILD
  void *itt_sync_obj = NULL;
#if USE_ITT_NOTIFY
  if (__itt_sync_create_ptr || KMP_ITT_DEBUG)
    // Look up the ITT object associated with the fork/join barrier.
    itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier);
#endif
#endif
#if ((USE_ITT_BUILD && USE_ITT_NOTIFY) || defined KMP_DEBUG)
  int nproc = this_thr->th.th_team_nproc;
#endif
  KMP_MB();

  // Get current team info.
  team = this_thr->th.th_team;
  KMP_DEBUG_ASSERT(nproc == team->t.t_nproc);
  tid = __kmp_tid_from_gtid(gtid);
#ifdef KMP_DEBUG
  team_id = team->t.t_id;
  kmp_info_t *master_thread = this_thr->th.th_team_master;
  if (master_thread != team->t.t_threads[0]) {
    __kmp_print_structure();
  }
#endif
  // NOTE: master_thread is declared only under KMP_DEBUG; this compiles in
  // release builds because KMP_DEBUG_ASSERT discards its argument there.
  KMP_DEBUG_ASSERT(master_thread == team->t.t_threads[0]);
  KMP_MB();

  // Verify thread/team state is consistent before gathering.
  KMP_DEBUG_ASSERT(TCR_PTR(this_thr->th.th_team));
  KMP_DEBUG_ASSERT(TCR_PTR(this_thr->th.th_root));
  KMP_DEBUG_ASSERT(this_thr == team->t.t_threads[tid]);
  KA_TRACE(10, ("__kmp_join_barrier: T#%d(%d:%d) arrived at join barrier\n",
                gtid, team_id, tid));
#if OMPT_SUPPORT
  if (ompt_enabled.enabled) {
#if OMPT_OPTIONAL
    ompt_data_t *my_task_data;
    ompt_data_t *my_parallel_data;
    void *codeptr = NULL;
    int ds_tid = this_thr->th.th_info.ds.ds_tid;
    // Only fetch the return address when some sync-region callback will
    // actually consume it, and only on the primary thread.
    if (KMP_MASTER_TID(ds_tid) &&
        (ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait) ||
         ompt_callbacks.ompt_callback(ompt_callback_sync_region)))
      codeptr = team->t.ompt_team_info.master_return_address;
    my_task_data = OMPT_CUR_TASK_DATA(this_thr);
    my_parallel_data = OMPT_CUR_TEAM_DATA(this_thr);
    // Tool notification: the implicit-barrier sync region (and its wait)
    // begins here.
    if (ompt_enabled.ompt_callback_sync_region) {
      ompt_callbacks.ompt_callback(ompt_callback_sync_region)(
          ompt_sync_region_barrier_implicit, ompt_scope_begin, my_parallel_data,
          my_task_data, codeptr);
    }
    if (ompt_enabled.ompt_callback_sync_region_wait) {
      ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait)(
          ompt_sync_region_barrier_implicit, ompt_scope_begin, my_parallel_data,
          my_task_data, codeptr);
    }
    // Workers snapshot their task data before waiting.
    if (!KMP_MASTER_TID(ds_tid))
      this_thr->th.ompt_thread_info.task_data = *OMPT_CUR_TASK_DATA(this_thr);
#endif
    this_thr->th.ompt_thread_info.state = ompt_state_wait_barrier_implicit;
  }
#endif
  if (__kmp_tasking_mode == tskm_extra_barrier) {
    __kmp_tasking_barrier(team, this_thr, gtid);
    KA_TRACE(10, ("__kmp_join_barrier: T#%d(%d:%d) past tasking barrier\n",
                  gtid, team_id, tid));
  }
#ifdef KMP_DEBUG
  if (__kmp_tasking_mode != tskm_immediate_exec) {
    KA_TRACE(20, ("__kmp_join_barrier: T#%d, old team = %d, old task_team = "
                  "%p, th_task_team = %p\n",
                  __kmp_gtid_from_thread(this_thr), team_id,
                  team->t.t_task_team[this_thr->th.th_task_state],
                  this_thr->th.th_task_team));
    if (this_thr->th.th_task_team)
      KMP_DEBUG_ASSERT(this_thr->th.th_task_team ==
                       team->t.t_task_team[this_thr->th.th_task_state]);
  }
#endif

  // Copy the blocktime ICVs into the thread struct so the wait code can read
  // them while this thread spins in the barrier.
  if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) {
#if KMP_USE_MONITOR
    this_thr->th.th_team_bt_intervals =
        team->t.t_implicit_task_taskdata[tid].td_icvs.bt_intervals;
    this_thr->th.th_team_bt_set =
        team->t.t_implicit_task_taskdata[tid].td_icvs.bt_set;
#else
    this_thr->th.th_team_bt_intervals = KMP_BLOCKTIME_INTERVAL(team, tid);
#endif
  }
#if USE_ITT_BUILD
  if (__itt_sync_create_ptr || KMP_ITT_DEBUG)
    __kmp_itt_barrier_starting(gtid, itt_sync_obj);
#endif
  // Gather phase: dispatch on the configured gather algorithm for the
  // fork/join barrier.  No reduction callback is needed at the join.
  switch (__kmp_barrier_gather_pattern[bs_forkjoin_barrier]) {
  case bp_dist_bar: {
    __kmp_dist_barrier_gather(bs_forkjoin_barrier, this_thr, gtid, tid,
                              NULL USE_ITT_BUILD_ARG(itt_sync_obj));
    break;
  }
  case bp_hyper_bar: {
    KMP_ASSERT(__kmp_barrier_gather_branch_bits[bs_forkjoin_barrier]);
    __kmp_hyper_barrier_gather(bs_forkjoin_barrier, this_thr, gtid, tid,
                               NULL USE_ITT_BUILD_ARG(itt_sync_obj));
    break;
  }
  case bp_hierarchical_bar: {
    __kmp_hierarchical_barrier_gather(bs_forkjoin_barrier, this_thr, gtid, tid,
                                      NULL USE_ITT_BUILD_ARG(itt_sync_obj));
    break;
  }
  case bp_tree_bar: {
    KMP_ASSERT(__kmp_barrier_gather_branch_bits[bs_forkjoin_barrier]);
    __kmp_tree_barrier_gather(bs_forkjoin_barrier, this_thr, gtid, tid,
                              NULL USE_ITT_BUILD_ARG(itt_sync_obj));
    break;
  }
  default: {
    __kmp_linear_barrier_gather(bs_forkjoin_barrier, this_thr, gtid, tid,
                                NULL USE_ITT_BUILD_ARG(itt_sync_obj));
  }
  }

  // Primary-only post-gather work: drain tasks, reset display-affinity flag,
  // update stats, and emit ITT frame data.
  if (KMP_MASTER_TID(tid)) {
    if (__kmp_tasking_mode != tskm_immediate_exec) {
      __kmp_task_team_wait(this_thr, team USE_ITT_BUILD_ARG(itt_sync_obj));
    }
    if (__kmp_display_affinity) {
      KMP_CHECK_UPDATE(team->t.t_display_affinity, 0);
    }
#if KMP_STATS_ENABLED
    // Mark every worker idle for stats, and wake sleepers so the flag takes
    // effect promptly (only when a finite blocktime is configured).
    for (int i = 0; i < team->t.t_nproc; ++i) {
      kmp_info_t *team_thread = team->t.t_threads[i];
      if (team_thread == this_thr)
        continue;
      team_thread->th.th_stats->setIdleFlag();
      if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME &&
          team_thread->th.th_sleep_loc != NULL)
        __kmp_null_resume_wrapper(team_thread);
    }
#endif
#if USE_ITT_BUILD
    if (__itt_sync_create_ptr || KMP_ITT_DEBUG)
      __kmp_itt_barrier_middle(gtid, itt_sync_obj);
#endif
#if USE_ITT_BUILD && USE_ITT_NOTIFY
    // Report the frame to ITT, but only for level-1 active teams and outside
    // of (multi-team) teams constructs.
    if ((__itt_frame_submit_v3_ptr || KMP_ITT_DEBUG) &&
        __kmp_forkjoin_frames_mode &&
        (this_thr->th.th_teams_microtask == NULL ||
         this_thr->th.th_teams_size.nteams == 1) &&
        team->t.t_active_level == 1) {
      kmp_uint64 cur_time = __itt_get_timestamp();
      ident_t *loc = team->t.t_ident;
      kmp_info_t **other_threads = team->t.t_threads;
      switch (__kmp_forkjoin_frames_mode) {
      case 1: // frame from previous frame time to now
        __kmp_itt_frame_submit(gtid, this_thr->th.th_frame_time, cur_time, 0,
                               loc, nproc);
        break;
      case 2: // frame from minimum barrier time
        __kmp_itt_frame_submit(gtid, this_thr->th.th_bar_min_time, cur_time, 1,
                               loc, nproc);
        break;
      case 3: // frame plus imbalance metadata
        if (__itt_metadata_add_ptr) {
          // Accumulate per-thread wait time (now minus each arrival time),
          // starting with the primary's own wait.
          kmp_uint64 delta = cur_time - this_thr->th.th_bar_arrive_time;
          // Arrival times are zeroed once consumed.
          this_thr->th.th_bar_arrive_time = 0;
          for (int i = 1; i < nproc; ++i) {
            delta += (cur_time - other_threads[i]->th.th_bar_arrive_time);
            other_threads[i]->th.th_bar_arrive_time = 0;
          }
          __kmp_itt_metadata_imbalance(gtid, this_thr->th.th_frame_time,
                                       cur_time, delta, 0);
        }
        __kmp_itt_frame_submit(gtid, this_thr->th.th_frame_time, cur_time, 0,
                               loc, nproc);
        this_thr->th.th_frame_time = cur_time;
        break;
      }
    }
#endif
  }
#if USE_ITT_BUILD
  else {
    if (__itt_sync_create_ptr || KMP_ITT_DEBUG)
      __kmp_itt_barrier_middle(gtid, itt_sync_obj);
  }
#endif
#if KMP_DEBUG
  if (KMP_MASTER_TID(tid)) {
    KA_TRACE(
        15,
        ("__kmp_join_barrier: T#%d(%d:%d) says all %d team threads arrived\n",
         gtid, team_id, tid, nproc));
  }
#endif

  KMP_MB(); // Flush all pending memory write invalidates.
  KA_TRACE(10,
           ("__kmp_join_barrier: T#%d(%d:%d) leaving\n", gtid, team_id, tid));
}
// Implicit barrier at the start of a parallel region (the fork).  The primary
// thread sets up the task team and blocktime ICVs, then releases the workers;
// workers then (re)bind to their team, optionally pull ICVs, and apply
// affinity/display-affinity settings.
void __kmp_fork_barrier(int gtid, int tid) {
  KMP_TIME_PARTITIONED_BLOCK(OMP_fork_barrier);
  KMP_SET_THREAD_STATE_BLOCK(FORK_JOIN_BARRIER);
  kmp_info_t *this_thr = __kmp_threads[gtid];
  // Only the primary (tid == 0) has a usable team pointer on entry; workers
  // re-read th_team after the release below.
  kmp_team_t *team = (tid == 0) ? this_thr->th.th_team : NULL;
#if USE_ITT_BUILD
  void *itt_sync_obj = NULL;
#endif
  if (team)
    KA_TRACE(10, ("__kmp_fork_barrier: T#%d(%d:%d) has arrived\n", gtid,
                  (team != NULL) ? team->t.t_id : -1, tid));

  // Primary-only setup before releasing workers.
  if (KMP_MASTER_TID(tid)) {
#if USE_ITT_BUILD && USE_ITT_NOTIFY
    if (__itt_sync_create_ptr || KMP_ITT_DEBUG) {
      // Create/look up the ITT barrier object for the new parallel region.
      itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier, 1);
      __kmp_itt_barrier_middle(gtid, itt_sync_obj);
    }
#endif
#ifdef KMP_DEBUG
    KMP_DEBUG_ASSERT(team);
    kmp_info_t **other_threads = team->t.t_threads;
    int i;

    // Verify every worker is parked in a sane fork-barrier state.
    KMP_MB();
    for (i = 1; i < team->t.t_nproc; ++i) {
      KA_TRACE(500,
               ("__kmp_fork_barrier: T#%d(%d:0) checking T#%d(%d:%d) fork go "
                "== %u.\n",
                gtid, team->t.t_id, other_threads[i]->th.th_info.ds.ds_gtid,
                team->t.t_id, other_threads[i]->th.th_info.ds.ds_tid,
                other_threads[i]->th.th_bar[bs_forkjoin_barrier].bb.b_go));
      // b_go must equal the initial barrier state, ignoring the sleep bit.
      KMP_DEBUG_ASSERT(
          (TCR_4(other_threads[i]->th.th_bar[bs_forkjoin_barrier].bb.b_go) &
           ~(KMP_BARRIER_SLEEP_STATE)) == KMP_INIT_BARRIER_STATE);
      KMP_DEBUG_ASSERT(other_threads[i]->th.th_team == team);
    }
#endif
    if (__kmp_tasking_mode != tskm_immediate_exec) {
      // Set up the task team for the region before workers are released.
      __kmp_task_team_setup(this_thr, team, 0);
    }

    // Copy the blocktime ICVs into the thread struct so the wait code can
    // read them while this thread spins in the barrier.
    if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) {
#if KMP_USE_MONITOR
      this_thr->th.th_team_bt_intervals =
          team->t.t_implicit_task_taskdata[tid].td_icvs.bt_intervals;
      this_thr->th.th_team_bt_set =
          team->t.t_implicit_task_taskdata[tid].td_icvs.bt_set;
#else
      this_thr->th.th_team_bt_intervals = KMP_BLOCKTIME_INTERVAL(team, tid);
#endif
    }
  }
  // Release phase: primary wakes workers, workers wait to be woken (TRUE
  // indicates this is the fork-barrier release).
  switch (__kmp_barrier_release_pattern[bs_forkjoin_barrier]) {
  case bp_dist_bar: {
    __kmp_dist_barrier_release(bs_forkjoin_barrier, this_thr, gtid, tid,
                               TRUE USE_ITT_BUILD_ARG(NULL));
    break;
  }
  case bp_hyper_bar: {
    KMP_ASSERT(__kmp_barrier_release_branch_bits[bs_forkjoin_barrier]);
    __kmp_hyper_barrier_release(bs_forkjoin_barrier, this_thr, gtid, tid,
                                TRUE USE_ITT_BUILD_ARG(itt_sync_obj));
    break;
  }
  case bp_hierarchical_bar: {
    __kmp_hierarchical_barrier_release(bs_forkjoin_barrier, this_thr, gtid, tid,
                                       TRUE USE_ITT_BUILD_ARG(itt_sync_obj));
    break;
  }
  case bp_tree_bar: {
    KMP_ASSERT(__kmp_barrier_release_branch_bits[bs_forkjoin_barrier]);
    __kmp_tree_barrier_release(bs_forkjoin_barrier, this_thr, gtid, tid,
                               TRUE USE_ITT_BUILD_ARG(itt_sync_obj));
    break;
  }
  default: {
    __kmp_linear_barrier_release(bs_forkjoin_barrier, this_thr, gtid, tid,
                                 TRUE USE_ITT_BUILD_ARG(itt_sync_obj));
  }
  }
#if OMPT_SUPPORT
  // If this thread was parked in the implicit-barrier wait state, tell tools
  // the sync region (and, for workers, the implicit task) has ended.
  if (ompt_enabled.enabled &&
      this_thr->th.ompt_thread_info.state == ompt_state_wait_barrier_implicit) {
    int ds_tid = this_thr->th.th_info.ds.ds_tid;
    // Workers may have no team yet; fall back to the thread-local task data.
    ompt_data_t *task_data = (team)
                                 ? OMPT_CUR_TASK_DATA(this_thr)
                                 : &(this_thr->th.ompt_thread_info.task_data);
    this_thr->th.ompt_thread_info.state = ompt_state_overhead;
#if OMPT_OPTIONAL
    void *codeptr = NULL;
    if (KMP_MASTER_TID(ds_tid) &&
        (ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait) ||
         ompt_callbacks.ompt_callback(ompt_callback_sync_region)))
      codeptr = team ? team->t.ompt_team_info.master_return_address : NULL;
    if (ompt_enabled.ompt_callback_sync_region_wait) {
      ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait)(
          ompt_sync_region_barrier_implicit, ompt_scope_end, NULL, task_data,
          codeptr);
    }
    if (ompt_enabled.ompt_callback_sync_region) {
      ompt_callbacks.ompt_callback(ompt_callback_sync_region)(
          ompt_sync_region_barrier_implicit, ompt_scope_end, NULL, task_data,
          codeptr);
    }
#endif
    if (!KMP_MASTER_TID(ds_tid) && ompt_enabled.ompt_callback_implicit_task) {
      ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
          ompt_scope_end, NULL, task_data, 0, ds_tid,
          ompt_task_implicit);
    }
  }
#endif

  // Runtime shutdown: leave without rebinding to a team.
  if (TCR_4(__kmp_global.g.g_done)) {
    this_thr->th.th_task_team = NULL;
#if USE_ITT_BUILD && USE_ITT_NOTIFY
    if (__itt_sync_create_ptr || KMP_ITT_DEBUG) {
      if (!KMP_MASTER_TID(tid)) {
        itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier);
        if (itt_sync_obj)
          __kmp_itt_barrier_finished(gtid, itt_sync_obj);
      }
    }
#endif
    KA_TRACE(10, ("__kmp_fork_barrier: T#%d is leaving early\n", gtid));
    return;
  }

  // Re-read team/tid: workers learn their (possibly new) team only now.
  team = (kmp_team_t *)TCR_PTR(this_thr->th.th_team);
  KMP_DEBUG_ASSERT(team != NULL);
  tid = __kmp_tid_from_gtid(gtid);
#if KMP_BARRIER_ICV_PULL
  // PULL ICV propagation: workers copy the ICVs that __kmp_setup_icv_copy()
  // stashed in thread 0's fork/join barrier struct.
  {
    KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(USER_icv_copy);
    if (!KMP_MASTER_TID(tid)) {
      // Initialize the implicit task first, then overwrite its ICVs.
      KA_TRACE(10,
               ("__kmp_fork_barrier: T#%d(%d) is PULLing ICVs\n", gtid, tid));
      __kmp_init_implicit_task(team->t.t_ident, team->t.t_threads[tid], team,
                               tid, FALSE);
      copy_icvs(&team->t.t_implicit_task_taskdata[tid].td_icvs,
                &team->t.t_threads[0]
                     ->th.th_bar[bs_forkjoin_barrier]
                     .bb.th_fixed_icvs);
    }
  }
#endif
  if (__kmp_tasking_mode != tskm_immediate_exec) {
    __kmp_task_team_sync(this_thr, team);
  }
#if KMP_AFFINITY_SUPPORTED
  kmp_proc_bind_t proc_bind = team->t.t_proc_bind;
  if (proc_bind == proc_bind_intel) {
    // Re-apply balanced affinity only when the team size changed.
    if (__kmp_affinity_type == affinity_balanced && team->t.t_size_changed) {
      __kmp_balanced_affinity(this_thr, team->t.t_nproc);
    }
  } else if (proc_bind != proc_bind_false) {
    if (this_thr->th.th_new_place == this_thr->th.th_current_place) {
      KA_TRACE(100, ("__kmp_fork_barrier: T#%d already in correct place %d\n",
                     __kmp_gtid_from_thread(this_thr),
                     this_thr->th.th_current_place));
    } else {
      __kmp_affinity_set_place(gtid);
    }
  }
#endif
  // Display-affinity output, if enabled and the team requests it (or a
  // balanced-affinity team was resized).
  if (__kmp_display_affinity) {
    if (team->t.t_display_affinity
#if KMP_AFFINITY_SUPPORTED
        || (__kmp_affinity_type == affinity_balanced && team->t.t_size_changed)
#endif
    ) {
      // NULL format — presumably uses the runtime's configured affinity
      // format; confirm in __kmp_aux_display_affinity.
      __kmp_aux_display_affinity(gtid, NULL);
      this_thr->th.th_prev_num_threads = team->t.t_nproc;
      this_thr->th.th_prev_level = team->t.t_level;
    }
  }
  // Workers adopt the team's default allocator.
  if (!KMP_MASTER_TID(tid))
    KMP_CHECK_UPDATE(this_thr->th.th_def_allocator, team->t.t_def_allocator);
#if USE_ITT_BUILD && USE_ITT_NOTIFY
  if (__itt_sync_create_ptr || KMP_ITT_DEBUG) {
    if (!KMP_MASTER_TID(tid)) {
      // Look up the barrier object and mark the end of the wait.
      itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier);
      __kmp_itt_barrier_finished(gtid, itt_sync_obj);
    }
  }
#endif
  KA_TRACE(10, ("__kmp_fork_barrier: T#%d(%d:%d) is leaving\n", gtid,
                team->t.t_id, tid));
}
// Propagate a new set of internal control variables (ICVs) to the implicit
// tasks of a (re)formed team of new_nproc threads.  The transport mechanism
// is selected at compile time: PULL (workers copy from thread 0 at the fork
// barrier), PUSH (values travel with the barrier release), or the default
// linear copy performed right here.
void __kmp_setup_icv_copy(kmp_team_t *team, int new_nproc,
                          kmp_internal_control_t *new_icvs, ident_t *loc) {
  KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_setup_icv_copy);
  KMP_DEBUG_ASSERT(team && new_nproc && new_icvs);
  KMP_DEBUG_ASSERT((!TCR_4(__kmp_init_parallel)) || new_icvs->nproc);
#if KMP_BARRIER_ICV_PULL
  // PULL: stash the ICVs in thread 0's fork/join barrier struct; workers
  // copy them out in __kmp_fork_barrier().
  KMP_DEBUG_ASSERT(team->t.t_threads[0]);
  copy_icvs(
      &team->t.t_threads[0]->th.th_bar[bs_forkjoin_barrier].bb.th_fixed_icvs,
      new_icvs);
  KF_TRACE(10, ("__kmp_setup_icv_copy: PULL: T#%d this_thread=%p team=%p\n", 0,
                team->t.t_threads[0], team));
#elif KMP_BARRIER_ICV_PUSH
  // PUSH: nothing to do here — the barrier release path delivers the ICVs.
  KF_TRACE(10, ("__kmp_setup_icv_copy: PUSH: T#%d this_thread=%p team=%p\n", 0,
                team->t.t_threads[0], team));
#else
  // Default: write the ICVs into each worker's implicit task immediately.
  ngo_load(new_icvs);
  KMP_DEBUG_ASSERT(team->t.t_threads[0]);
  // f starts at 1: thread 0's implicit task is not touched here.
  for (int f = 1; f < new_nproc; ++f) {
    KF_TRACE(10, ("__kmp_setup_icv_copy: LINEAR: T#%d this_thread=%p team=%p\n",
                  f, team->t.t_threads[f], team));
    // Initialize the implicit task, then store the new ICVs into it.
    __kmp_init_implicit_task(loc, team->t.t_threads[f], team, f, FALSE);
    ngo_store_icvs(&team->t.t_implicit_task_taskdata[f].td_icvs, new_icvs);
    KF_TRACE(10, ("__kmp_setup_icv_copy: LINEAR: T#%d this_thread=%p team=%p\n",
                  f, team->t.t_threads[f], team));
  }
  ngo_sync();
#endif
}
|