- /*
- * kmp_atomic.cpp -- ATOMIC implementation routines
- */
- //===----------------------------------------------------------------------===//
- //
- // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
- // See https://llvm.org/LICENSE.txt for license information.
- // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
- //
- //===----------------------------------------------------------------------===//
- #include "kmp_atomic.h"
- #include "kmp.h" // TRUE, asm routines prototypes
- typedef unsigned char uchar;
- typedef unsigned short ushort;
- /*!
- @defgroup ATOMIC_OPS Atomic Operations
- These functions are used for implementing the many different varieties of atomic
- operations.
- The compiler is at liberty to inline atomic operations that are naturally
- supported by the target architecture. For instance on IA-32 architecture an
- atomic like this can be inlined
- @code
- static int s = 0;
- #pragma omp atomic
- s++;
- @endcode
- using the single instruction: `lock; incl s`
- However the runtime does provide entrypoints for these operations to support
- compilers that choose not to inline them. (For instance,
- `__kmpc_atomic_fixed4_add` could be used to perform the increment above.)
- The names of the functions are encoded by using the data type name and the
- operation name, as in these tables.
- Data Type | Data type encoding
- -----------|---------------
- int8_t | `fixed1`
- uint8_t | `fixed1u`
- int16_t | `fixed2`
- uint16_t | `fixed2u`
- int32_t | `fixed4`
- uint32_t | `fixed4u`
- int64_t | `fixed8`
- uint64_t | `fixed8u`
- float | `float4`
- double | `float8`
- long double (80-bit x87 extended precision) | `float10`
- complex<float> | `cmplx4`
- complex<double> | `cmplx8`
- complex<float10> | `cmplx10`
- <br>
- Operation | Operation encoding
- ----------|-------------------
- + | add
- - | sub
- \* | mul
- / | div
- & | andb
- << | shl
- \>\> | shr
- \| | orb
- ^ | xor
- && | andl
- \|\| | orl
- maximum | max
- minimum | min
- .eqv. | eqv
- .neqv. | neqv
- <br>
- For non-commutative operations, `_rev` can also be added for the reversed
- operation. For the functions that capture the result, the suffix `_cpt` is
- added.
- Update Functions
- ================
- The general form of an atomic function that just performs an update (without a
- `capture`)
- @code
- void __kmpc_atomic_<datatype>_<operation>( ident_t *id_ref, int gtid, TYPE *
- lhs, TYPE rhs );
- @endcode
- @param id_ref a pointer to the source location
- @param gtid the global thread id
- @param lhs a pointer to the left operand
- @param rhs the right operand
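- As an illustrative sketch (the exact lowering is compiler dependent; the `id`
- and `gtid` names below are placeholders), a compiler that chooses not to
- inline the update could translate
- @code
- int s = 0;
- #pragma omp atomic
- s += 5;
- @endcode
- into a call to the matching runtime entry point:
- @code
- __kmpc_atomic_fixed4_add(&id, gtid, &s, 5);
- @endcode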
- `capture` functions
- ===================
- The capture functions perform an atomic update and return a result, which is
- either the value before the update or the value after it. They take an
- additional argument to determine which result is returned.
- Their general form is therefore
- @code
- TYPE __kmpc_atomic_<datatype>_<operation>_cpt( ident_t *id_ref, int gtid, TYPE *
- lhs, TYPE rhs, int flag );
- @endcode
- @param id_ref a pointer to the source location
- @param gtid the global thread id
- @param lhs a pointer to the left operand
- @param rhs the right operand
- @param flag one if the result is to be captured *after* the operation, zero if
- captured *before*.
- The one exception is the `complex<float>` type, where the value is not
- returned; instead, an extra pointer argument receives the result.
- They look like
- @code
- void __kmpc_atomic_cmplx4_<op>_cpt( ident_t *id_ref, int gtid, kmp_cmplx32 *
- lhs, kmp_cmplx32 rhs, kmp_cmplx32 * out, int flag );
- @endcode
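- As an illustrative sketch (placeholder `id`/`gtid` names, compiler-dependent
- lowering), a captured post-increment
- @code
- #pragma omp atomic capture
- v = s++;
- @endcode
- could become
- @code
- v = __kmpc_atomic_fixed4_add_cpt(&id, gtid, &s, 1, 0); // flag 0: old value
- @endcode
- whereas `v = ++s;` would pass `flag = 1` to capture the value after the update.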
- Read and Write Operations
- =========================
- The OpenMP<sup>*</sup> standard now supports atomic operations that simply
- ensure that the value is read or written atomically, with no modification
- performed. In many cases on IA-32 architecture these operations can be inlined
- since the architecture guarantees that no tearing occurs on aligned objects
- accessed with a single memory operation of up to 64 bits in size.
- The general form of the read operations is
- @code
- TYPE __kmpc_atomic_<type>_rd ( ident_t *id_ref, int gtid, TYPE * loc );
- @endcode
- For the write operations the form is
- @code
- void __kmpc_atomic_<type>_wr ( ident_t *id_ref, int gtid, TYPE * lhs, TYPE rhs
- );
- @endcode
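- For example (sketch only, placeholder `id`/`gtid` names), an atomic read and
- write of a 4-byte integer could be lowered to
- @code
- v = __kmpc_atomic_fixed4_rd(&id, gtid, &s); // #pragma omp atomic read
- __kmpc_atomic_fixed4_wr(&id, gtid, &s, 5);  // #pragma omp atomic write
- @endcode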
- Full list of functions
- ======================
- This leads to the generation of 376 atomic functions, as follows.
- Functions for integers
- ---------------------
- There are versions here for integers of size 1,2,4 and 8 bytes both signed and
- unsigned (where that matters).
- @code
- __kmpc_atomic_fixed1_add
- __kmpc_atomic_fixed1_add_cpt
- __kmpc_atomic_fixed1_add_fp
- __kmpc_atomic_fixed1_andb
- __kmpc_atomic_fixed1_andb_cpt
- __kmpc_atomic_fixed1_andl
- __kmpc_atomic_fixed1_andl_cpt
- __kmpc_atomic_fixed1_div
- __kmpc_atomic_fixed1_div_cpt
- __kmpc_atomic_fixed1_div_cpt_rev
- __kmpc_atomic_fixed1_div_float8
- __kmpc_atomic_fixed1_div_fp
- __kmpc_atomic_fixed1_div_rev
- __kmpc_atomic_fixed1_eqv
- __kmpc_atomic_fixed1_eqv_cpt
- __kmpc_atomic_fixed1_max
- __kmpc_atomic_fixed1_max_cpt
- __kmpc_atomic_fixed1_min
- __kmpc_atomic_fixed1_min_cpt
- __kmpc_atomic_fixed1_mul
- __kmpc_atomic_fixed1_mul_cpt
- __kmpc_atomic_fixed1_mul_float8
- __kmpc_atomic_fixed1_mul_fp
- __kmpc_atomic_fixed1_neqv
- __kmpc_atomic_fixed1_neqv_cpt
- __kmpc_atomic_fixed1_orb
- __kmpc_atomic_fixed1_orb_cpt
- __kmpc_atomic_fixed1_orl
- __kmpc_atomic_fixed1_orl_cpt
- __kmpc_atomic_fixed1_rd
- __kmpc_atomic_fixed1_shl
- __kmpc_atomic_fixed1_shl_cpt
- __kmpc_atomic_fixed1_shl_cpt_rev
- __kmpc_atomic_fixed1_shl_rev
- __kmpc_atomic_fixed1_shr
- __kmpc_atomic_fixed1_shr_cpt
- __kmpc_atomic_fixed1_shr_cpt_rev
- __kmpc_atomic_fixed1_shr_rev
- __kmpc_atomic_fixed1_sub
- __kmpc_atomic_fixed1_sub_cpt
- __kmpc_atomic_fixed1_sub_cpt_rev
- __kmpc_atomic_fixed1_sub_fp
- __kmpc_atomic_fixed1_sub_rev
- __kmpc_atomic_fixed1_swp
- __kmpc_atomic_fixed1_wr
- __kmpc_atomic_fixed1_xor
- __kmpc_atomic_fixed1_xor_cpt
- __kmpc_atomic_fixed1u_add_fp
- __kmpc_atomic_fixed1u_sub_fp
- __kmpc_atomic_fixed1u_mul_fp
- __kmpc_atomic_fixed1u_div
- __kmpc_atomic_fixed1u_div_cpt
- __kmpc_atomic_fixed1u_div_cpt_rev
- __kmpc_atomic_fixed1u_div_fp
- __kmpc_atomic_fixed1u_div_rev
- __kmpc_atomic_fixed1u_shr
- __kmpc_atomic_fixed1u_shr_cpt
- __kmpc_atomic_fixed1u_shr_cpt_rev
- __kmpc_atomic_fixed1u_shr_rev
- __kmpc_atomic_fixed2_add
- __kmpc_atomic_fixed2_add_cpt
- __kmpc_atomic_fixed2_add_fp
- __kmpc_atomic_fixed2_andb
- __kmpc_atomic_fixed2_andb_cpt
- __kmpc_atomic_fixed2_andl
- __kmpc_atomic_fixed2_andl_cpt
- __kmpc_atomic_fixed2_div
- __kmpc_atomic_fixed2_div_cpt
- __kmpc_atomic_fixed2_div_cpt_rev
- __kmpc_atomic_fixed2_div_float8
- __kmpc_atomic_fixed2_div_fp
- __kmpc_atomic_fixed2_div_rev
- __kmpc_atomic_fixed2_eqv
- __kmpc_atomic_fixed2_eqv_cpt
- __kmpc_atomic_fixed2_max
- __kmpc_atomic_fixed2_max_cpt
- __kmpc_atomic_fixed2_min
- __kmpc_atomic_fixed2_min_cpt
- __kmpc_atomic_fixed2_mul
- __kmpc_atomic_fixed2_mul_cpt
- __kmpc_atomic_fixed2_mul_float8
- __kmpc_atomic_fixed2_mul_fp
- __kmpc_atomic_fixed2_neqv
- __kmpc_atomic_fixed2_neqv_cpt
- __kmpc_atomic_fixed2_orb
- __kmpc_atomic_fixed2_orb_cpt
- __kmpc_atomic_fixed2_orl
- __kmpc_atomic_fixed2_orl_cpt
- __kmpc_atomic_fixed2_rd
- __kmpc_atomic_fixed2_shl
- __kmpc_atomic_fixed2_shl_cpt
- __kmpc_atomic_fixed2_shl_cpt_rev
- __kmpc_atomic_fixed2_shl_rev
- __kmpc_atomic_fixed2_shr
- __kmpc_atomic_fixed2_shr_cpt
- __kmpc_atomic_fixed2_shr_cpt_rev
- __kmpc_atomic_fixed2_shr_rev
- __kmpc_atomic_fixed2_sub
- __kmpc_atomic_fixed2_sub_cpt
- __kmpc_atomic_fixed2_sub_cpt_rev
- __kmpc_atomic_fixed2_sub_fp
- __kmpc_atomic_fixed2_sub_rev
- __kmpc_atomic_fixed2_swp
- __kmpc_atomic_fixed2_wr
- __kmpc_atomic_fixed2_xor
- __kmpc_atomic_fixed2_xor_cpt
- __kmpc_atomic_fixed2u_add_fp
- __kmpc_atomic_fixed2u_sub_fp
- __kmpc_atomic_fixed2u_mul_fp
- __kmpc_atomic_fixed2u_div
- __kmpc_atomic_fixed2u_div_cpt
- __kmpc_atomic_fixed2u_div_cpt_rev
- __kmpc_atomic_fixed2u_div_fp
- __kmpc_atomic_fixed2u_div_rev
- __kmpc_atomic_fixed2u_shr
- __kmpc_atomic_fixed2u_shr_cpt
- __kmpc_atomic_fixed2u_shr_cpt_rev
- __kmpc_atomic_fixed2u_shr_rev
- __kmpc_atomic_fixed4_add
- __kmpc_atomic_fixed4_add_cpt
- __kmpc_atomic_fixed4_add_fp
- __kmpc_atomic_fixed4_andb
- __kmpc_atomic_fixed4_andb_cpt
- __kmpc_atomic_fixed4_andl
- __kmpc_atomic_fixed4_andl_cpt
- __kmpc_atomic_fixed4_div
- __kmpc_atomic_fixed4_div_cpt
- __kmpc_atomic_fixed4_div_cpt_rev
- __kmpc_atomic_fixed4_div_float8
- __kmpc_atomic_fixed4_div_fp
- __kmpc_atomic_fixed4_div_rev
- __kmpc_atomic_fixed4_eqv
- __kmpc_atomic_fixed4_eqv_cpt
- __kmpc_atomic_fixed4_max
- __kmpc_atomic_fixed4_max_cpt
- __kmpc_atomic_fixed4_min
- __kmpc_atomic_fixed4_min_cpt
- __kmpc_atomic_fixed4_mul
- __kmpc_atomic_fixed4_mul_cpt
- __kmpc_atomic_fixed4_mul_float8
- __kmpc_atomic_fixed4_mul_fp
- __kmpc_atomic_fixed4_neqv
- __kmpc_atomic_fixed4_neqv_cpt
- __kmpc_atomic_fixed4_orb
- __kmpc_atomic_fixed4_orb_cpt
- __kmpc_atomic_fixed4_orl
- __kmpc_atomic_fixed4_orl_cpt
- __kmpc_atomic_fixed4_rd
- __kmpc_atomic_fixed4_shl
- __kmpc_atomic_fixed4_shl_cpt
- __kmpc_atomic_fixed4_shl_cpt_rev
- __kmpc_atomic_fixed4_shl_rev
- __kmpc_atomic_fixed4_shr
- __kmpc_atomic_fixed4_shr_cpt
- __kmpc_atomic_fixed4_shr_cpt_rev
- __kmpc_atomic_fixed4_shr_rev
- __kmpc_atomic_fixed4_sub
- __kmpc_atomic_fixed4_sub_cpt
- __kmpc_atomic_fixed4_sub_cpt_rev
- __kmpc_atomic_fixed4_sub_fp
- __kmpc_atomic_fixed4_sub_rev
- __kmpc_atomic_fixed4_swp
- __kmpc_atomic_fixed4_wr
- __kmpc_atomic_fixed4_xor
- __kmpc_atomic_fixed4_xor_cpt
- __kmpc_atomic_fixed4u_add_fp
- __kmpc_atomic_fixed4u_sub_fp
- __kmpc_atomic_fixed4u_mul_fp
- __kmpc_atomic_fixed4u_div
- __kmpc_atomic_fixed4u_div_cpt
- __kmpc_atomic_fixed4u_div_cpt_rev
- __kmpc_atomic_fixed4u_div_fp
- __kmpc_atomic_fixed4u_div_rev
- __kmpc_atomic_fixed4u_shr
- __kmpc_atomic_fixed4u_shr_cpt
- __kmpc_atomic_fixed4u_shr_cpt_rev
- __kmpc_atomic_fixed4u_shr_rev
- __kmpc_atomic_fixed8_add
- __kmpc_atomic_fixed8_add_cpt
- __kmpc_atomic_fixed8_add_fp
- __kmpc_atomic_fixed8_andb
- __kmpc_atomic_fixed8_andb_cpt
- __kmpc_atomic_fixed8_andl
- __kmpc_atomic_fixed8_andl_cpt
- __kmpc_atomic_fixed8_div
- __kmpc_atomic_fixed8_div_cpt
- __kmpc_atomic_fixed8_div_cpt_rev
- __kmpc_atomic_fixed8_div_float8
- __kmpc_atomic_fixed8_div_fp
- __kmpc_atomic_fixed8_div_rev
- __kmpc_atomic_fixed8_eqv
- __kmpc_atomic_fixed8_eqv_cpt
- __kmpc_atomic_fixed8_max
- __kmpc_atomic_fixed8_max_cpt
- __kmpc_atomic_fixed8_min
- __kmpc_atomic_fixed8_min_cpt
- __kmpc_atomic_fixed8_mul
- __kmpc_atomic_fixed8_mul_cpt
- __kmpc_atomic_fixed8_mul_float8
- __kmpc_atomic_fixed8_mul_fp
- __kmpc_atomic_fixed8_neqv
- __kmpc_atomic_fixed8_neqv_cpt
- __kmpc_atomic_fixed8_orb
- __kmpc_atomic_fixed8_orb_cpt
- __kmpc_atomic_fixed8_orl
- __kmpc_atomic_fixed8_orl_cpt
- __kmpc_atomic_fixed8_rd
- __kmpc_atomic_fixed8_shl
- __kmpc_atomic_fixed8_shl_cpt
- __kmpc_atomic_fixed8_shl_cpt_rev
- __kmpc_atomic_fixed8_shl_rev
- __kmpc_atomic_fixed8_shr
- __kmpc_atomic_fixed8_shr_cpt
- __kmpc_atomic_fixed8_shr_cpt_rev
- __kmpc_atomic_fixed8_shr_rev
- __kmpc_atomic_fixed8_sub
- __kmpc_atomic_fixed8_sub_cpt
- __kmpc_atomic_fixed8_sub_cpt_rev
- __kmpc_atomic_fixed8_sub_fp
- __kmpc_atomic_fixed8_sub_rev
- __kmpc_atomic_fixed8_swp
- __kmpc_atomic_fixed8_wr
- __kmpc_atomic_fixed8_xor
- __kmpc_atomic_fixed8_xor_cpt
- __kmpc_atomic_fixed8u_add_fp
- __kmpc_atomic_fixed8u_sub_fp
- __kmpc_atomic_fixed8u_mul_fp
- __kmpc_atomic_fixed8u_div
- __kmpc_atomic_fixed8u_div_cpt
- __kmpc_atomic_fixed8u_div_cpt_rev
- __kmpc_atomic_fixed8u_div_fp
- __kmpc_atomic_fixed8u_div_rev
- __kmpc_atomic_fixed8u_shr
- __kmpc_atomic_fixed8u_shr_cpt
- __kmpc_atomic_fixed8u_shr_cpt_rev
- __kmpc_atomic_fixed8u_shr_rev
- @endcode
- Functions for floating point
- ----------------------------
- There are versions here for floating point numbers of size 4, 8, 10 and 16
- bytes. (Ten-byte floats are the x87 extended-precision format and are now rarely used.)
- @code
- __kmpc_atomic_float4_add
- __kmpc_atomic_float4_add_cpt
- __kmpc_atomic_float4_add_float8
- __kmpc_atomic_float4_add_fp
- __kmpc_atomic_float4_div
- __kmpc_atomic_float4_div_cpt
- __kmpc_atomic_float4_div_cpt_rev
- __kmpc_atomic_float4_div_float8
- __kmpc_atomic_float4_div_fp
- __kmpc_atomic_float4_div_rev
- __kmpc_atomic_float4_max
- __kmpc_atomic_float4_max_cpt
- __kmpc_atomic_float4_min
- __kmpc_atomic_float4_min_cpt
- __kmpc_atomic_float4_mul
- __kmpc_atomic_float4_mul_cpt
- __kmpc_atomic_float4_mul_float8
- __kmpc_atomic_float4_mul_fp
- __kmpc_atomic_float4_rd
- __kmpc_atomic_float4_sub
- __kmpc_atomic_float4_sub_cpt
- __kmpc_atomic_float4_sub_cpt_rev
- __kmpc_atomic_float4_sub_float8
- __kmpc_atomic_float4_sub_fp
- __kmpc_atomic_float4_sub_rev
- __kmpc_atomic_float4_swp
- __kmpc_atomic_float4_wr
- __kmpc_atomic_float8_add
- __kmpc_atomic_float8_add_cpt
- __kmpc_atomic_float8_add_fp
- __kmpc_atomic_float8_div
- __kmpc_atomic_float8_div_cpt
- __kmpc_atomic_float8_div_cpt_rev
- __kmpc_atomic_float8_div_fp
- __kmpc_atomic_float8_div_rev
- __kmpc_atomic_float8_max
- __kmpc_atomic_float8_max_cpt
- __kmpc_atomic_float8_min
- __kmpc_atomic_float8_min_cpt
- __kmpc_atomic_float8_mul
- __kmpc_atomic_float8_mul_cpt
- __kmpc_atomic_float8_mul_fp
- __kmpc_atomic_float8_rd
- __kmpc_atomic_float8_sub
- __kmpc_atomic_float8_sub_cpt
- __kmpc_atomic_float8_sub_cpt_rev
- __kmpc_atomic_float8_sub_fp
- __kmpc_atomic_float8_sub_rev
- __kmpc_atomic_float8_swp
- __kmpc_atomic_float8_wr
- __kmpc_atomic_float10_add
- __kmpc_atomic_float10_add_cpt
- __kmpc_atomic_float10_add_fp
- __kmpc_atomic_float10_div
- __kmpc_atomic_float10_div_cpt
- __kmpc_atomic_float10_div_cpt_rev
- __kmpc_atomic_float10_div_fp
- __kmpc_atomic_float10_div_rev
- __kmpc_atomic_float10_mul
- __kmpc_atomic_float10_mul_cpt
- __kmpc_atomic_float10_mul_fp
- __kmpc_atomic_float10_rd
- __kmpc_atomic_float10_sub
- __kmpc_atomic_float10_sub_cpt
- __kmpc_atomic_float10_sub_cpt_rev
- __kmpc_atomic_float10_sub_fp
- __kmpc_atomic_float10_sub_rev
- __kmpc_atomic_float10_swp
- __kmpc_atomic_float10_wr
- __kmpc_atomic_float16_add
- __kmpc_atomic_float16_add_cpt
- __kmpc_atomic_float16_div
- __kmpc_atomic_float16_div_cpt
- __kmpc_atomic_float16_div_cpt_rev
- __kmpc_atomic_float16_div_rev
- __kmpc_atomic_float16_max
- __kmpc_atomic_float16_max_cpt
- __kmpc_atomic_float16_min
- __kmpc_atomic_float16_min_cpt
- __kmpc_atomic_float16_mul
- __kmpc_atomic_float16_mul_cpt
- __kmpc_atomic_float16_rd
- __kmpc_atomic_float16_sub
- __kmpc_atomic_float16_sub_cpt
- __kmpc_atomic_float16_sub_cpt_rev
- __kmpc_atomic_float16_sub_rev
- __kmpc_atomic_float16_swp
- __kmpc_atomic_float16_wr
- @endcode
- Functions for Complex types
- ---------------------------
- Functions for complex types whose component floating point variables are of size
- 4, 8, 10 or 16 bytes. The names here are based on the size of the component float,
- *not* the size of the complex type. So `__kmpc_atomic_cmplx8_add` is an
- operation on a `complex<double>` or `complex(kind=8)`, *not* `complex<float>`.
- @code
- __kmpc_atomic_cmplx4_add
- __kmpc_atomic_cmplx4_add_cmplx8
- __kmpc_atomic_cmplx4_add_cpt
- __kmpc_atomic_cmplx4_div
- __kmpc_atomic_cmplx4_div_cmplx8
- __kmpc_atomic_cmplx4_div_cpt
- __kmpc_atomic_cmplx4_div_cpt_rev
- __kmpc_atomic_cmplx4_div_rev
- __kmpc_atomic_cmplx4_mul
- __kmpc_atomic_cmplx4_mul_cmplx8
- __kmpc_atomic_cmplx4_mul_cpt
- __kmpc_atomic_cmplx4_rd
- __kmpc_atomic_cmplx4_sub
- __kmpc_atomic_cmplx4_sub_cmplx8
- __kmpc_atomic_cmplx4_sub_cpt
- __kmpc_atomic_cmplx4_sub_cpt_rev
- __kmpc_atomic_cmplx4_sub_rev
- __kmpc_atomic_cmplx4_swp
- __kmpc_atomic_cmplx4_wr
- __kmpc_atomic_cmplx8_add
- __kmpc_atomic_cmplx8_add_cpt
- __kmpc_atomic_cmplx8_div
- __kmpc_atomic_cmplx8_div_cpt
- __kmpc_atomic_cmplx8_div_cpt_rev
- __kmpc_atomic_cmplx8_div_rev
- __kmpc_atomic_cmplx8_mul
- __kmpc_atomic_cmplx8_mul_cpt
- __kmpc_atomic_cmplx8_rd
- __kmpc_atomic_cmplx8_sub
- __kmpc_atomic_cmplx8_sub_cpt
- __kmpc_atomic_cmplx8_sub_cpt_rev
- __kmpc_atomic_cmplx8_sub_rev
- __kmpc_atomic_cmplx8_swp
- __kmpc_atomic_cmplx8_wr
- __kmpc_atomic_cmplx10_add
- __kmpc_atomic_cmplx10_add_cpt
- __kmpc_atomic_cmplx10_div
- __kmpc_atomic_cmplx10_div_cpt
- __kmpc_atomic_cmplx10_div_cpt_rev
- __kmpc_atomic_cmplx10_div_rev
- __kmpc_atomic_cmplx10_mul
- __kmpc_atomic_cmplx10_mul_cpt
- __kmpc_atomic_cmplx10_rd
- __kmpc_atomic_cmplx10_sub
- __kmpc_atomic_cmplx10_sub_cpt
- __kmpc_atomic_cmplx10_sub_cpt_rev
- __kmpc_atomic_cmplx10_sub_rev
- __kmpc_atomic_cmplx10_swp
- __kmpc_atomic_cmplx10_wr
- __kmpc_atomic_cmplx16_add
- __kmpc_atomic_cmplx16_add_cpt
- __kmpc_atomic_cmplx16_div
- __kmpc_atomic_cmplx16_div_cpt
- __kmpc_atomic_cmplx16_div_cpt_rev
- __kmpc_atomic_cmplx16_div_rev
- __kmpc_atomic_cmplx16_mul
- __kmpc_atomic_cmplx16_mul_cpt
- __kmpc_atomic_cmplx16_rd
- __kmpc_atomic_cmplx16_sub
- __kmpc_atomic_cmplx16_sub_cpt
- __kmpc_atomic_cmplx16_sub_cpt_rev
- __kmpc_atomic_cmplx16_swp
- __kmpc_atomic_cmplx16_wr
- @endcode
- */
- /*!
- @ingroup ATOMIC_OPS
- @{
- */
- /*
- * Global vars
- */
- #ifndef KMP_GOMP_COMPAT
- int __kmp_atomic_mode = 1; // Intel perf
- #else
- int __kmp_atomic_mode = 2; // GOMP compatibility
- #endif /* KMP_GOMP_COMPAT */
- KMP_ALIGN(128)
- // Control access to all user coded atomics in Gnu compat mode
- kmp_atomic_lock_t __kmp_atomic_lock;
- // Control access to all user coded atomics for 1-byte fixed data types
- kmp_atomic_lock_t __kmp_atomic_lock_1i;
- // Control access to all user coded atomics for 2-byte fixed data types
- kmp_atomic_lock_t __kmp_atomic_lock_2i;
- // Control access to all user coded atomics for 4-byte fixed data types
- kmp_atomic_lock_t __kmp_atomic_lock_4i;
- // Control access to all user coded atomics for kmp_real32 data type
- kmp_atomic_lock_t __kmp_atomic_lock_4r;
- // Control access to all user coded atomics for 8-byte fixed data types
- kmp_atomic_lock_t __kmp_atomic_lock_8i;
- // Control access to all user coded atomics for kmp_real64 data type
- kmp_atomic_lock_t __kmp_atomic_lock_8r;
- // Control access to all user coded atomics for complex byte data type
- kmp_atomic_lock_t __kmp_atomic_lock_8c;
- // Control access to all user coded atomics for long double data type
- kmp_atomic_lock_t __kmp_atomic_lock_10r;
- // Control access to all user coded atomics for _Quad data type
- kmp_atomic_lock_t __kmp_atomic_lock_16r;
- // Control access to all user coded atomics for double complex data type
- kmp_atomic_lock_t __kmp_atomic_lock_16c;
- // Control access to all user coded atomics for long double complex type
- kmp_atomic_lock_t __kmp_atomic_lock_20c;
- // Control access to all user coded atomics for _Quad complex data type
- kmp_atomic_lock_t __kmp_atomic_lock_32c;
- /* 2007-03-02:
- Without "volatile" specifier in OP_CMPXCHG and MIN_MAX_CMPXCHG we have a bug
- on *_32 and *_32e. This is just a temporary workaround for the problem. It
- seems the right solution is writing OP_CMPXCHG and MIN_MAX_CMPXCHG routines
- in assembler language. */
- #define KMP_ATOMIC_VOLATILE volatile
- #if (KMP_ARCH_X86) && KMP_HAVE_QUAD
- static inline Quad_a4_t operator+(Quad_a4_t &lhs, Quad_a4_t &rhs) {
- return lhs.q + rhs.q;
- }
- static inline Quad_a4_t operator-(Quad_a4_t &lhs, Quad_a4_t &rhs) {
- return lhs.q - rhs.q;
- }
- static inline Quad_a4_t operator*(Quad_a4_t &lhs, Quad_a4_t &rhs) {
- return lhs.q * rhs.q;
- }
- static inline Quad_a4_t operator/(Quad_a4_t &lhs, Quad_a4_t &rhs) {
- return lhs.q / rhs.q;
- }
- static inline bool operator<(Quad_a4_t &lhs, Quad_a4_t &rhs) {
- return lhs.q < rhs.q;
- }
- static inline bool operator>(Quad_a4_t &lhs, Quad_a4_t &rhs) {
- return lhs.q > rhs.q;
- }
- static inline Quad_a16_t operator+(Quad_a16_t &lhs, Quad_a16_t &rhs) {
- return lhs.q + rhs.q;
- }
- static inline Quad_a16_t operator-(Quad_a16_t &lhs, Quad_a16_t &rhs) {
- return lhs.q - rhs.q;
- }
- static inline Quad_a16_t operator*(Quad_a16_t &lhs, Quad_a16_t &rhs) {
- return lhs.q * rhs.q;
- }
- static inline Quad_a16_t operator/(Quad_a16_t &lhs, Quad_a16_t &rhs) {
- return lhs.q / rhs.q;
- }
- static inline bool operator<(Quad_a16_t &lhs, Quad_a16_t &rhs) {
- return lhs.q < rhs.q;
- }
- static inline bool operator>(Quad_a16_t &lhs, Quad_a16_t &rhs) {
- return lhs.q > rhs.q;
- }
- static inline kmp_cmplx128_a4_t operator+(kmp_cmplx128_a4_t &lhs,
- kmp_cmplx128_a4_t &rhs) {
- return lhs.q + rhs.q;
- }
- static inline kmp_cmplx128_a4_t operator-(kmp_cmplx128_a4_t &lhs,
- kmp_cmplx128_a4_t &rhs) {
- return lhs.q - rhs.q;
- }
- static inline kmp_cmplx128_a4_t operator*(kmp_cmplx128_a4_t &lhs,
- kmp_cmplx128_a4_t &rhs) {
- return lhs.q * rhs.q;
- }
- static inline kmp_cmplx128_a4_t operator/(kmp_cmplx128_a4_t &lhs,
- kmp_cmplx128_a4_t &rhs) {
- return lhs.q / rhs.q;
- }
- static inline kmp_cmplx128_a16_t operator+(kmp_cmplx128_a16_t &lhs,
- kmp_cmplx128_a16_t &rhs) {
- return lhs.q + rhs.q;
- }
- static inline kmp_cmplx128_a16_t operator-(kmp_cmplx128_a16_t &lhs,
- kmp_cmplx128_a16_t &rhs) {
- return lhs.q - rhs.q;
- }
- static inline kmp_cmplx128_a16_t operator*(kmp_cmplx128_a16_t &lhs,
- kmp_cmplx128_a16_t &rhs) {
- return lhs.q * rhs.q;
- }
- static inline kmp_cmplx128_a16_t operator/(kmp_cmplx128_a16_t &lhs,
- kmp_cmplx128_a16_t &rhs) {
- return lhs.q / rhs.q;
- }
- #endif // (KMP_ARCH_X86) && KMP_HAVE_QUAD
- // ATOMIC implementation routines -----------------------------------------
- // One routine for each operation and operand type.
- // All routine declarations look like
- // void __kmpc_atomic_RTYPE_OP( ident_t*, int, TYPE *lhs, TYPE rhs );
- #define KMP_CHECK_GTID \
- if (gtid == KMP_GTID_UNKNOWN) { \
- gtid = __kmp_entry_gtid(); \
- } // check and get gtid when needed
- // Beginning of a definition (provides name, parameters, debug trace)
- // TYPE_ID - operand type and size (fixed*, fixed*u for signed, unsigned
- // fixed)
- // OP_ID - operation identifier (add, sub, mul, ...)
- // TYPE - operands' type
- #define ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, RET_TYPE) \
- RET_TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID(ident_t *id_ref, int gtid, \
- TYPE *lhs, TYPE rhs) { \
- KMP_DEBUG_ASSERT(__kmp_init_serial); \
- KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid));
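- // Illustrative expansion sketch (not emitted verbatim): for example,
- // ATOMIC_BEGIN(fixed4, add, kmp_int32, void) opens a definition equivalent to
- //   void __kmpc_atomic_fixed4_add(ident_t *id_ref, int gtid,
- //                                 kmp_int32 *lhs, kmp_int32 rhs) {
- //     KMP_DEBUG_ASSERT(__kmp_init_serial);
- //     KA_TRACE(100, ("__kmpc_atomic_fixed4_add: T#%d\n", gtid));
- //     ... // body and closing brace are supplied by the macro that follows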
- // ------------------------------------------------------------------------
- // Lock variables used for critical sections for various size operands
- #define ATOMIC_LOCK0 __kmp_atomic_lock // all types, for Gnu compat
- #define ATOMIC_LOCK1i __kmp_atomic_lock_1i // char
- #define ATOMIC_LOCK2i __kmp_atomic_lock_2i // short
- #define ATOMIC_LOCK4i __kmp_atomic_lock_4i // long int
- #define ATOMIC_LOCK4r __kmp_atomic_lock_4r // float
- #define ATOMIC_LOCK8i __kmp_atomic_lock_8i // long long int
- #define ATOMIC_LOCK8r __kmp_atomic_lock_8r // double
- #define ATOMIC_LOCK8c __kmp_atomic_lock_8c // float complex
- #define ATOMIC_LOCK10r __kmp_atomic_lock_10r // long double
- #define ATOMIC_LOCK16r __kmp_atomic_lock_16r // _Quad
- #define ATOMIC_LOCK16c __kmp_atomic_lock_16c // double complex
- #define ATOMIC_LOCK20c __kmp_atomic_lock_20c // long double complex
- #define ATOMIC_LOCK32c __kmp_atomic_lock_32c // _Quad complex
- // ------------------------------------------------------------------------
- // Operation on *lhs, rhs bound by critical section
- // OP - operator (it's supposed to contain an assignment)
- // LCK_ID - lock identifier
- // Note: don't check gtid as it should always be valid here;
- // for 1- and 2-byte operands a valid gtid parameter is expected, other sizes
- // check it before invoking this macro
- #define OP_CRITICAL(OP, LCK_ID) \
- __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
- \
- (*lhs) OP(rhs); \
- \
- __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);
- #define OP_UPDATE_CRITICAL(TYPE, OP, LCK_ID) \
- __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
- (*lhs) = (TYPE)((*lhs)OP rhs); \
- __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);
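- // Illustrative expansion sketch: OP_UPDATE_CRITICAL(kmp_int32, +, 4i) becomes
- //   __kmp_acquire_atomic_lock(&__kmp_atomic_lock_4i, gtid);
- //   (*lhs) = (kmp_int32)((*lhs) + rhs);
- //   __kmp_release_atomic_lock(&__kmp_atomic_lock_4i, gtid);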
- // ------------------------------------------------------------------------
- // For GNU compatibility, we may need to use a critical section,
- // even though it is not required by the ISA.
- //
- // On IA-32 architecture, all atomic operations except for fixed 4 byte add,
- // sub, and bitwise logical ops, and 1 & 2 byte logical ops use a common
- // critical section. On Intel(R) 64, all atomic operations are done with fetch
- // and add or compare and exchange. Therefore, the FLAG parameter to this
- // macro is either KMP_ARCH_X86 or 0 (or 1, for Intel-specific extensions which
- // require a critical section, where we predict that they will be implemented
- // in the Gnu codegen by calling GOMP_atomic_start() / GOMP_atomic_end()).
- //
- // When the OP_GOMP_CRITICAL macro is used in a *CRITICAL* macro construct,
- // the FLAG parameter should always be 1. If we know that we will be using
- // a critical section, then we want to make certain that we use the generic
- // lock __kmp_atomic_lock to protect the atomic update, and not one of the
- // locks that are specialized based upon the size or type of the data.
- //
- // If FLAG is 0, then we are relying on dead code elimination by the build
- // compiler to get rid of the useless block of code, and save a needless
- // branch at runtime.
- #ifdef KMP_GOMP_COMPAT
- #define OP_GOMP_CRITICAL(OP, FLAG) \
- if ((FLAG) && (__kmp_atomic_mode == 2)) { \
- KMP_CHECK_GTID; \
- OP_CRITICAL(OP, 0); \
- return; \
- }
- #define OP_UPDATE_GOMP_CRITICAL(TYPE, OP, FLAG) \
- if ((FLAG) && (__kmp_atomic_mode == 2)) { \
- KMP_CHECK_GTID; \
- OP_UPDATE_CRITICAL(TYPE, OP, 0); \
- return; \
- }
- #else
- #define OP_GOMP_CRITICAL(OP, FLAG)
- #define OP_UPDATE_GOMP_CRITICAL(TYPE, OP, FLAG)
- #endif /* KMP_GOMP_COMPAT */
- #if KMP_MIC
- #define KMP_DO_PAUSE _mm_delay_32(1)
- #else
- #define KMP_DO_PAUSE
- #endif /* KMP_MIC */
- // ------------------------------------------------------------------------
- // Operation on *lhs, rhs using "compare_and_store" routine
- // TYPE - operands' type
- // BITS - size in bits, used to distinguish low level calls
- // OP - operator
- #define OP_CMPXCHG(TYPE, BITS, OP) \
- { \
- TYPE old_value, new_value; \
- old_value = *(TYPE volatile *)lhs; \
- new_value = (TYPE)(old_value OP rhs); \
- while (!KMP_COMPARE_AND_STORE_ACQ##BITS( \
- (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) & old_value, \
- *VOLATILE_CAST(kmp_int##BITS *) & new_value)) { \
- KMP_DO_PAUSE; \
- \
- old_value = *(TYPE volatile *)lhs; \
- new_value = (TYPE)(old_value OP rhs); \
- } \
- }
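- // Illustrative expansion sketch: OP_CMPXCHG(kmp_real32, 32, +) reads *lhs,
- // computes the new value, and retries the 32-bit compare-and-store until no
- // other thread has modified *lhs in between:
- //   kmp_real32 old_value, new_value;
- //   old_value = *(kmp_real32 volatile *)lhs;
- //   new_value = (kmp_real32)(old_value + rhs);
- //   while (!KMP_COMPARE_AND_STORE_ACQ32(
- //       (kmp_int32 *)lhs, *VOLATILE_CAST(kmp_int32 *) & old_value,
- //       *VOLATILE_CAST(kmp_int32 *) & new_value)) {
- //     KMP_DO_PAUSE;
- //     old_value = *(kmp_real32 volatile *)lhs;
- //     new_value = (kmp_real32)(old_value + rhs);
- //   }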
- #if USE_CMPXCHG_FIX
- // 2007-06-25:
- // workaround for C78287 (complex(kind=4) data type). lin_32, lin_32e, win_32
- // and win_32e are affected (I verified the asm). Compiler ignores the volatile
- // qualifier of the temp_val in the OP_CMPXCHG macro. This is a problem of the
- // compiler. Related tracker is C76005, targeted to 11.0. I verified the asm of
- // the workaround.
- #define OP_CMPXCHG_WORKAROUND(TYPE, BITS, OP) \
- { \
- struct _sss { \
- TYPE cmp; \
- kmp_int##BITS *vvv; \
- }; \
- struct _sss old_value, new_value; \
- old_value.vvv = (kmp_int##BITS *)&old_value.cmp; \
- new_value.vvv = (kmp_int##BITS *)&new_value.cmp; \
- *old_value.vvv = *(volatile kmp_int##BITS *)lhs; \
- new_value.cmp = (TYPE)(old_value.cmp OP rhs); \
- while (!KMP_COMPARE_AND_STORE_ACQ##BITS( \
- (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) old_value.vvv, \
- *VOLATILE_CAST(kmp_int##BITS *) new_value.vvv)) { \
- KMP_DO_PAUSE; \
- \
- *old_value.vvv = *(volatile kmp_int##BITS *)lhs; \
- new_value.cmp = (TYPE)(old_value.cmp OP rhs); \
- } \
- }
- // end of the first part of the workaround for C78287
- #endif // USE_CMPXCHG_FIX
- #if KMP_OS_WINDOWS && KMP_ARCH_AARCH64
- // Undo explicit type casts to get MSVC ARM64 to build. Uses
- // OP_CMPXCHG_WORKAROUND definition for OP_CMPXCHG
- #undef OP_CMPXCHG
- #define OP_CMPXCHG(TYPE, BITS, OP) \
- { \
- struct _sss { \
- TYPE cmp; \
- kmp_int##BITS *vvv; \
- }; \
- struct _sss old_value, new_value; \
- old_value.vvv = (kmp_int##BITS *)&old_value.cmp; \
- new_value.vvv = (kmp_int##BITS *)&new_value.cmp; \
- *old_value.vvv = *(volatile kmp_int##BITS *)lhs; \
- new_value.cmp = old_value.cmp OP rhs; \
- while (!KMP_COMPARE_AND_STORE_ACQ##BITS( \
- (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) old_value.vvv, \
- *VOLATILE_CAST(kmp_int##BITS *) new_value.vvv)) { \
- KMP_DO_PAUSE; \
- \
- *old_value.vvv = *(volatile kmp_int##BITS *)lhs; \
- new_value.cmp = old_value.cmp OP rhs; \
- } \
- }
- #undef OP_UPDATE_CRITICAL
- #define OP_UPDATE_CRITICAL(TYPE, OP, LCK_ID) \
- __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
- (*lhs) = (*lhs)OP rhs; \
- __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);
- #endif // KMP_OS_WINDOWS && KMP_ARCH_AARCH64
- #if KMP_ARCH_X86 || KMP_ARCH_X86_64
- // ------------------------------------------------------------------------
- // X86 or X86_64: no alignment problems ====================================
- #define ATOMIC_FIXED_ADD(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, \
- GOMP_FLAG) \
- ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
- OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG) \
- /* OP used as a sign for subtraction: (lhs-rhs) --> (lhs+-rhs) */ \
- KMP_TEST_THEN_ADD##BITS(lhs, OP rhs); \
- }
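- // Illustrative expansion sketch (ignoring the GOMP-compat path): for example,
- // ATOMIC_FIXED_ADD(fixed4, add, kmp_int32, 32, +, 4i, 3, 0) defines
- //   void __kmpc_atomic_fixed4_add(ident_t *id_ref, int gtid,
- //                                 kmp_int32 *lhs, kmp_int32 rhs) {
- //     ... // assert and trace from ATOMIC_BEGIN
- //     KMP_TEST_THEN_ADD32(lhs, + rhs); // a single fetch-and-add on x86
- //   }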
- // -------------------------------------------------------------------------
- #define ATOMIC_CMPXCHG(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, \
- GOMP_FLAG) \
- ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
- OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG) \
- OP_CMPXCHG(TYPE, BITS, OP) \
- }
- #if USE_CMPXCHG_FIX
- // -------------------------------------------------------------------------
- // workaround for C78287 (complex(kind=4) data type)
- #define ATOMIC_CMPXCHG_WORKAROUND(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, \
- MASK, GOMP_FLAG) \
- ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
- OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG) \
- OP_CMPXCHG_WORKAROUND(TYPE, BITS, OP) \
- }
- // end of the second part of the workaround for C78287
- #endif // USE_CMPXCHG_FIX
- #else
- // -------------------------------------------------------------------------
- // Code for other architectures that don't handle unaligned accesses.
- #define ATOMIC_FIXED_ADD(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, \
- GOMP_FLAG) \
- ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
- OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG) \
- if (!((kmp_uintptr_t)lhs & 0x##MASK)) { \
- /* OP used as a sign for subtraction: (lhs-rhs) --> (lhs+-rhs) */ \
- KMP_TEST_THEN_ADD##BITS(lhs, OP rhs); \
- } else { \
- KMP_CHECK_GTID; \
- OP_UPDATE_CRITICAL(TYPE, OP, \
- LCK_ID) /* unaligned address - use critical */ \
- } \
- }
- // -------------------------------------------------------------------------
- #define ATOMIC_CMPXCHG(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, \
- GOMP_FLAG) \
- ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
- OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG) \
- if (!((kmp_uintptr_t)lhs & 0x##MASK)) { \
- OP_CMPXCHG(TYPE, BITS, OP) /* aligned address */ \
- } else { \
- KMP_CHECK_GTID; \
- OP_UPDATE_CRITICAL(TYPE, OP, \
- LCK_ID) /* unaligned address - use critical */ \
- } \
- }
- #if USE_CMPXCHG_FIX
- // -------------------------------------------------------------------------
- // workaround for C78287 (complex(kind=4) data type)
- #define ATOMIC_CMPXCHG_WORKAROUND(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, \
- MASK, GOMP_FLAG) \
- ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
- OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG) \
- if (!((kmp_uintptr_t)lhs & 0x##MASK)) { \
- OP_CMPXCHG(TYPE, BITS, OP) /* aligned address */ \
- } else { \
- KMP_CHECK_GTID; \
- OP_UPDATE_CRITICAL(TYPE, OP, \
- LCK_ID) /* unaligned address - use critical */ \
- } \
- }
- // end of the second part of the workaround for C78287
- #endif // USE_CMPXCHG_FIX
- #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
- // Routines for ATOMIC 4-byte operands addition and subtraction
- ATOMIC_FIXED_ADD(fixed4, add, kmp_int32, 32, +, 4i, 3,
- 0) // __kmpc_atomic_fixed4_add
- ATOMIC_FIXED_ADD(fixed4, sub, kmp_int32, 32, -, 4i, 3,
- 0) // __kmpc_atomic_fixed4_sub
- ATOMIC_CMPXCHG(float4, add, kmp_real32, 32, +, 4r, 3,
- KMP_ARCH_X86) // __kmpc_atomic_float4_add
- ATOMIC_CMPXCHG(float4, sub, kmp_real32, 32, -, 4r, 3,
- KMP_ARCH_X86) // __kmpc_atomic_float4_sub
- // Routines for ATOMIC 8-byte operands addition and subtraction
- ATOMIC_FIXED_ADD(fixed8, add, kmp_int64, 64, +, 8i, 7,
- KMP_ARCH_X86) // __kmpc_atomic_fixed8_add
- ATOMIC_FIXED_ADD(fixed8, sub, kmp_int64, 64, -, 8i, 7,
- KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub
- ATOMIC_CMPXCHG(float8, add, kmp_real64, 64, +, 8r, 7,
- KMP_ARCH_X86) // __kmpc_atomic_float8_add
- ATOMIC_CMPXCHG(float8, sub, kmp_real64, 64, -, 8r, 7,
- KMP_ARCH_X86) // __kmpc_atomic_float8_sub
- // ------------------------------------------------------------------------
- // Entry definitions for integer operands
- // TYPE_ID - operand type and size (fixed4, float4)
- // OP_ID - operation identifier (add, sub, mul, ...)
- // TYPE - operand type
- // BITS - size in bits, used to distinguish low level calls
- // OP - operator (used in critical section)
- // LCK_ID - lock identifier, used to possibly distinguish lock variable
- // MASK - used for alignment check
- // TYPE_ID,OP_ID, TYPE, BITS,OP,LCK_ID,MASK,GOMP_FLAG
- // ------------------------------------------------------------------------
- // Routines for ATOMIC integer operands, other operators
- // ------------------------------------------------------------------------
- // TYPE_ID,OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG
- ATOMIC_CMPXCHG(fixed1, add, kmp_int8, 8, +, 1i, 0,
- KMP_ARCH_X86) // __kmpc_atomic_fixed1_add
- ATOMIC_CMPXCHG(fixed1, andb, kmp_int8, 8, &, 1i, 0,
- 0) // __kmpc_atomic_fixed1_andb
- ATOMIC_CMPXCHG(fixed1, div, kmp_int8, 8, /, 1i, 0,
- KMP_ARCH_X86) // __kmpc_atomic_fixed1_div
- ATOMIC_CMPXCHG(fixed1u, div, kmp_uint8, 8, /, 1i, 0,
- KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div
- ATOMIC_CMPXCHG(fixed1, mul, kmp_int8, 8, *, 1i, 0,
- KMP_ARCH_X86) // __kmpc_atomic_fixed1_mul
- ATOMIC_CMPXCHG(fixed1, orb, kmp_int8, 8, |, 1i, 0,
- 0) // __kmpc_atomic_fixed1_orb
- ATOMIC_CMPXCHG(fixed1, shl, kmp_int8, 8, <<, 1i, 0,
- KMP_ARCH_X86) // __kmpc_atomic_fixed1_shl
- ATOMIC_CMPXCHG(fixed1, shr, kmp_int8, 8, >>, 1i, 0,
- KMP_ARCH_X86) // __kmpc_atomic_fixed1_shr
- ATOMIC_CMPXCHG(fixed1u, shr, kmp_uint8, 8, >>, 1i, 0,
- KMP_ARCH_X86) // __kmpc_atomic_fixed1u_shr
- ATOMIC_CMPXCHG(fixed1, sub, kmp_int8, 8, -, 1i, 0,
- KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub
- ATOMIC_CMPXCHG(fixed1, xor, kmp_int8, 8, ^, 1i, 0,
- 0) // __kmpc_atomic_fixed1_xor
- ATOMIC_CMPXCHG(fixed2, add, kmp_int16, 16, +, 2i, 1,
- KMP_ARCH_X86) // __kmpc_atomic_fixed2_add
- ATOMIC_CMPXCHG(fixed2, andb, kmp_int16, 16, &, 2i, 1,
- 0) // __kmpc_atomic_fixed2_andb
- ATOMIC_CMPXCHG(fixed2, div, kmp_int16, 16, /, 2i, 1,
- KMP_ARCH_X86) // __kmpc_atomic_fixed2_div
- ATOMIC_CMPXCHG(fixed2u, div, kmp_uint16, 16, /, 2i, 1,
- KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div
- ATOMIC_CMPXCHG(fixed2, mul, kmp_int16, 16, *, 2i, 1,
- KMP_ARCH_X86) // __kmpc_atomic_fixed2_mul
- ATOMIC_CMPXCHG(fixed2, orb, kmp_int16, 16, |, 2i, 1,
- 0) // __kmpc_atomic_fixed2_orb
- ATOMIC_CMPXCHG(fixed2, shl, kmp_int16, 16, <<, 2i, 1,
- KMP_ARCH_X86) // __kmpc_atomic_fixed2_shl
- ATOMIC_CMPXCHG(fixed2, shr, kmp_int16, 16, >>, 2i, 1,
- KMP_ARCH_X86) // __kmpc_atomic_fixed2_shr
- ATOMIC_CMPXCHG(fixed2u, shr, kmp_uint16, 16, >>, 2i, 1,
- KMP_ARCH_X86) // __kmpc_atomic_fixed2u_shr
- ATOMIC_CMPXCHG(fixed2, sub, kmp_int16, 16, -, 2i, 1,
- KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub
- ATOMIC_CMPXCHG(fixed2, xor, kmp_int16, 16, ^, 2i, 1,
- 0) // __kmpc_atomic_fixed2_xor
- ATOMIC_CMPXCHG(fixed4, andb, kmp_int32, 32, &, 4i, 3,
- 0) // __kmpc_atomic_fixed4_andb
- ATOMIC_CMPXCHG(fixed4, div, kmp_int32, 32, /, 4i, 3,
- KMP_ARCH_X86) // __kmpc_atomic_fixed4_div
- ATOMIC_CMPXCHG(fixed4u, div, kmp_uint32, 32, /, 4i, 3,
- KMP_ARCH_X86) // __kmpc_atomic_fixed4u_div
- ATOMIC_CMPXCHG(fixed4, mul, kmp_int32, 32, *, 4i, 3,
- KMP_ARCH_X86) // __kmpc_atomic_fixed4_mul
- ATOMIC_CMPXCHG(fixed4, orb, kmp_int32, 32, |, 4i, 3,
- 0) // __kmpc_atomic_fixed4_orb
- ATOMIC_CMPXCHG(fixed4, shl, kmp_int32, 32, <<, 4i, 3,
- KMP_ARCH_X86) // __kmpc_atomic_fixed4_shl
- ATOMIC_CMPXCHG(fixed4, shr, kmp_int32, 32, >>, 4i, 3,
- KMP_ARCH_X86) // __kmpc_atomic_fixed4_shr
- ATOMIC_CMPXCHG(fixed4u, shr, kmp_uint32, 32, >>, 4i, 3,
- KMP_ARCH_X86) // __kmpc_atomic_fixed4u_shr
- ATOMIC_CMPXCHG(fixed4, xor, kmp_int32, 32, ^, 4i, 3,
- 0) // __kmpc_atomic_fixed4_xor
- ATOMIC_CMPXCHG(fixed8, andb, kmp_int64, 64, &, 8i, 7,
- KMP_ARCH_X86) // __kmpc_atomic_fixed8_andb
- ATOMIC_CMPXCHG(fixed8, div, kmp_int64, 64, /, 8i, 7,
- KMP_ARCH_X86) // __kmpc_atomic_fixed8_div
- ATOMIC_CMPXCHG(fixed8u, div, kmp_uint64, 64, /, 8i, 7,
- KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div
- ATOMIC_CMPXCHG(fixed8, mul, kmp_int64, 64, *, 8i, 7,
- KMP_ARCH_X86) // __kmpc_atomic_fixed8_mul
- ATOMIC_CMPXCHG(fixed8, orb, kmp_int64, 64, |, 8i, 7,
- KMP_ARCH_X86) // __kmpc_atomic_fixed8_orb
- ATOMIC_CMPXCHG(fixed8, shl, kmp_int64, 64, <<, 8i, 7,
- KMP_ARCH_X86) // __kmpc_atomic_fixed8_shl
- ATOMIC_CMPXCHG(fixed8, shr, kmp_int64, 64, >>, 8i, 7,
- KMP_ARCH_X86) // __kmpc_atomic_fixed8_shr
- ATOMIC_CMPXCHG(fixed8u, shr, kmp_uint64, 64, >>, 8i, 7,
- KMP_ARCH_X86) // __kmpc_atomic_fixed8u_shr
- ATOMIC_CMPXCHG(fixed8, xor, kmp_int64, 64, ^, 8i, 7,
- KMP_ARCH_X86) // __kmpc_atomic_fixed8_xor
- ATOMIC_CMPXCHG(float4, div, kmp_real32, 32, /, 4r, 3,
- KMP_ARCH_X86) // __kmpc_atomic_float4_div
- ATOMIC_CMPXCHG(float4, mul, kmp_real32, 32, *, 4r, 3,
- KMP_ARCH_X86) // __kmpc_atomic_float4_mul
- ATOMIC_CMPXCHG(float8, div, kmp_real64, 64, /, 8r, 7,
- KMP_ARCH_X86) // __kmpc_atomic_float8_div
- ATOMIC_CMPXCHG(float8, mul, kmp_real64, 64, *, 8r, 7,
- KMP_ARCH_X86) // __kmpc_atomic_float8_mul
- // TYPE_ID,OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG
- /* ------------------------------------------------------------------------ */
- /* Routines for C/C++ Reduction operators && and || */
- // ------------------------------------------------------------------------
- // Need separate macros for &&, || because there is no combined assignment
- // TODO: eliminate ATOMIC_CRIT_{L,EQV} macros as not used
- #define ATOMIC_CRIT_L(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \
- ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
- OP_GOMP_CRITICAL(= *lhs OP, GOMP_FLAG) \
- OP_CRITICAL(= *lhs OP, LCK_ID) \
- }
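- // Illustrative expansion sketch: for the logical operators the OP argument is
- // passed as "= *lhs &&" (or "= *lhs ||"), so under the lock OP_CRITICAL
- // expands to a full assignment, e.g.
- //   (*lhs) = *lhs && (rhs);
- // since && and || have no compound-assignment form.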
- #if KMP_ARCH_X86 || KMP_ARCH_X86_64
- // ------------------------------------------------------------------------
- // X86 or X86_64: no alignment problems ===================================
- #define ATOMIC_CMPX_L(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, GOMP_FLAG) \
- ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
- OP_GOMP_CRITICAL(= *lhs OP, GOMP_FLAG) \
- OP_CMPXCHG(TYPE, BITS, OP) \
- }
- #else
- // ------------------------------------------------------------------------
- // Code for other architectures that don't handle unaligned accesses.
- #define ATOMIC_CMPX_L(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, GOMP_FLAG) \
- ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
- OP_GOMP_CRITICAL(= *lhs OP, GOMP_FLAG) \
- if (!((kmp_uintptr_t)lhs & 0x##MASK)) { \
- OP_CMPXCHG(TYPE, BITS, OP) /* aligned address */ \
- } else { \
- KMP_CHECK_GTID; \
- OP_CRITICAL(= *lhs OP, LCK_ID) /* unaligned - use critical */ \
- } \
- }
- #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
- ATOMIC_CMPX_L(fixed1, andl, char, 8, &&, 1i, 0,
- KMP_ARCH_X86) // __kmpc_atomic_fixed1_andl
- ATOMIC_CMPX_L(fixed1, orl, char, 8, ||, 1i, 0,
- KMP_ARCH_X86) // __kmpc_atomic_fixed1_orl
- ATOMIC_CMPX_L(fixed2, andl, short, 16, &&, 2i, 1,
- KMP_ARCH_X86) // __kmpc_atomic_fixed2_andl
- ATOMIC_CMPX_L(fixed2, orl, short, 16, ||, 2i, 1,
- KMP_ARCH_X86) // __kmpc_atomic_fixed2_orl
- ATOMIC_CMPX_L(fixed4, andl, kmp_int32, 32, &&, 4i, 3,
- 0) // __kmpc_atomic_fixed4_andl
- ATOMIC_CMPX_L(fixed4, orl, kmp_int32, 32, ||, 4i, 3,
- 0) // __kmpc_atomic_fixed4_orl
- ATOMIC_CMPX_L(fixed8, andl, kmp_int64, 64, &&, 8i, 7,
- KMP_ARCH_X86) // __kmpc_atomic_fixed8_andl
- ATOMIC_CMPX_L(fixed8, orl, kmp_int64, 64, ||, 8i, 7,
- KMP_ARCH_X86) // __kmpc_atomic_fixed8_orl
- /* ------------------------------------------------------------------------- */
- /* Routines for Fortran operators that have no C counterpart: */
- /* MAX, MIN, .EQV., .NEQV. */
- /* Operators .AND., .OR. are covered by __kmpc_atomic_*_{andl,orl} */
- /* Intrinsics IAND, IOR, IEOR are covered by __kmpc_atomic_*_{andb,orb,xor} */
- // -------------------------------------------------------------------------
- // MIN and MAX need separate macros
- // OP - operator used to check whether any action is needed
- #define MIN_MAX_CRITSECT(OP, LCK_ID) \
- __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
- \
- if (*lhs OP rhs) { /* still need actions? */ \
- *lhs = rhs; \
- } \
- __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);
- // -------------------------------------------------------------------------
- #ifdef KMP_GOMP_COMPAT
- #define GOMP_MIN_MAX_CRITSECT(OP, FLAG) \
- if ((FLAG) && (__kmp_atomic_mode == 2)) { \
- KMP_CHECK_GTID; \
- MIN_MAX_CRITSECT(OP, 0); \
- return; \
- }
- #else
- #define GOMP_MIN_MAX_CRITSECT(OP, FLAG)
- #endif /* KMP_GOMP_COMPAT */
- // -------------------------------------------------------------------------
- #define MIN_MAX_CMPXCHG(TYPE, BITS, OP) \
- { \
- TYPE KMP_ATOMIC_VOLATILE temp_val; \
- TYPE old_value; \
- temp_val = *lhs; \
- old_value = temp_val; \
- while (old_value OP rhs && /* still need actions? */ \
- !KMP_COMPARE_AND_STORE_ACQ##BITS( \
- (kmp_int##BITS *)lhs, \
- *VOLATILE_CAST(kmp_int##BITS *) & old_value, \
- *VOLATILE_CAST(kmp_int##BITS *) & rhs)) { \
- temp_val = *lhs; \
- old_value = temp_val; \
- } \
- }
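- // Illustrative reading: for a max operation OP is '<', so the loop keeps
- // trying to store rhs only while (current value < rhs); once the stored value
- // is already >= rhs (initially, or because another thread won the race with a
- // larger value), no update is needed and the loop exits.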
- // -------------------------------------------------------------------------
- // 1-byte, 2-byte operands - use critical section
- #define MIN_MAX_CRITICAL(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \
- ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
- if (*lhs OP rhs) { /* need actions? */ \
- GOMP_MIN_MAX_CRITSECT(OP, GOMP_FLAG) \
- MIN_MAX_CRITSECT(OP, LCK_ID) \
- } \
- }
- #if KMP_ARCH_X86 || KMP_ARCH_X86_64
- // -------------------------------------------------------------------------
- // X86 or X86_64: no alignment problems ====================================
- #define MIN_MAX_COMPXCHG(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, \
- GOMP_FLAG) \
- ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
- if (*lhs OP rhs) { \
- GOMP_MIN_MAX_CRITSECT(OP, GOMP_FLAG) \
- MIN_MAX_CMPXCHG(TYPE, BITS, OP) \
- } \
- }
- #else
- // -------------------------------------------------------------------------
- // Code for other architectures that don't handle unaligned accesses.
- #define MIN_MAX_COMPXCHG(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, \
- GOMP_FLAG) \
- ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
- if (*lhs OP rhs) { \
- GOMP_MIN_MAX_CRITSECT(OP, GOMP_FLAG) \
- if (!((kmp_uintptr_t)lhs & 0x##MASK)) { \
- MIN_MAX_CMPXCHG(TYPE, BITS, OP) /* aligned address */ \
- } else { \
- KMP_CHECK_GTID; \
- MIN_MAX_CRITSECT(OP, LCK_ID) /* unaligned address */ \
- } \
- } \
- }
- #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
- MIN_MAX_COMPXCHG(fixed1, max, char, 8, <, 1i, 0,
- KMP_ARCH_X86) // __kmpc_atomic_fixed1_max
- MIN_MAX_COMPXCHG(fixed1, min, char, 8, >, 1i, 0,
- KMP_ARCH_X86) // __kmpc_atomic_fixed1_min
- MIN_MAX_COMPXCHG(fixed2, max, short, 16, <, 2i, 1,
- KMP_ARCH_X86) // __kmpc_atomic_fixed2_max
- MIN_MAX_COMPXCHG(fixed2, min, short, 16, >, 2i, 1,
- KMP_ARCH_X86) // __kmpc_atomic_fixed2_min
- MIN_MAX_COMPXCHG(fixed4, max, kmp_int32, 32, <, 4i, 3,
- 0) // __kmpc_atomic_fixed4_max
- MIN_MAX_COMPXCHG(fixed4, min, kmp_int32, 32, >, 4i, 3,
- 0) // __kmpc_atomic_fixed4_min
- MIN_MAX_COMPXCHG(fixed8, max, kmp_int64, 64, <, 8i, 7,
- KMP_ARCH_X86) // __kmpc_atomic_fixed8_max
- MIN_MAX_COMPXCHG(fixed8, min, kmp_int64, 64, >, 8i, 7,
- KMP_ARCH_X86) // __kmpc_atomic_fixed8_min
- MIN_MAX_COMPXCHG(float4, max, kmp_real32, 32, <, 4r, 3,
- KMP_ARCH_X86) // __kmpc_atomic_float4_max
- MIN_MAX_COMPXCHG(float4, min, kmp_real32, 32, >, 4r, 3,
- KMP_ARCH_X86) // __kmpc_atomic_float4_min
- MIN_MAX_COMPXCHG(float8, max, kmp_real64, 64, <, 8r, 7,
- KMP_ARCH_X86) // __kmpc_atomic_float8_max
- MIN_MAX_COMPXCHG(float8, min, kmp_real64, 64, >, 8r, 7,
- KMP_ARCH_X86) // __kmpc_atomic_float8_min
- #if KMP_ARCH_X86 || KMP_ARCH_X86_64
- MIN_MAX_CRITICAL(float10, max, long double, <, 10r,
- 1) // __kmpc_atomic_float10_max
- MIN_MAX_CRITICAL(float10, min, long double, >, 10r,
- 1) // __kmpc_atomic_float10_min
- #endif // KMP_ARCH_X86 || KMP_ARCH_X86_64
- #if KMP_HAVE_QUAD
- MIN_MAX_CRITICAL(float16, max, QUAD_LEGACY, <, 16r,
- 1) // __kmpc_atomic_float16_max
- MIN_MAX_CRITICAL(float16, min, QUAD_LEGACY, >, 16r,
- 1) // __kmpc_atomic_float16_min
- #if (KMP_ARCH_X86)
- MIN_MAX_CRITICAL(float16, max_a16, Quad_a16_t, <, 16r,
- 1) // __kmpc_atomic_float16_max_a16
- MIN_MAX_CRITICAL(float16, min_a16, Quad_a16_t, >, 16r,
- 1) // __kmpc_atomic_float16_min_a16
- #endif // (KMP_ARCH_X86)
- #endif // KMP_HAVE_QUAD
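- // Usage sketch of the MIN/MAX entry points generated above (illustrative
- // only; "loc_ref" and "gtid" stand for the ident_t/gtid values normally
- // supplied by the compiler-generated call):
- //   kmp_int32 x = 5;
- //   __kmpc_atomic_fixed4_max(loc_ref, gtid, &x, 7); // atomically x = max(x, 7) == 7
- //   __kmpc_atomic_fixed4_min(loc_ref, gtid, &x, 3); // atomically x = min(x, 3) == 3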
- // ------------------------------------------------------------------------
- // .EQV. needs separate macros because it requires a complement (~)
- // OP is ignored for critical sections; ^=~ is used instead
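- // For reference, the bitwise identity relied on here: a .EQV. b is true when
- // the operands are equal, and ~(a ^ b) == a ^ ~b, so the update
- // *lhs = *lhs .EQV. rhs is sent as *lhs ^= ~rhs (the "^= (TYPE) ~" below).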
- #define ATOMIC_CRIT_EQV(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \
- ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
- OP_GOMP_CRITICAL(^= (TYPE) ~, GOMP_FLAG) /* send assignment */ \
- OP_CRITICAL(^= (TYPE) ~, LCK_ID) /* send assignment and complement */ \
- }
- // ------------------------------------------------------------------------
- #if KMP_ARCH_X86 || KMP_ARCH_X86_64
- // ------------------------------------------------------------------------
- // X86 or X86_64: no alignment problems ===================================
- #define ATOMIC_CMPX_EQV(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, \
- GOMP_FLAG) \
- ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
- OP_GOMP_CRITICAL(^= (TYPE) ~, GOMP_FLAG) /* send assignment */ \
- OP_CMPXCHG(TYPE, BITS, OP) \
- }
- // ------------------------------------------------------------------------
- #else
- // ------------------------------------------------------------------------
- // Code for other architectures that don't handle unaligned accesses.
- #define ATOMIC_CMPX_EQV(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, \
- GOMP_FLAG) \
- ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
- OP_GOMP_CRITICAL(^= (TYPE) ~, GOMP_FLAG) \
- if (!((kmp_uintptr_t)lhs & 0x##MASK)) { \
- OP_CMPXCHG(TYPE, BITS, OP) /* aligned address */ \
- } else { \
- KMP_CHECK_GTID; \
- OP_CRITICAL(^= (TYPE) ~, LCK_ID) /* unaligned address - use critical */ \
- } \
- }
- #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
- ATOMIC_CMPXCHG(fixed1, neqv, kmp_int8, 8, ^, 1i, 0,
- KMP_ARCH_X86) // __kmpc_atomic_fixed1_neqv
- ATOMIC_CMPXCHG(fixed2, neqv, kmp_int16, 16, ^, 2i, 1,
- KMP_ARCH_X86) // __kmpc_atomic_fixed2_neqv
- ATOMIC_CMPXCHG(fixed4, neqv, kmp_int32, 32, ^, 4i, 3,
- KMP_ARCH_X86) // __kmpc_atomic_fixed4_neqv
- ATOMIC_CMPXCHG(fixed8, neqv, kmp_int64, 64, ^, 8i, 7,
- KMP_ARCH_X86) // __kmpc_atomic_fixed8_neqv
- ATOMIC_CMPX_EQV(fixed1, eqv, kmp_int8, 8, ^~, 1i, 0,
- KMP_ARCH_X86) // __kmpc_atomic_fixed1_eqv
- ATOMIC_CMPX_EQV(fixed2, eqv, kmp_int16, 16, ^~, 2i, 1,
- KMP_ARCH_X86) // __kmpc_atomic_fixed2_eqv
- ATOMIC_CMPX_EQV(fixed4, eqv, kmp_int32, 32, ^~, 4i, 3,
- KMP_ARCH_X86) // __kmpc_atomic_fixed4_eqv
- ATOMIC_CMPX_EQV(fixed8, eqv, kmp_int64, 64, ^~, 8i, 7,
- KMP_ARCH_X86) // __kmpc_atomic_fixed8_eqv
- // ------------------------------------------------------------------------
- // Routines for Extended types: long double, _Quad, complex flavours (use
- // critical section)
- // TYPE_ID, OP_ID, TYPE - detailed above
- // OP - operator
- // LCK_ID - lock identifier, used to possibly distinguish lock variable
- #define ATOMIC_CRITICAL(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \
- ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
- OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG) /* send assignment */ \
- OP_UPDATE_CRITICAL(TYPE, OP, LCK_ID) /* send assignment */ \
- }
- /* ------------------------------------------------------------------------- */
- #if KMP_ARCH_X86 || KMP_ARCH_X86_64
- // routines for long double type
- ATOMIC_CRITICAL(float10, add, long double, +, 10r,
- 1) // __kmpc_atomic_float10_add
- ATOMIC_CRITICAL(float10, sub, long double, -, 10r,
- 1) // __kmpc_atomic_float10_sub
- ATOMIC_CRITICAL(float10, mul, long double, *, 10r,
- 1) // __kmpc_atomic_float10_mul
- ATOMIC_CRITICAL(float10, div, long double, /, 10r,
- 1) // __kmpc_atomic_float10_div
- #endif // KMP_ARCH_X86 || KMP_ARCH_X86_64
- #if KMP_HAVE_QUAD
- // routines for _Quad type
- ATOMIC_CRITICAL(float16, add, QUAD_LEGACY, +, 16r,
- 1) // __kmpc_atomic_float16_add
- ATOMIC_CRITICAL(float16, sub, QUAD_LEGACY, -, 16r,
- 1) // __kmpc_atomic_float16_sub
- ATOMIC_CRITICAL(float16, mul, QUAD_LEGACY, *, 16r,
- 1) // __kmpc_atomic_float16_mul
- ATOMIC_CRITICAL(float16, div, QUAD_LEGACY, /, 16r,
- 1) // __kmpc_atomic_float16_div
- #if (KMP_ARCH_X86)
- ATOMIC_CRITICAL(float16, add_a16, Quad_a16_t, +, 16r,
- 1) // __kmpc_atomic_float16_add_a16
- ATOMIC_CRITICAL(float16, sub_a16, Quad_a16_t, -, 16r,
- 1) // __kmpc_atomic_float16_sub_a16
- ATOMIC_CRITICAL(float16, mul_a16, Quad_a16_t, *, 16r,
- 1) // __kmpc_atomic_float16_mul_a16
- ATOMIC_CRITICAL(float16, div_a16, Quad_a16_t, /, 16r,
- 1) // __kmpc_atomic_float16_div_a16
- #endif // (KMP_ARCH_X86)
- #endif // KMP_HAVE_QUAD
- // routines for complex types
- #if USE_CMPXCHG_FIX
- // workaround for C78287 (complex(kind=4) data type)
- ATOMIC_CMPXCHG_WORKAROUND(cmplx4, add, kmp_cmplx32, 64, +, 8c, 7,
- 1) // __kmpc_atomic_cmplx4_add
- ATOMIC_CMPXCHG_WORKAROUND(cmplx4, sub, kmp_cmplx32, 64, -, 8c, 7,
- 1) // __kmpc_atomic_cmplx4_sub
- ATOMIC_CMPXCHG_WORKAROUND(cmplx4, mul, kmp_cmplx32, 64, *, 8c, 7,
- 1) // __kmpc_atomic_cmplx4_mul
- ATOMIC_CMPXCHG_WORKAROUND(cmplx4, div, kmp_cmplx32, 64, /, 8c, 7,
- 1) // __kmpc_atomic_cmplx4_div
- // end of the workaround for C78287
- #else
- ATOMIC_CRITICAL(cmplx4, add, kmp_cmplx32, +, 8c, 1) // __kmpc_atomic_cmplx4_add
- ATOMIC_CRITICAL(cmplx4, sub, kmp_cmplx32, -, 8c, 1) // __kmpc_atomic_cmplx4_sub
- ATOMIC_CRITICAL(cmplx4, mul, kmp_cmplx32, *, 8c, 1) // __kmpc_atomic_cmplx4_mul
- ATOMIC_CRITICAL(cmplx4, div, kmp_cmplx32, /, 8c, 1) // __kmpc_atomic_cmplx4_div
- #endif // USE_CMPXCHG_FIX
- ATOMIC_CRITICAL(cmplx8, add, kmp_cmplx64, +, 16c, 1) // __kmpc_atomic_cmplx8_add
- ATOMIC_CRITICAL(cmplx8, sub, kmp_cmplx64, -, 16c, 1) // __kmpc_atomic_cmplx8_sub
- ATOMIC_CRITICAL(cmplx8, mul, kmp_cmplx64, *, 16c, 1) // __kmpc_atomic_cmplx8_mul
- ATOMIC_CRITICAL(cmplx8, div, kmp_cmplx64, /, 16c, 1) // __kmpc_atomic_cmplx8_div
- #if KMP_ARCH_X86 || KMP_ARCH_X86_64
- ATOMIC_CRITICAL(cmplx10, add, kmp_cmplx80, +, 20c,
- 1) // __kmpc_atomic_cmplx10_add
- ATOMIC_CRITICAL(cmplx10, sub, kmp_cmplx80, -, 20c,
- 1) // __kmpc_atomic_cmplx10_sub
- ATOMIC_CRITICAL(cmplx10, mul, kmp_cmplx80, *, 20c,
- 1) // __kmpc_atomic_cmplx10_mul
- ATOMIC_CRITICAL(cmplx10, div, kmp_cmplx80, /, 20c,
- 1) // __kmpc_atomic_cmplx10_div
- #endif // KMP_ARCH_X86 || KMP_ARCH_X86_64
- #if KMP_HAVE_QUAD
- ATOMIC_CRITICAL(cmplx16, add, CPLX128_LEG, +, 32c,
- 1) // __kmpc_atomic_cmplx16_add
- ATOMIC_CRITICAL(cmplx16, sub, CPLX128_LEG, -, 32c,
- 1) // __kmpc_atomic_cmplx16_sub
- ATOMIC_CRITICAL(cmplx16, mul, CPLX128_LEG, *, 32c,
- 1) // __kmpc_atomic_cmplx16_mul
- ATOMIC_CRITICAL(cmplx16, div, CPLX128_LEG, /, 32c,
- 1) // __kmpc_atomic_cmplx16_div
- #if (KMP_ARCH_X86)
- ATOMIC_CRITICAL(cmplx16, add_a16, kmp_cmplx128_a16_t, +, 32c,
- 1) // __kmpc_atomic_cmplx16_add_a16
- ATOMIC_CRITICAL(cmplx16, sub_a16, kmp_cmplx128_a16_t, -, 32c,
- 1) // __kmpc_atomic_cmplx16_sub_a16
- ATOMIC_CRITICAL(cmplx16, mul_a16, kmp_cmplx128_a16_t, *, 32c,
- 1) // __kmpc_atomic_cmplx16_mul_a16
- ATOMIC_CRITICAL(cmplx16, div_a16, kmp_cmplx128_a16_t, /, 32c,
- 1) // __kmpc_atomic_cmplx16_div_a16
- #endif // (KMP_ARCH_X86)
- #endif // KMP_HAVE_QUAD
- // OpenMP 4.0: x = expr binop x for non-commutative operations.
- // Supported only on IA-32 architecture and Intel(R) 64
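- // Illustration of the source form these "_rev" entry points serve (the exact
- // lowering is the compiler's choice and is only assumed here):
- //   #pragma omp atomic
- //   x = expr - x;   // reversed operand order, non-commutative
- // is expected to become a call such as
- //   __kmpc_atomic_fixed4_sub_rev(loc_ref, gtid, &x, expr); // *lhs = expr - *lhs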
- #if KMP_ARCH_X86 || KMP_ARCH_X86_64
- // ------------------------------------------------------------------------
- // Operation on *lhs, rhs bound by critical section
- // OP - plain binary operator (the macro itself performs the assignment)
- // LCK_ID - lock identifier
- // Note: don't check gtid as it should always be valid
- // 1, 2-byte - expect valid parameter, other - check before this macro
- #define OP_CRITICAL_REV(TYPE, OP, LCK_ID) \
- __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
- \
- (*lhs) = (TYPE)((rhs)OP(*lhs)); \
- \
- __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);
- #ifdef KMP_GOMP_COMPAT
- #define OP_GOMP_CRITICAL_REV(TYPE, OP, FLAG) \
- if ((FLAG) && (__kmp_atomic_mode == 2)) { \
- KMP_CHECK_GTID; \
- OP_CRITICAL_REV(TYPE, OP, 0); \
- return; \
- }
- #else
- #define OP_GOMP_CRITICAL_REV(TYPE, OP, FLAG)
- #endif /* KMP_GOMP_COMPAT */
- // Beginning of a definition (provides name, parameters, debug trace)
- // TYPE_ID - operands type and size (fixed*, fixed*u for signed, unsigned
- // fixed)
- // OP_ID - operation identifier (add, sub, mul, ...)
- // TYPE - operands' type
- #define ATOMIC_BEGIN_REV(TYPE_ID, OP_ID, TYPE, RET_TYPE) \
- RET_TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID##_rev(ident_t *id_ref, int gtid, \
- TYPE *lhs, TYPE rhs) { \
- KMP_DEBUG_ASSERT(__kmp_init_serial); \
- KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID "_rev: T#%d\n", gtid));
- // ------------------------------------------------------------------------
- // Operation on *lhs, rhs using "compare_and_store" routine
- // TYPE - operands' type
- // BITS - size in bits, used to distinguish low level calls
- // OP - operator
- // Note: temp_val introduced in order to force the compiler to read
- // *lhs only once (w/o it the compiler reads *lhs twice)
- #define OP_CMPXCHG_REV(TYPE, BITS, OP) \
- { \
- TYPE KMP_ATOMIC_VOLATILE temp_val; \
- TYPE old_value, new_value; \
- temp_val = *lhs; \
- old_value = temp_val; \
- new_value = (TYPE)(rhs OP old_value); \
- while (!KMP_COMPARE_AND_STORE_ACQ##BITS( \
- (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) & old_value, \
- *VOLATILE_CAST(kmp_int##BITS *) & new_value)) { \
- KMP_DO_PAUSE; \
- \
- temp_val = *lhs; \
- old_value = temp_val; \
- new_value = (TYPE)(rhs OP old_value); \
- } \
- }
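- // KMP_DO_PAUSE is a spin-wait hint (e.g. the x86 "pause" instruction) that
- // eases bus/cache contention while the compare-and-store is retried.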
- // -------------------------------------------------------------------------
- #define ATOMIC_CMPXCHG_REV(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, GOMP_FLAG) \
- ATOMIC_BEGIN_REV(TYPE_ID, OP_ID, TYPE, void) \
- OP_GOMP_CRITICAL_REV(TYPE, OP, GOMP_FLAG) \
- OP_CMPXCHG_REV(TYPE, BITS, OP) \
- }
- // ------------------------------------------------------------------------
- // Entries definition for integer operands
- // TYPE_ID - operands type and size (fixed4, float4)
- // OP_ID - operation identifier (add, sub, mul, ...)
- // TYPE - operand type
- // BITS - size in bits, used to distinguish low level calls
- // OP - operator (used in critical section)
- // LCK_ID - lock identifier, used to possibly distinguish lock variable
- // TYPE_ID,OP_ID, TYPE, BITS,OP,LCK_ID,GOMP_FLAG
- // ------------------------------------------------------------------------
- // Routines for ATOMIC integer operands, other operators
- // ------------------------------------------------------------------------
- // TYPE_ID,OP_ID, TYPE, BITS, OP, LCK_ID, GOMP_FLAG
- ATOMIC_CMPXCHG_REV(fixed1, div, kmp_int8, 8, /, 1i,
- KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_rev
- ATOMIC_CMPXCHG_REV(fixed1u, div, kmp_uint8, 8, /, 1i,
- KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_rev
- ATOMIC_CMPXCHG_REV(fixed1, shl, kmp_int8, 8, <<, 1i,
- KMP_ARCH_X86) // __kmpc_atomic_fixed1_shl_rev
- ATOMIC_CMPXCHG_REV(fixed1, shr, kmp_int8, 8, >>, 1i,
- KMP_ARCH_X86) // __kmpc_atomic_fixed1_shr_rev
- ATOMIC_CMPXCHG_REV(fixed1u, shr, kmp_uint8, 8, >>, 1i,
- KMP_ARCH_X86) // __kmpc_atomic_fixed1u_shr_rev
- ATOMIC_CMPXCHG_REV(fixed1, sub, kmp_int8, 8, -, 1i,
- KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_rev
- ATOMIC_CMPXCHG_REV(fixed2, div, kmp_int16, 16, /, 2i,
- KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_rev
- ATOMIC_CMPXCHG_REV(fixed2u, div, kmp_uint16, 16, /, 2i,
- KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_rev
- ATOMIC_CMPXCHG_REV(fixed2, shl, kmp_int16, 16, <<, 2i,
- KMP_ARCH_X86) // __kmpc_atomic_fixed2_shl_rev
- ATOMIC_CMPXCHG_REV(fixed2, shr, kmp_int16, 16, >>, 2i,
- KMP_ARCH_X86) // __kmpc_atomic_fixed2_shr_rev
- ATOMIC_CMPXCHG_REV(fixed2u, shr, kmp_uint16, 16, >>, 2i,
- KMP_ARCH_X86) // __kmpc_atomic_fixed2u_shr_rev
- ATOMIC_CMPXCHG_REV(fixed2, sub, kmp_int16, 16, -, 2i,
- KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_rev
- ATOMIC_CMPXCHG_REV(fixed4, div, kmp_int32, 32, /, 4i,
- KMP_ARCH_X86) // __kmpc_atomic_fixed4_div_rev
- ATOMIC_CMPXCHG_REV(fixed4u, div, kmp_uint32, 32, /, 4i,
- KMP_ARCH_X86) // __kmpc_atomic_fixed4u_div_rev
- ATOMIC_CMPXCHG_REV(fixed4, shl, kmp_int32, 32, <<, 4i,
- KMP_ARCH_X86) // __kmpc_atomic_fixed4_shl_rev
- ATOMIC_CMPXCHG_REV(fixed4, shr, kmp_int32, 32, >>, 4i,
- KMP_ARCH_X86) // __kmpc_atomic_fixed4_shr_rev
- ATOMIC_CMPXCHG_REV(fixed4u, shr, kmp_uint32, 32, >>, 4i,
- KMP_ARCH_X86) // __kmpc_atomic_fixed4u_shr_rev
- ATOMIC_CMPXCHG_REV(fixed4, sub, kmp_int32, 32, -, 4i,
- KMP_ARCH_X86) // __kmpc_atomic_fixed4_sub_rev
- ATOMIC_CMPXCHG_REV(fixed8, div, kmp_int64, 64, /, 8i,
- KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_rev
- ATOMIC_CMPXCHG_REV(fixed8u, div, kmp_uint64, 64, /, 8i,
- KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_rev
- ATOMIC_CMPXCHG_REV(fixed8, shl, kmp_int64, 64, <<, 8i,
- KMP_ARCH_X86) // __kmpc_atomic_fixed8_shl_rev
- ATOMIC_CMPXCHG_REV(fixed8, shr, kmp_int64, 64, >>, 8i,
- KMP_ARCH_X86) // __kmpc_atomic_fixed8_shr_rev
- ATOMIC_CMPXCHG_REV(fixed8u, shr, kmp_uint64, 64, >>, 8i,
- KMP_ARCH_X86) // __kmpc_atomic_fixed8u_shr_rev
- ATOMIC_CMPXCHG_REV(fixed8, sub, kmp_int64, 64, -, 8i,
- KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_rev
- ATOMIC_CMPXCHG_REV(float4, div, kmp_real32, 32, /, 4r,
- KMP_ARCH_X86) // __kmpc_atomic_float4_div_rev
- ATOMIC_CMPXCHG_REV(float4, sub, kmp_real32, 32, -, 4r,
- KMP_ARCH_X86) // __kmpc_atomic_float4_sub_rev
- ATOMIC_CMPXCHG_REV(float8, div, kmp_real64, 64, /, 8r,
- KMP_ARCH_X86) // __kmpc_atomic_float8_div_rev
- ATOMIC_CMPXCHG_REV(float8, sub, kmp_real64, 64, -, 8r,
- KMP_ARCH_X86) // __kmpc_atomic_float8_sub_rev
- // TYPE_ID,OP_ID, TYPE, BITS,OP,LCK_ID, GOMP_FLAG
- // ------------------------------------------------------------------------
- // Routines for Extended types: long double, _Quad, complex flavours (use
- // critical section)
- // TYPE_ID, OP_ID, TYPE - detailed above
- // OP - operator
- // LCK_ID - lock identifier, used to possibly distinguish lock variable
- #define ATOMIC_CRITICAL_REV(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \
- ATOMIC_BEGIN_REV(TYPE_ID, OP_ID, TYPE, void) \
- OP_GOMP_CRITICAL_REV(TYPE, OP, GOMP_FLAG) \
- OP_CRITICAL_REV(TYPE, OP, LCK_ID) \
- }
- /* ------------------------------------------------------------------------- */
- // routines for long double type
- ATOMIC_CRITICAL_REV(float10, sub, long double, -, 10r,
- 1) // __kmpc_atomic_float10_sub_rev
- ATOMIC_CRITICAL_REV(float10, div, long double, /, 10r,
- 1) // __kmpc_atomic_float10_div_rev
- #if KMP_HAVE_QUAD
- // routines for _Quad type
- ATOMIC_CRITICAL_REV(float16, sub, QUAD_LEGACY, -, 16r,
- 1) // __kmpc_atomic_float16_sub_rev
- ATOMIC_CRITICAL_REV(float16, div, QUAD_LEGACY, /, 16r,
- 1) // __kmpc_atomic_float16_div_rev
- #if (KMP_ARCH_X86)
- ATOMIC_CRITICAL_REV(float16, sub_a16, Quad_a16_t, -, 16r,
- 1) // __kmpc_atomic_float16_sub_a16_rev
- ATOMIC_CRITICAL_REV(float16, div_a16, Quad_a16_t, /, 16r,
- 1) // __kmpc_atomic_float16_div_a16_rev
- #endif // KMP_ARCH_X86
- #endif // KMP_HAVE_QUAD
- // routines for complex types
- ATOMIC_CRITICAL_REV(cmplx4, sub, kmp_cmplx32, -, 8c,
- 1) // __kmpc_atomic_cmplx4_sub_rev
- ATOMIC_CRITICAL_REV(cmplx4, div, kmp_cmplx32, /, 8c,
- 1) // __kmpc_atomic_cmplx4_div_rev
- ATOMIC_CRITICAL_REV(cmplx8, sub, kmp_cmplx64, -, 16c,
- 1) // __kmpc_atomic_cmplx8_sub_rev
- ATOMIC_CRITICAL_REV(cmplx8, div, kmp_cmplx64, /, 16c,
- 1) // __kmpc_atomic_cmplx8_div_rev
- ATOMIC_CRITICAL_REV(cmplx10, sub, kmp_cmplx80, -, 20c,
- 1) // __kmpc_atomic_cmplx10_sub_rev
- ATOMIC_CRITICAL_REV(cmplx10, div, kmp_cmplx80, /, 20c,
- 1) // __kmpc_atomic_cmplx10_div_rev
- #if KMP_HAVE_QUAD
- ATOMIC_CRITICAL_REV(cmplx16, sub, CPLX128_LEG, -, 32c,
- 1) // __kmpc_atomic_cmplx16_sub_rev
- ATOMIC_CRITICAL_REV(cmplx16, div, CPLX128_LEG, /, 32c,
- 1) // __kmpc_atomic_cmplx16_div_rev
- #if (KMP_ARCH_X86)
- ATOMIC_CRITICAL_REV(cmplx16, sub_a16, kmp_cmplx128_a16_t, -, 32c,
- 1) // __kmpc_atomic_cmplx16_sub_a16_rev
- ATOMIC_CRITICAL_REV(cmplx16, div_a16, kmp_cmplx128_a16_t, /, 32c,
- 1) // __kmpc_atomic_cmplx16_div_a16_rev
- #endif // KMP_ARCH_X86
- #endif // KMP_HAVE_QUAD
- #endif // KMP_ARCH_X86 || KMP_ARCH_X86_64
- // End of OpenMP 4.0: x = expr binop x for non-commutative operations.
- /* ------------------------------------------------------------------------ */
- /* Routines for mixed types of LHS and RHS, when RHS is "larger" */
- /* Note: to reduce the total number of type combinations, the compiler is   */
- /* assumed to convert RHS to the longest floating type, i.e. _Quad,         */
- /* before calling any of these routines.                                    */
- /* The conversion to _Quad is done by the compiler during the calculation,  */
- /* and the conversion back to TYPE happens just before the assignment:      */
- /*   *lhs = (TYPE)( (_Quad)(*lhs) OP rhs )                                  */
- /* A performance penalty is expected because of software emulation.         */
- /* ------------------------------------------------------------------------ */
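- // Illustration of the calling contract described above (the lowering itself
- // is the compiler's responsibility and is only assumed here):
- //   float x; _Quad q;
- //   #pragma omp atomic
- //   x = x * q;
- // is expected to call __kmpc_atomic_float4_mul_fp(loc_ref, gtid, &x, q),
- // which performs *lhs = (float)((_Quad)(*lhs) * rhs) atomically.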
- #define ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE) \
- void __kmpc_atomic_##TYPE_ID##_##OP_ID##_##RTYPE_ID( \
- ident_t *id_ref, int gtid, TYPE *lhs, RTYPE rhs) { \
- KMP_DEBUG_ASSERT(__kmp_init_serial); \
- KA_TRACE(100, \
- ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID "_" #RTYPE_ID ": T#%d\n", \
- gtid));
- // -------------------------------------------------------------------------
- #define ATOMIC_CRITICAL_FP(TYPE_ID, TYPE, OP_ID, OP, RTYPE_ID, RTYPE, LCK_ID, \
- GOMP_FLAG) \
- ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE) \
- OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG) /* send assignment */ \
- OP_UPDATE_CRITICAL(TYPE, OP, LCK_ID) /* send assignment */ \
- }
- // -------------------------------------------------------------------------
- #if KMP_ARCH_X86 || KMP_ARCH_X86_64
- // -------------------------------------------------------------------------
- // X86 or X86_64: no alignment problems ====================================
- #define ATOMIC_CMPXCHG_MIX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, RTYPE, \
- LCK_ID, MASK, GOMP_FLAG) \
- ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE) \
- OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG) \
- OP_CMPXCHG(TYPE, BITS, OP) \
- }
- // -------------------------------------------------------------------------
- #else
- // ------------------------------------------------------------------------
- // Code for other architectures that don't handle unaligned accesses.
- #define ATOMIC_CMPXCHG_MIX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, RTYPE, \
- LCK_ID, MASK, GOMP_FLAG) \
- ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE) \
- OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG) \
- if (!((kmp_uintptr_t)lhs & 0x##MASK)) { \
- OP_CMPXCHG(TYPE, BITS, OP) /* aligned address */ \
- } else { \
- KMP_CHECK_GTID; \
- OP_UPDATE_CRITICAL(TYPE, OP, \
- LCK_ID) /* unaligned address - use critical */ \
- } \
- }
- #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
- // -------------------------------------------------------------------------
- #if KMP_ARCH_X86 || KMP_ARCH_X86_64
- // -------------------------------------------------------------------------
- #define ATOMIC_CMPXCHG_REV_MIX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, \
- RTYPE, LCK_ID, MASK, GOMP_FLAG) \
- ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE) \
- OP_GOMP_CRITICAL_REV(TYPE, OP, GOMP_FLAG) \
- OP_CMPXCHG_REV(TYPE, BITS, OP) \
- }
- #define ATOMIC_CRITICAL_REV_FP(TYPE_ID, TYPE, OP_ID, OP, RTYPE_ID, RTYPE, \
- LCK_ID, GOMP_FLAG) \
- ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE) \
- OP_GOMP_CRITICAL_REV(TYPE, OP, GOMP_FLAG) \
- OP_CRITICAL_REV(TYPE, OP, LCK_ID) \
- }
- #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
- // RHS=float8
- ATOMIC_CMPXCHG_MIX(fixed1, char, mul, 8, *, float8, kmp_real64, 1i, 0,
- KMP_ARCH_X86) // __kmpc_atomic_fixed1_mul_float8
- ATOMIC_CMPXCHG_MIX(fixed1, char, div, 8, /, float8, kmp_real64, 1i, 0,
- KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_float8
- ATOMIC_CMPXCHG_MIX(fixed2, short, mul, 16, *, float8, kmp_real64, 2i, 1,
- KMP_ARCH_X86) // __kmpc_atomic_fixed2_mul_float8
- ATOMIC_CMPXCHG_MIX(fixed2, short, div, 16, /, float8, kmp_real64, 2i, 1,
- KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_float8
- ATOMIC_CMPXCHG_MIX(fixed4, kmp_int32, mul, 32, *, float8, kmp_real64, 4i, 3,
- 0) // __kmpc_atomic_fixed4_mul_float8
- ATOMIC_CMPXCHG_MIX(fixed4, kmp_int32, div, 32, /, float8, kmp_real64, 4i, 3,
- 0) // __kmpc_atomic_fixed4_div_float8
- ATOMIC_CMPXCHG_MIX(fixed8, kmp_int64, mul, 64, *, float8, kmp_real64, 8i, 7,
- KMP_ARCH_X86) // __kmpc_atomic_fixed8_mul_float8
- ATOMIC_CMPXCHG_MIX(fixed8, kmp_int64, div, 64, /, float8, kmp_real64, 8i, 7,
- KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_float8
- ATOMIC_CMPXCHG_MIX(float4, kmp_real32, add, 32, +, float8, kmp_real64, 4r, 3,
- KMP_ARCH_X86) // __kmpc_atomic_float4_add_float8
- ATOMIC_CMPXCHG_MIX(float4, kmp_real32, sub, 32, -, float8, kmp_real64, 4r, 3,
- KMP_ARCH_X86) // __kmpc_atomic_float4_sub_float8
- ATOMIC_CMPXCHG_MIX(float4, kmp_real32, mul, 32, *, float8, kmp_real64, 4r, 3,
- KMP_ARCH_X86) // __kmpc_atomic_float4_mul_float8
- ATOMIC_CMPXCHG_MIX(float4, kmp_real32, div, 32, /, float8, kmp_real64, 4r, 3,
- KMP_ARCH_X86) // __kmpc_atomic_float4_div_float8
- // RHS=float16 (deprecated, to be removed when we are sure the compiler does not
- // use them)
- #if KMP_HAVE_QUAD
- ATOMIC_CMPXCHG_MIX(fixed1, char, add, 8, +, fp, _Quad, 1i, 0,
- KMP_ARCH_X86) // __kmpc_atomic_fixed1_add_fp
- ATOMIC_CMPXCHG_MIX(fixed1u, uchar, add, 8, +, fp, _Quad, 1i, 0,
- KMP_ARCH_X86) // __kmpc_atomic_fixed1u_add_fp
- ATOMIC_CMPXCHG_MIX(fixed1, char, sub, 8, -, fp, _Quad, 1i, 0,
- KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_fp
- ATOMIC_CMPXCHG_MIX(fixed1u, uchar, sub, 8, -, fp, _Quad, 1i, 0,
- KMP_ARCH_X86) // __kmpc_atomic_fixed1u_sub_fp
- ATOMIC_CMPXCHG_MIX(fixed1, char, mul, 8, *, fp, _Quad, 1i, 0,
- KMP_ARCH_X86) // __kmpc_atomic_fixed1_mul_fp
- ATOMIC_CMPXCHG_MIX(fixed1u, uchar, mul, 8, *, fp, _Quad, 1i, 0,
- KMP_ARCH_X86) // __kmpc_atomic_fixed1u_mul_fp
- ATOMIC_CMPXCHG_MIX(fixed1, char, div, 8, /, fp, _Quad, 1i, 0,
- KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_fp
- ATOMIC_CMPXCHG_MIX(fixed1u, uchar, div, 8, /, fp, _Quad, 1i, 0,
- KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_fp
- ATOMIC_CMPXCHG_MIX(fixed2, short, add, 16, +, fp, _Quad, 2i, 1,
- KMP_ARCH_X86) // __kmpc_atomic_fixed2_add_fp
- ATOMIC_CMPXCHG_MIX(fixed2u, ushort, add, 16, +, fp, _Quad, 2i, 1,
- KMP_ARCH_X86) // __kmpc_atomic_fixed2u_add_fp
- ATOMIC_CMPXCHG_MIX(fixed2, short, sub, 16, -, fp, _Quad, 2i, 1,
- KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_fp
- ATOMIC_CMPXCHG_MIX(fixed2u, ushort, sub, 16, -, fp, _Quad, 2i, 1,
- KMP_ARCH_X86) // __kmpc_atomic_fixed2u_sub_fp
- ATOMIC_CMPXCHG_MIX(fixed2, short, mul, 16, *, fp, _Quad, 2i, 1,
- KMP_ARCH_X86) // __kmpc_atomic_fixed2_mul_fp
- ATOMIC_CMPXCHG_MIX(fixed2u, ushort, mul, 16, *, fp, _Quad, 2i, 1,
- KMP_ARCH_X86) // __kmpc_atomic_fixed2u_mul_fp
- ATOMIC_CMPXCHG_MIX(fixed2, short, div, 16, /, fp, _Quad, 2i, 1,
- KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_fp
- ATOMIC_CMPXCHG_MIX(fixed2u, ushort, div, 16, /, fp, _Quad, 2i, 1,
- KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_fp
- ATOMIC_CMPXCHG_MIX(fixed4, kmp_int32, add, 32, +, fp, _Quad, 4i, 3,
- 0) // __kmpc_atomic_fixed4_add_fp
- ATOMIC_CMPXCHG_MIX(fixed4u, kmp_uint32, add, 32, +, fp, _Quad, 4i, 3,
- 0) // __kmpc_atomic_fixed4u_add_fp
- ATOMIC_CMPXCHG_MIX(fixed4, kmp_int32, sub, 32, -, fp, _Quad, 4i, 3,
- 0) // __kmpc_atomic_fixed4_sub_fp
- ATOMIC_CMPXCHG_MIX(fixed4u, kmp_uint32, sub, 32, -, fp, _Quad, 4i, 3,
- 0) // __kmpc_atomic_fixed4u_sub_fp
- ATOMIC_CMPXCHG_MIX(fixed4, kmp_int32, mul, 32, *, fp, _Quad, 4i, 3,
- 0) // __kmpc_atomic_fixed4_mul_fp
- ATOMIC_CMPXCHG_MIX(fixed4u, kmp_uint32, mul, 32, *, fp, _Quad, 4i, 3,
- 0) // __kmpc_atomic_fixed4u_mul_fp
- ATOMIC_CMPXCHG_MIX(fixed4, kmp_int32, div, 32, /, fp, _Quad, 4i, 3,
- 0) // __kmpc_atomic_fixed4_div_fp
- ATOMIC_CMPXCHG_MIX(fixed4u, kmp_uint32, div, 32, /, fp, _Quad, 4i, 3,
- 0) // __kmpc_atomic_fixed4u_div_fp
- ATOMIC_CMPXCHG_MIX(fixed8, kmp_int64, add, 64, +, fp, _Quad, 8i, 7,
- KMP_ARCH_X86) // __kmpc_atomic_fixed8_add_fp
- ATOMIC_CMPXCHG_MIX(fixed8u, kmp_uint64, add, 64, +, fp, _Quad, 8i, 7,
- KMP_ARCH_X86) // __kmpc_atomic_fixed8u_add_fp
- ATOMIC_CMPXCHG_MIX(fixed8, kmp_int64, sub, 64, -, fp, _Quad, 8i, 7,
- KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_fp
- ATOMIC_CMPXCHG_MIX(fixed8u, kmp_uint64, sub, 64, -, fp, _Quad, 8i, 7,
- KMP_ARCH_X86) // __kmpc_atomic_fixed8u_sub_fp
- ATOMIC_CMPXCHG_MIX(fixed8, kmp_int64, mul, 64, *, fp, _Quad, 8i, 7,
- KMP_ARCH_X86) // __kmpc_atomic_fixed8_mul_fp
- ATOMIC_CMPXCHG_MIX(fixed8u, kmp_uint64, mul, 64, *, fp, _Quad, 8i, 7,
- KMP_ARCH_X86) // __kmpc_atomic_fixed8u_mul_fp
- ATOMIC_CMPXCHG_MIX(fixed8, kmp_int64, div, 64, /, fp, _Quad, 8i, 7,
- KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_fp
- ATOMIC_CMPXCHG_MIX(fixed8u, kmp_uint64, div, 64, /, fp, _Quad, 8i, 7,
- KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_fp
- ATOMIC_CMPXCHG_MIX(float4, kmp_real32, add, 32, +, fp, _Quad, 4r, 3,
- KMP_ARCH_X86) // __kmpc_atomic_float4_add_fp
- ATOMIC_CMPXCHG_MIX(float4, kmp_real32, sub, 32, -, fp, _Quad, 4r, 3,
- KMP_ARCH_X86) // __kmpc_atomic_float4_sub_fp
- ATOMIC_CMPXCHG_MIX(float4, kmp_real32, mul, 32, *, fp, _Quad, 4r, 3,
- KMP_ARCH_X86) // __kmpc_atomic_float4_mul_fp
- ATOMIC_CMPXCHG_MIX(float4, kmp_real32, div, 32, /, fp, _Quad, 4r, 3,
- KMP_ARCH_X86) // __kmpc_atomic_float4_div_fp
- ATOMIC_CMPXCHG_MIX(float8, kmp_real64, add, 64, +, fp, _Quad, 8r, 7,
- KMP_ARCH_X86) // __kmpc_atomic_float8_add_fp
- ATOMIC_CMPXCHG_MIX(float8, kmp_real64, sub, 64, -, fp, _Quad, 8r, 7,
- KMP_ARCH_X86) // __kmpc_atomic_float8_sub_fp
- ATOMIC_CMPXCHG_MIX(float8, kmp_real64, mul, 64, *, fp, _Quad, 8r, 7,
- KMP_ARCH_X86) // __kmpc_atomic_float8_mul_fp
- ATOMIC_CMPXCHG_MIX(float8, kmp_real64, div, 64, /, fp, _Quad, 8r, 7,
- KMP_ARCH_X86) // __kmpc_atomic_float8_div_fp
- #if KMP_ARCH_X86 || KMP_ARCH_X86_64
- ATOMIC_CRITICAL_FP(float10, long double, add, +, fp, _Quad, 10r,
- 1) // __kmpc_atomic_float10_add_fp
- ATOMIC_CRITICAL_FP(float10, long double, sub, -, fp, _Quad, 10r,
- 1) // __kmpc_atomic_float10_sub_fp
- ATOMIC_CRITICAL_FP(float10, long double, mul, *, fp, _Quad, 10r,
- 1) // __kmpc_atomic_float10_mul_fp
- ATOMIC_CRITICAL_FP(float10, long double, div, /, fp, _Quad, 10r,
- 1) // __kmpc_atomic_float10_div_fp
- // Reverse operations
- ATOMIC_CMPXCHG_REV_MIX(fixed1, char, sub_rev, 8, -, fp, _Quad, 1i, 0,
- KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_rev_fp
- ATOMIC_CMPXCHG_REV_MIX(fixed1u, uchar, sub_rev, 8, -, fp, _Quad, 1i, 0,
- KMP_ARCH_X86) // __kmpc_atomic_fixed1u_sub_rev_fp
- ATOMIC_CMPXCHG_REV_MIX(fixed1, char, div_rev, 8, /, fp, _Quad, 1i, 0,
- KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_rev_fp
- ATOMIC_CMPXCHG_REV_MIX(fixed1u, uchar, div_rev, 8, /, fp, _Quad, 1i, 0,
- KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_rev_fp
- ATOMIC_CMPXCHG_REV_MIX(fixed2, short, sub_rev, 16, -, fp, _Quad, 2i, 1,
- KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_rev_fp
- ATOMIC_CMPXCHG_REV_MIX(fixed2u, ushort, sub_rev, 16, -, fp, _Quad, 2i, 1,
- KMP_ARCH_X86) // __kmpc_atomic_fixed2u_sub_rev_fp
- ATOMIC_CMPXCHG_REV_MIX(fixed2, short, div_rev, 16, /, fp, _Quad, 2i, 1,
- KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_rev_fp
- ATOMIC_CMPXCHG_REV_MIX(fixed2u, ushort, div_rev, 16, /, fp, _Quad, 2i, 1,
- KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_rev_fp
- ATOMIC_CMPXCHG_REV_MIX(fixed4, kmp_int32, sub_rev, 32, -, fp, _Quad, 4i, 3,
- 0) // __kmpc_atomic_fixed4_sub_rev_fp
- ATOMIC_CMPXCHG_REV_MIX(fixed4u, kmp_uint32, sub_rev, 32, -, fp, _Quad, 4i, 3,
- 0) // __kmpc_atomic_fixed4u_sub_rev_fp
- ATOMIC_CMPXCHG_REV_MIX(fixed4, kmp_int32, div_rev, 32, /, fp, _Quad, 4i, 3,
- 0) // __kmpc_atomic_fixed4_div_rev_fp
- ATOMIC_CMPXCHG_REV_MIX(fixed4u, kmp_uint32, div_rev, 32, /, fp, _Quad, 4i, 3,
- 0) // __kmpc_atomic_fixed4u_div_rev_fp
- ATOMIC_CMPXCHG_REV_MIX(fixed8, kmp_int64, sub_rev, 64, -, fp, _Quad, 8i, 7,
- KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_rev_fp
- ATOMIC_CMPXCHG_REV_MIX(fixed8u, kmp_uint64, sub_rev, 64, -, fp, _Quad, 8i, 7,
- KMP_ARCH_X86) // __kmpc_atomic_fixed8u_sub_rev_fp
- ATOMIC_CMPXCHG_REV_MIX(fixed8, kmp_int64, div_rev, 64, /, fp, _Quad, 8i, 7,
- KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_rev_fp
- ATOMIC_CMPXCHG_REV_MIX(fixed8u, kmp_uint64, div_rev, 64, /, fp, _Quad, 8i, 7,
- KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_rev_fp
- ATOMIC_CMPXCHG_REV_MIX(float4, kmp_real32, sub_rev, 32, -, fp, _Quad, 4r, 3,
- KMP_ARCH_X86) // __kmpc_atomic_float4_sub_rev_fp
- ATOMIC_CMPXCHG_REV_MIX(float4, kmp_real32, div_rev, 32, /, fp, _Quad, 4r, 3,
- KMP_ARCH_X86) // __kmpc_atomic_float4_div_rev_fp
- ATOMIC_CMPXCHG_REV_MIX(float8, kmp_real64, sub_rev, 64, -, fp, _Quad, 8r, 7,
- KMP_ARCH_X86) // __kmpc_atomic_float8_sub_rev_fp
- ATOMIC_CMPXCHG_REV_MIX(float8, kmp_real64, div_rev, 64, /, fp, _Quad, 8r, 7,
- KMP_ARCH_X86) // __kmpc_atomic_float8_div_rev_fp
- ATOMIC_CRITICAL_REV_FP(float10, long double, sub_rev, -, fp, _Quad, 10r,
- 1) // __kmpc_atomic_float10_sub_rev_fp
- ATOMIC_CRITICAL_REV_FP(float10, long double, div_rev, /, fp, _Quad, 10r,
- 1) // __kmpc_atomic_float10_div_rev_fp
- #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
- #endif // KMP_HAVE_QUAD
- #if KMP_ARCH_X86 || KMP_ARCH_X86_64
- // ------------------------------------------------------------------------
- // X86 or X86_64: no alignment problems ====================================
- #if USE_CMPXCHG_FIX
- // workaround for C78287 (complex(kind=4) data type)
- #define ATOMIC_CMPXCHG_CMPLX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, RTYPE, \
- LCK_ID, MASK, GOMP_FLAG) \
- ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE) \
- OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG) \
- OP_CMPXCHG_WORKAROUND(TYPE, BITS, OP) \
- }
- // end of the second part of the workaround for C78287
- #else
- #define ATOMIC_CMPXCHG_CMPLX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, RTYPE, \
- LCK_ID, MASK, GOMP_FLAG) \
- ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE) \
- OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG) \
- OP_CMPXCHG(TYPE, BITS, OP) \
- }
- #endif // USE_CMPXCHG_FIX
- #else
- // ------------------------------------------------------------------------
- // Code for other architectures that don't handle unaligned accesses.
- #define ATOMIC_CMPXCHG_CMPLX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, RTYPE, \
- LCK_ID, MASK, GOMP_FLAG) \
- ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE) \
- OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG) \
- if (!((kmp_uintptr_t)lhs & 0x##MASK)) { \
- OP_CMPXCHG(TYPE, BITS, OP) /* aligned address */ \
- } else { \
- KMP_CHECK_GTID; \
- OP_UPDATE_CRITICAL(TYPE, OP, \
- LCK_ID) /* unaligned address - use critical */ \
- } \
- }
- #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
- ATOMIC_CMPXCHG_CMPLX(cmplx4, kmp_cmplx32, add, 64, +, cmplx8, kmp_cmplx64, 8c,
- 7, KMP_ARCH_X86) // __kmpc_atomic_cmplx4_add_cmplx8
- ATOMIC_CMPXCHG_CMPLX(cmplx4, kmp_cmplx32, sub, 64, -, cmplx8, kmp_cmplx64, 8c,
- 7, KMP_ARCH_X86) // __kmpc_atomic_cmplx4_sub_cmplx8
- ATOMIC_CMPXCHG_CMPLX(cmplx4, kmp_cmplx32, mul, 64, *, cmplx8, kmp_cmplx64, 8c,
- 7, KMP_ARCH_X86) // __kmpc_atomic_cmplx4_mul_cmplx8
- ATOMIC_CMPXCHG_CMPLX(cmplx4, kmp_cmplx32, div, 64, /, cmplx8, kmp_cmplx64, 8c,
- 7, KMP_ARCH_X86) // __kmpc_atomic_cmplx4_div_cmplx8
- // READ, WRITE, CAPTURE are supported only on IA-32 architecture and Intel(R) 64
- #if KMP_ARCH_X86 || KMP_ARCH_X86_64
- // ------------------------------------------------------------------------
- // Atomic READ routines
- // ------------------------------------------------------------------------
- // Beginning of a definition (provides name, parameters, debug trace)
- // TYPE_ID - operands type and size (fixed*, fixed*u for signed, unsigned
- // fixed)
- // OP_ID - operation identifier (add, sub, mul, ...)
- // TYPE - operands' type
- #define ATOMIC_BEGIN_READ(TYPE_ID, OP_ID, TYPE, RET_TYPE) \
- RET_TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID(ident_t *id_ref, int gtid, \
- TYPE *loc) { \
- KMP_DEBUG_ASSERT(__kmp_init_serial); \
- KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid));
- // ------------------------------------------------------------------------
- // Operation on *loc using the "compare_and_store_ret" routine
- // TYPE - operands' type
- // BITS - size in bits, used to distinguish low level calls
- // OP - operator
- // Note: temp_val introduced in order to force the compiler to read
- // *loc only once (w/o it the compiler reads *loc twice)
- // TODO: check if it is still necessary
- // Return the old value regardless of the result of the "compare & swap" operation
- #define OP_CMPXCHG_READ(TYPE, BITS, OP) \
- { \
- TYPE KMP_ATOMIC_VOLATILE temp_val; \
- union f_i_union { \
- TYPE f_val; \
- kmp_int##BITS i_val; \
- }; \
- union f_i_union old_value; \
- temp_val = *loc; \
- old_value.f_val = temp_val; \
- old_value.i_val = KMP_COMPARE_AND_STORE_RET##BITS( \
- (kmp_int##BITS *)loc, \
- *VOLATILE_CAST(kmp_int##BITS *) & old_value.i_val, \
- *VOLATILE_CAST(kmp_int##BITS *) & old_value.i_val); \
- new_value = old_value.f_val; \
- return new_value; \
- }
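- // How the read works: the value just read is compare-and-stored over itself,
- // so memory is left unchanged while the "_RET" primitive returns the contents
- // it actually saw; the union merely reinterprets those bits as an integer of
- // the same width for the low-level call. This is what makes the read atomic
- // for non-integer types.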
- // -------------------------------------------------------------------------
- // Read of *loc bound by critical section
- // OP - operator (unused here; the macro performs a plain read)
- // LCK_ID - lock identifier
- // Note: don't check gtid as it should always be valid
- // 1, 2-byte - expect valid parameter, other - check before this macro
- #define OP_CRITICAL_READ(OP, LCK_ID) \
- __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
- \
- new_value = (*loc); \
- \
- __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);
- // -------------------------------------------------------------------------
- #ifdef KMP_GOMP_COMPAT
- #define OP_GOMP_CRITICAL_READ(OP, FLAG) \
- if ((FLAG) && (__kmp_atomic_mode == 2)) { \
- KMP_CHECK_GTID; \
- OP_CRITICAL_READ(OP, 0); \
- return new_value; \
- }
- #else
- #define OP_GOMP_CRITICAL_READ(OP, FLAG)
- #endif /* KMP_GOMP_COMPAT */
- // -------------------------------------------------------------------------
- #define ATOMIC_FIXED_READ(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \
- ATOMIC_BEGIN_READ(TYPE_ID, OP_ID, TYPE, TYPE) \
- TYPE new_value; \
- OP_GOMP_CRITICAL_READ(OP## =, GOMP_FLAG) \
- new_value = KMP_TEST_THEN_ADD##BITS(loc, OP 0); \
- return new_value; \
- }
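- // The fixed-size read above is an atomic fetch-and-add of zero: the primitive
- // returns the current value and adding 0 leaves memory unchanged.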
- // -------------------------------------------------------------------------
- #define ATOMIC_CMPXCHG_READ(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \
- ATOMIC_BEGIN_READ(TYPE_ID, OP_ID, TYPE, TYPE) \
- TYPE new_value; \
- OP_GOMP_CRITICAL_READ(OP## =, GOMP_FLAG) \
- OP_CMPXCHG_READ(TYPE, BITS, OP) \
- }
- // ------------------------------------------------------------------------
- // Routines for Extended types: long double, _Quad, complex flavours (use
- // critical section)
- // TYPE_ID, OP_ID, TYPE - detailed above
- // OP - operator
- // LCK_ID - lock identifier, used to possibly distinguish lock variable
- #define ATOMIC_CRITICAL_READ(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \
- ATOMIC_BEGIN_READ(TYPE_ID, OP_ID, TYPE, TYPE) \
- TYPE new_value; \
- OP_GOMP_CRITICAL_READ(OP## =, GOMP_FLAG) /* send assignment */ \
- OP_CRITICAL_READ(OP, LCK_ID) /* send assignment */ \
- return new_value; \
- }
- // ------------------------------------------------------------------------
- // Fix for cmplx4 read (CQ220361) on Windows* OS: the regular routine with a
- // return value doesn't work there, so the read value is returned through an
- // additional parameter instead.
- #if (KMP_OS_WINDOWS)
- #define OP_CRITICAL_READ_WRK(OP, LCK_ID) \
- __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
- \
- (*out) = (*loc); \
- \
- __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);
- // ------------------------------------------------------------------------
- #ifdef KMP_GOMP_COMPAT
- #define OP_GOMP_CRITICAL_READ_WRK(OP, FLAG) \
- if ((FLAG) && (__kmp_atomic_mode == 2)) { \
- KMP_CHECK_GTID; \
- OP_CRITICAL_READ_WRK(OP, 0); \
- }
- #else
- #define OP_GOMP_CRITICAL_READ_WRK(OP, FLAG)
- #endif /* KMP_GOMP_COMPAT */
- // ------------------------------------------------------------------------
- #define ATOMIC_BEGIN_READ_WRK(TYPE_ID, OP_ID, TYPE) \
- void __kmpc_atomic_##TYPE_ID##_##OP_ID(TYPE *out, ident_t *id_ref, int gtid, \
- TYPE *loc) { \
- KMP_DEBUG_ASSERT(__kmp_init_serial); \
- KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid));
- // ------------------------------------------------------------------------
- #define ATOMIC_CRITICAL_READ_WRK(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \
- ATOMIC_BEGIN_READ_WRK(TYPE_ID, OP_ID, TYPE) \
- OP_GOMP_CRITICAL_READ_WRK(OP## =, GOMP_FLAG) /* send assignment */ \
- OP_CRITICAL_READ_WRK(OP, LCK_ID) /* send assignment */ \
- }
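- // Usage sketch of the Windows* OS variant (the result comes back through the
- // extra leading parameter; "loc_ref", "gtid" and "src" are placeholders):
- //   kmp_cmplx32 tmp;
- //   __kmpc_atomic_cmplx4_rd(&tmp, loc_ref, gtid, &src); // tmp = atomic read of src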
- #endif // KMP_OS_WINDOWS
- // ------------------------------------------------------------------------
- // TYPE_ID,OP_ID, TYPE, OP, GOMP_FLAG
- ATOMIC_FIXED_READ(fixed4, rd, kmp_int32, 32, +, 0) // __kmpc_atomic_fixed4_rd
- ATOMIC_FIXED_READ(fixed8, rd, kmp_int64, 64, +,
- KMP_ARCH_X86) // __kmpc_atomic_fixed8_rd
- ATOMIC_CMPXCHG_READ(float4, rd, kmp_real32, 32, +,
- KMP_ARCH_X86) // __kmpc_atomic_float4_rd
- ATOMIC_CMPXCHG_READ(float8, rd, kmp_real64, 64, +,
- KMP_ARCH_X86) // __kmpc_atomic_float8_rd
- // !!! TODO: Remove lock operations for "char" since it can't be non-atomic
- ATOMIC_CMPXCHG_READ(fixed1, rd, kmp_int8, 8, +,
- KMP_ARCH_X86) // __kmpc_atomic_fixed1_rd
- ATOMIC_CMPXCHG_READ(fixed2, rd, kmp_int16, 16, +,
- KMP_ARCH_X86) // __kmpc_atomic_fixed2_rd
- ATOMIC_CRITICAL_READ(float10, rd, long double, +, 10r,
- 1) // __kmpc_atomic_float10_rd
- #if KMP_HAVE_QUAD
- ATOMIC_CRITICAL_READ(float16, rd, QUAD_LEGACY, +, 16r,
- 1) // __kmpc_atomic_float16_rd
- #endif // KMP_HAVE_QUAD
- // Fix for CQ220361 on Windows* OS
- #if (KMP_OS_WINDOWS)
- ATOMIC_CRITICAL_READ_WRK(cmplx4, rd, kmp_cmplx32, +, 8c,
- 1) // __kmpc_atomic_cmplx4_rd
- #else
- ATOMIC_CRITICAL_READ(cmplx4, rd, kmp_cmplx32, +, 8c,
- 1) // __kmpc_atomic_cmplx4_rd
- #endif // (KMP_OS_WINDOWS)
- ATOMIC_CRITICAL_READ(cmplx8, rd, kmp_cmplx64, +, 16c,
- 1) // __kmpc_atomic_cmplx8_rd
- ATOMIC_CRITICAL_READ(cmplx10, rd, kmp_cmplx80, +, 20c,
- 1) // __kmpc_atomic_cmplx10_rd
- #if KMP_HAVE_QUAD
- ATOMIC_CRITICAL_READ(cmplx16, rd, CPLX128_LEG, +, 32c,
- 1) // __kmpc_atomic_cmplx16_rd
- #if (KMP_ARCH_X86)
- ATOMIC_CRITICAL_READ(float16, a16_rd, Quad_a16_t, +, 16r,
- 1) // __kmpc_atomic_float16_a16_rd
- ATOMIC_CRITICAL_READ(cmplx16, a16_rd, kmp_cmplx128_a16_t, +, 32c,
- 1) // __kmpc_atomic_cmplx16_a16_rd
- #endif // (KMP_ARCH_X86)
- #endif // KMP_HAVE_QUAD
- // ------------------------------------------------------------------------
- // Atomic WRITE routines
- #define ATOMIC_XCHG_WR(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \
- ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
- OP_GOMP_CRITICAL(OP, GOMP_FLAG) \
- KMP_XCHG_FIXED##BITS(lhs, rhs); \
- }
- // ------------------------------------------------------------------------
- #define ATOMIC_XCHG_FLOAT_WR(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \
- ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
- OP_GOMP_CRITICAL(OP, GOMP_FLAG) \
- KMP_XCHG_REAL##BITS(lhs, rhs); \
- }
- // ------------------------------------------------------------------------
- // Operation on *lhs, rhs using "compare_and_store" routine
- // TYPE - operands' type
- // BITS - size in bits, used to distinguish low level calls
- // OP - operator
- // Note: temp_val introduced in order to force the compiler to read
- // *lhs only once (w/o it the compiler reads *lhs twice)
- #define OP_CMPXCHG_WR(TYPE, BITS, OP) \
- { \
- TYPE KMP_ATOMIC_VOLATILE temp_val; \
- TYPE old_value, new_value; \
- temp_val = *lhs; \
- old_value = temp_val; \
- new_value = rhs; \
- while (!KMP_COMPARE_AND_STORE_ACQ##BITS( \
- (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) & old_value, \
- *VOLATILE_CAST(kmp_int##BITS *) & new_value)) { \
- temp_val = *lhs; \
- old_value = temp_val; \
- new_value = rhs; \
- } \
- }
- // -------------------------------------------------------------------------
- #define ATOMIC_CMPXCHG_WR(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \
- ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
- OP_GOMP_CRITICAL(OP, GOMP_FLAG) \
- OP_CMPXCHG_WR(TYPE, BITS, OP) \
- }
- // ------------------------------------------------------------------------
- // Routines for Extended types: long double, _Quad, complex flavours (use
- // critical section)
- // TYPE_ID, OP_ID, TYPE - detailed above
- // OP - operator
- // LCK_ID - lock identifier, used to possibly distinguish lock variable
- #define ATOMIC_CRITICAL_WR(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \
- ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
- OP_GOMP_CRITICAL(OP, GOMP_FLAG) /* send assignment */ \
- OP_CRITICAL(OP, LCK_ID) /* send assignment */ \
- }
- // -------------------------------------------------------------------------
- ATOMIC_XCHG_WR(fixed1, wr, kmp_int8, 8, =,
- KMP_ARCH_X86) // __kmpc_atomic_fixed1_wr
- ATOMIC_XCHG_WR(fixed2, wr, kmp_int16, 16, =,
- KMP_ARCH_X86) // __kmpc_atomic_fixed2_wr
- ATOMIC_XCHG_WR(fixed4, wr, kmp_int32, 32, =,
- KMP_ARCH_X86) // __kmpc_atomic_fixed4_wr
- #if (KMP_ARCH_X86)
- ATOMIC_CMPXCHG_WR(fixed8, wr, kmp_int64, 64, =,
- KMP_ARCH_X86) // __kmpc_atomic_fixed8_wr
- #else
- ATOMIC_XCHG_WR(fixed8, wr, kmp_int64, 64, =,
- KMP_ARCH_X86) // __kmpc_atomic_fixed8_wr
- #endif // (KMP_ARCH_X86)
- ATOMIC_XCHG_FLOAT_WR(float4, wr, kmp_real32, 32, =,
- KMP_ARCH_X86) // __kmpc_atomic_float4_wr
- #if (KMP_ARCH_X86)
- ATOMIC_CMPXCHG_WR(float8, wr, kmp_real64, 64, =,
- KMP_ARCH_X86) // __kmpc_atomic_float8_wr
- #else
- ATOMIC_XCHG_FLOAT_WR(float8, wr, kmp_real64, 64, =,
- KMP_ARCH_X86) // __kmpc_atomic_float8_wr
- #endif // (KMP_ARCH_X86)
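- // On KMP_ARCH_X86 (32-bit IA-32) the 64-bit writes above fall back to the
- // compare-and-store loop, presumably because no 8-byte atomic exchange
- // primitive is available there; 64-bit targets use the plain XCHG variant.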
- ATOMIC_CRITICAL_WR(float10, wr, long double, =, 10r,
- 1) // __kmpc_atomic_float10_wr
- #if KMP_HAVE_QUAD
- ATOMIC_CRITICAL_WR(float16, wr, QUAD_LEGACY, =, 16r,
- 1) // __kmpc_atomic_float16_wr
- #endif // KMP_HAVE_QUAD
- ATOMIC_CRITICAL_WR(cmplx4, wr, kmp_cmplx32, =, 8c, 1) // __kmpc_atomic_cmplx4_wr
- ATOMIC_CRITICAL_WR(cmplx8, wr, kmp_cmplx64, =, 16c,
- 1) // __kmpc_atomic_cmplx8_wr
- ATOMIC_CRITICAL_WR(cmplx10, wr, kmp_cmplx80, =, 20c,
- 1) // __kmpc_atomic_cmplx10_wr
- #if KMP_HAVE_QUAD
- ATOMIC_CRITICAL_WR(cmplx16, wr, CPLX128_LEG, =, 32c,
- 1) // __kmpc_atomic_cmplx16_wr
- #if (KMP_ARCH_X86)
- ATOMIC_CRITICAL_WR(float16, a16_wr, Quad_a16_t, =, 16r,
- 1) // __kmpc_atomic_float16_a16_wr
- ATOMIC_CRITICAL_WR(cmplx16, a16_wr, kmp_cmplx128_a16_t, =, 32c,
- 1) // __kmpc_atomic_cmplx16_a16_wr
- #endif // (KMP_ARCH_X86)
- #endif // KMP_HAVE_QUAD
- // ------------------------------------------------------------------------
- // Atomic CAPTURE routines
- // Beginning of a definition (provides name, parameters, debug trace)
- // TYPE_ID - operands type and size (fixed*, fixed*u for signed, unsigned
- // fixed)
- // OP_ID - operation identifier (add, sub, mul, ...)
- // TYPE - operands' type
- #define ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, RET_TYPE) \
- RET_TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID(ident_t *id_ref, int gtid, \
- TYPE *lhs, TYPE rhs, int flag) { \
- KMP_DEBUG_ASSERT(__kmp_init_serial); \
- KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid));
- // -------------------------------------------------------------------------
- // Operation on *lhs, rhs bound by critical section
- // OP - operator (it's supposed to contain an assignment)
- // LCK_ID - lock identifier
- // Note: don't check gtid as it should always be valid
- // 1, 2-byte - expect valid parameter, other - check before this macro
- #define OP_CRITICAL_CPT(OP, LCK_ID) \
- __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
- \
- if (flag) { \
- (*lhs) OP rhs; \
- new_value = (*lhs); \
- } else { \
- new_value = (*lhs); \
- (*lhs) OP rhs; \
- } \
- \
- __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
- return new_value;
- #define OP_UPDATE_CRITICAL_CPT(TYPE, OP, LCK_ID) \
- __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
- \
- if (flag) { \
- (*lhs) = (TYPE)((*lhs)OP rhs); \
- new_value = (*lhs); \
- } else { \
- new_value = (*lhs); \
- (*lhs) = (TYPE)((*lhs)OP rhs); \
- } \
- \
- __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
- return new_value;
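- // The "flag" argument selects the capture semantics: non-zero returns the
- // value after the update (v = (x OP= rhs)), zero returns the value before it
- // (v = x; x OP= rhs).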
- // ------------------------------------------------------------------------
- #ifdef KMP_GOMP_COMPAT
- #define OP_GOMP_CRITICAL_CPT(TYPE, OP, FLAG) \
- if ((FLAG) && (__kmp_atomic_mode == 2)) { \
- KMP_CHECK_GTID; \
- OP_UPDATE_CRITICAL_CPT(TYPE, OP, 0); \
- }
- #else
- #define OP_GOMP_CRITICAL_CPT(TYPE, OP, FLAG)
- #endif /* KMP_GOMP_COMPAT */
- // ------------------------------------------------------------------------
- // Operation on *lhs, rhs using "compare_and_store" routine
- // TYPE - operands' type
- // BITS - size in bits, used to distinguish low level calls
- // OP - operator
- // Note: temp_val introduced in order to force the compiler to read
- // *lhs only once (w/o it the compiler reads *lhs twice)
- #define OP_CMPXCHG_CPT(TYPE, BITS, OP) \
- { \
- TYPE KMP_ATOMIC_VOLATILE temp_val; \
- TYPE old_value, new_value; \
- temp_val = *lhs; \
- old_value = temp_val; \
- new_value = (TYPE)(old_value OP rhs); \
- while (!KMP_COMPARE_AND_STORE_ACQ##BITS( \
- (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) & old_value, \
- *VOLATILE_CAST(kmp_int##BITS *) & new_value)) { \
- temp_val = *lhs; \
- old_value = temp_val; \
- new_value = (TYPE)(old_value OP rhs); \
- } \
- if (flag) { \
- return new_value; \
- } else \
- return old_value; \
- }
- // -------------------------------------------------------------------------
- #define ATOMIC_CMPXCHG_CPT(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \
- ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \
- TYPE new_value; \
- (void)new_value; \
- OP_GOMP_CRITICAL_CPT(TYPE, OP, GOMP_FLAG) \
- OP_CMPXCHG_CPT(TYPE, BITS, OP) \
- }
- // -------------------------------------------------------------------------
- #define ATOMIC_FIXED_ADD_CPT(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \
- ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \
- TYPE old_value, new_value; \
- (void)new_value; \
- OP_GOMP_CRITICAL_CPT(TYPE, OP, GOMP_FLAG) \
- /* OP used as a sign for subtraction: (lhs-rhs) --> (lhs+-rhs) */ \
- old_value = KMP_TEST_THEN_ADD##BITS(lhs, OP rhs); \
- if (flag) { \
- return old_value OP rhs; \
- } else \
- return old_value; \
- }
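- // KMP_TEST_THEN_ADD returns the value seen before the add, so when the new
- // value is requested (flag != 0) it is reconstructed as "old_value OP rhs"
- // instead of being re-read from memory, which may already have changed.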
- // -------------------------------------------------------------------------
- ATOMIC_FIXED_ADD_CPT(fixed4, add_cpt, kmp_int32, 32, +,
- 0) // __kmpc_atomic_fixed4_add_cpt
- ATOMIC_FIXED_ADD_CPT(fixed4, sub_cpt, kmp_int32, 32, -,
- 0) // __kmpc_atomic_fixed4_sub_cpt
- ATOMIC_FIXED_ADD_CPT(fixed8, add_cpt, kmp_int64, 64, +,
- KMP_ARCH_X86) // __kmpc_atomic_fixed8_add_cpt
- ATOMIC_FIXED_ADD_CPT(fixed8, sub_cpt, kmp_int64, 64, -,
- KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_cpt
- ATOMIC_CMPXCHG_CPT(float4, add_cpt, kmp_real32, 32, +,
- KMP_ARCH_X86) // __kmpc_atomic_float4_add_cpt
- ATOMIC_CMPXCHG_CPT(float4, sub_cpt, kmp_real32, 32, -,
- KMP_ARCH_X86) // __kmpc_atomic_float4_sub_cpt
- ATOMIC_CMPXCHG_CPT(float8, add_cpt, kmp_real64, 64, +,
- KMP_ARCH_X86) // __kmpc_atomic_float8_add_cpt
- ATOMIC_CMPXCHG_CPT(float8, sub_cpt, kmp_real64, 64, -,
- KMP_ARCH_X86) // __kmpc_atomic_float8_sub_cpt
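- // Usage sketch of a capture entry point ("loc_ref" and "gtid" stand for the
- // compiler-supplied arguments):
- //   kmp_int32 x = 10;
- //   kmp_int32 v = __kmpc_atomic_fixed4_add_cpt(loc_ref, gtid, &x, 5, 1);
- //   // v == 15 (new value captured), x == 15
- //   v = __kmpc_atomic_fixed4_add_cpt(loc_ref, gtid, &x, 5, 0);
- //   // v == 15 (old value captured), x == 20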
- // ------------------------------------------------------------------------
- // Entries definition for integer operands
- // TYPE_ID - operands type and size (fixed4, float4)
- // OP_ID - operation identifier (add, sub, mul, ...)
- // TYPE - operand type
- // BITS - size in bits, used to distinguish low level calls
- // OP - operator (used in critical section)
- // TYPE_ID,OP_ID, TYPE, BITS,OP,GOMP_FLAG
- // ------------------------------------------------------------------------
- // Routines for ATOMIC integer operands, other operators
- // ------------------------------------------------------------------------
- // TYPE_ID,OP_ID, TYPE, OP, GOMP_FLAG
- ATOMIC_CMPXCHG_CPT(fixed1, add_cpt, kmp_int8, 8, +,
- KMP_ARCH_X86) // __kmpc_atomic_fixed1_add_cpt
- ATOMIC_CMPXCHG_CPT(fixed1, andb_cpt, kmp_int8, 8, &,
- 0) // __kmpc_atomic_fixed1_andb_cpt
- ATOMIC_CMPXCHG_CPT(fixed1, div_cpt, kmp_int8, 8, /,
- KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_cpt
- ATOMIC_CMPXCHG_CPT(fixed1u, div_cpt, kmp_uint8, 8, /,
- KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_cpt
- ATOMIC_CMPXCHG_CPT(fixed1, mul_cpt, kmp_int8, 8, *,
- KMP_ARCH_X86) // __kmpc_atomic_fixed1_mul_cpt
- ATOMIC_CMPXCHG_CPT(fixed1, orb_cpt, kmp_int8, 8, |,
- 0) // __kmpc_atomic_fixed1_orb_cpt
- ATOMIC_CMPXCHG_CPT(fixed1, shl_cpt, kmp_int8, 8, <<,
- KMP_ARCH_X86) // __kmpc_atomic_fixed1_shl_cpt
- ATOMIC_CMPXCHG_CPT(fixed1, shr_cpt, kmp_int8, 8, >>,
- KMP_ARCH_X86) // __kmpc_atomic_fixed1_shr_cpt
- ATOMIC_CMPXCHG_CPT(fixed1u, shr_cpt, kmp_uint8, 8, >>,
- KMP_ARCH_X86) // __kmpc_atomic_fixed1u_shr_cpt
- ATOMIC_CMPXCHG_CPT(fixed1, sub_cpt, kmp_int8, 8, -,
- KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_cpt
- ATOMIC_CMPXCHG_CPT(fixed1, xor_cpt, kmp_int8, 8, ^,
- 0) // __kmpc_atomic_fixed1_xor_cpt
- ATOMIC_CMPXCHG_CPT(fixed2, add_cpt, kmp_int16, 16, +,
- KMP_ARCH_X86) // __kmpc_atomic_fixed2_add_cpt
- ATOMIC_CMPXCHG_CPT(fixed2, andb_cpt, kmp_int16, 16, &,
- 0) // __kmpc_atomic_fixed2_andb_cpt
- ATOMIC_CMPXCHG_CPT(fixed2, div_cpt, kmp_int16, 16, /,
- KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_cpt
- ATOMIC_CMPXCHG_CPT(fixed2u, div_cpt, kmp_uint16, 16, /,
- KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_cpt
- ATOMIC_CMPXCHG_CPT(fixed2, mul_cpt, kmp_int16, 16, *,
- KMP_ARCH_X86) // __kmpc_atomic_fixed2_mul_cpt
- ATOMIC_CMPXCHG_CPT(fixed2, orb_cpt, kmp_int16, 16, |,
- 0) // __kmpc_atomic_fixed2_orb_cpt
- ATOMIC_CMPXCHG_CPT(fixed2, shl_cpt, kmp_int16, 16, <<,
- KMP_ARCH_X86) // __kmpc_atomic_fixed2_shl_cpt
- ATOMIC_CMPXCHG_CPT(fixed2, shr_cpt, kmp_int16, 16, >>,
- KMP_ARCH_X86) // __kmpc_atomic_fixed2_shr_cpt
- ATOMIC_CMPXCHG_CPT(fixed2u, shr_cpt, kmp_uint16, 16, >>,
- KMP_ARCH_X86) // __kmpc_atomic_fixed2u_shr_cpt
- ATOMIC_CMPXCHG_CPT(fixed2, sub_cpt, kmp_int16, 16, -,
- KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_cpt
- ATOMIC_CMPXCHG_CPT(fixed2, xor_cpt, kmp_int16, 16, ^,
- 0) // __kmpc_atomic_fixed2_xor_cpt
- ATOMIC_CMPXCHG_CPT(fixed4, andb_cpt, kmp_int32, 32, &,
- 0) // __kmpc_atomic_fixed4_andb_cpt
- ATOMIC_CMPXCHG_CPT(fixed4, div_cpt, kmp_int32, 32, /,
- KMP_ARCH_X86) // __kmpc_atomic_fixed4_div_cpt
- ATOMIC_CMPXCHG_CPT(fixed4u, div_cpt, kmp_uint32, 32, /,
- KMP_ARCH_X86) // __kmpc_atomic_fixed4u_div_cpt
- ATOMIC_CMPXCHG_CPT(fixed4, mul_cpt, kmp_int32, 32, *,
- KMP_ARCH_X86) // __kmpc_atomic_fixed4_mul_cpt
- ATOMIC_CMPXCHG_CPT(fixed4, orb_cpt, kmp_int32, 32, |,
- 0) // __kmpc_atomic_fixed4_orb_cpt
- ATOMIC_CMPXCHG_CPT(fixed4, shl_cpt, kmp_int32, 32, <<,
- KMP_ARCH_X86) // __kmpc_atomic_fixed4_shl_cpt
- ATOMIC_CMPXCHG_CPT(fixed4, shr_cpt, kmp_int32, 32, >>,
- KMP_ARCH_X86) // __kmpc_atomic_fixed4_shr_cpt
- ATOMIC_CMPXCHG_CPT(fixed4u, shr_cpt, kmp_uint32, 32, >>,
- KMP_ARCH_X86) // __kmpc_atomic_fixed4u_shr_cpt
- ATOMIC_CMPXCHG_CPT(fixed4, xor_cpt, kmp_int32, 32, ^,
- 0) // __kmpc_atomic_fixed4_xor_cpt
- ATOMIC_CMPXCHG_CPT(fixed8, andb_cpt, kmp_int64, 64, &,
- KMP_ARCH_X86) // __kmpc_atomic_fixed8_andb_cpt
- ATOMIC_CMPXCHG_CPT(fixed8, div_cpt, kmp_int64, 64, /,
- KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_cpt
- ATOMIC_CMPXCHG_CPT(fixed8u, div_cpt, kmp_uint64, 64, /,
- KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_cpt
- ATOMIC_CMPXCHG_CPT(fixed8, mul_cpt, kmp_int64, 64, *,
- KMP_ARCH_X86) // __kmpc_atomic_fixed8_mul_cpt
- ATOMIC_CMPXCHG_CPT(fixed8, orb_cpt, kmp_int64, 64, |,
- KMP_ARCH_X86) // __kmpc_atomic_fixed8_orb_cpt
- ATOMIC_CMPXCHG_CPT(fixed8, shl_cpt, kmp_int64, 64, <<,
- KMP_ARCH_X86) // __kmpc_atomic_fixed8_shl_cpt
- ATOMIC_CMPXCHG_CPT(fixed8, shr_cpt, kmp_int64, 64, >>,
- KMP_ARCH_X86) // __kmpc_atomic_fixed8_shr_cpt
- ATOMIC_CMPXCHG_CPT(fixed8u, shr_cpt, kmp_uint64, 64, >>,
- KMP_ARCH_X86) // __kmpc_atomic_fixed8u_shr_cpt
- ATOMIC_CMPXCHG_CPT(fixed8, xor_cpt, kmp_int64, 64, ^,
- KMP_ARCH_X86) // __kmpc_atomic_fixed8_xor_cpt
- ATOMIC_CMPXCHG_CPT(float4, div_cpt, kmp_real32, 32, /,
- KMP_ARCH_X86) // __kmpc_atomic_float4_div_cpt
- ATOMIC_CMPXCHG_CPT(float4, mul_cpt, kmp_real32, 32, *,
- KMP_ARCH_X86) // __kmpc_atomic_float4_mul_cpt
- ATOMIC_CMPXCHG_CPT(float8, div_cpt, kmp_real64, 64, /,
- KMP_ARCH_X86) // __kmpc_atomic_float8_div_cpt
- ATOMIC_CMPXCHG_CPT(float8, mul_cpt, kmp_real64, 64, *,
- KMP_ARCH_X86) // __kmpc_atomic_float8_mul_cpt
- // TYPE_ID,OP_ID, TYPE, OP, GOMP_FLAG
- // CAPTURE routines for mixed types RHS=float16
- #if KMP_HAVE_QUAD
- // Beginning of a definition (provides name, parameters, debug trace)
- // TYPE_ID - operands type and size (fixed*, fixed*u for signed, unsigned
- // fixed)
- // OP_ID - operation identifier (add, sub, mul, ...)
- // TYPE - operands' type
- #define ATOMIC_BEGIN_CPT_MIX(TYPE_ID, OP_ID, TYPE, RTYPE_ID, RTYPE) \
- TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID##_##RTYPE_ID( \
- ident_t *id_ref, int gtid, TYPE *lhs, RTYPE rhs, int flag) { \
- KMP_DEBUG_ASSERT(__kmp_init_serial); \
- KA_TRACE(100, \
- ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID "_" #RTYPE_ID ": T#%d\n", \
- gtid));
- // -------------------------------------------------------------------------
- #define ATOMIC_CMPXCHG_CPT_MIX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, \
- RTYPE, LCK_ID, MASK, GOMP_FLAG) \
- ATOMIC_BEGIN_CPT_MIX(TYPE_ID, OP_ID, TYPE, RTYPE_ID, RTYPE) \
- TYPE new_value; \
- (void)new_value; \
- OP_GOMP_CRITICAL_CPT(TYPE, OP, GOMP_FLAG) \
- OP_CMPXCHG_CPT(TYPE, BITS, OP) \
- }
- // -------------------------------------------------------------------------
- #define ATOMIC_CRITICAL_CPT_MIX(TYPE_ID, TYPE, OP_ID, OP, RTYPE_ID, RTYPE, \
- LCK_ID, GOMP_FLAG) \
- ATOMIC_BEGIN_CPT_MIX(TYPE_ID, OP_ID, TYPE, RTYPE_ID, RTYPE) \
- TYPE new_value; \
- (void)new_value; \
- OP_GOMP_CRITICAL_CPT(TYPE, OP, GOMP_FLAG) /* send assignment */ \
- OP_UPDATE_CRITICAL_CPT(TYPE, OP, LCK_ID) /* send assignment */ \
- }
- ATOMIC_CMPXCHG_CPT_MIX(fixed1, char, add_cpt, 8, +, fp, _Quad, 1i, 0,
- KMP_ARCH_X86) // __kmpc_atomic_fixed1_add_cpt_fp
- ATOMIC_CMPXCHG_CPT_MIX(fixed1u, uchar, add_cpt, 8, +, fp, _Quad, 1i, 0,
- KMP_ARCH_X86) // __kmpc_atomic_fixed1u_add_cpt_fp
- ATOMIC_CMPXCHG_CPT_MIX(fixed1, char, sub_cpt, 8, -, fp, _Quad, 1i, 0,
- KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_cpt_fp
- ATOMIC_CMPXCHG_CPT_MIX(fixed1u, uchar, sub_cpt, 8, -, fp, _Quad, 1i, 0,
- KMP_ARCH_X86) // __kmpc_atomic_fixed1u_sub_cpt_fp
- ATOMIC_CMPXCHG_CPT_MIX(fixed1, char, mul_cpt, 8, *, fp, _Quad, 1i, 0,
- KMP_ARCH_X86) // __kmpc_atomic_fixed1_mul_cpt_fp
- ATOMIC_CMPXCHG_CPT_MIX(fixed1u, uchar, mul_cpt, 8, *, fp, _Quad, 1i, 0,
- KMP_ARCH_X86) // __kmpc_atomic_fixed1u_mul_cpt_fp
- ATOMIC_CMPXCHG_CPT_MIX(fixed1, char, div_cpt, 8, /, fp, _Quad, 1i, 0,
- KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_cpt_fp
- ATOMIC_CMPXCHG_CPT_MIX(fixed1u, uchar, div_cpt, 8, /, fp, _Quad, 1i, 0,
- KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_cpt_fp
- ATOMIC_CMPXCHG_CPT_MIX(fixed2, short, add_cpt, 16, +, fp, _Quad, 2i, 1,
- KMP_ARCH_X86) // __kmpc_atomic_fixed2_add_cpt_fp
- ATOMIC_CMPXCHG_CPT_MIX(fixed2u, ushort, add_cpt, 16, +, fp, _Quad, 2i, 1,
- KMP_ARCH_X86) // __kmpc_atomic_fixed2u_add_cpt_fp
- ATOMIC_CMPXCHG_CPT_MIX(fixed2, short, sub_cpt, 16, -, fp, _Quad, 2i, 1,
- KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_cpt_fp
- ATOMIC_CMPXCHG_CPT_MIX(fixed2u, ushort, sub_cpt, 16, -, fp, _Quad, 2i, 1,
- KMP_ARCH_X86) // __kmpc_atomic_fixed2u_sub_cpt_fp
- ATOMIC_CMPXCHG_CPT_MIX(fixed2, short, mul_cpt, 16, *, fp, _Quad, 2i, 1,
- KMP_ARCH_X86) // __kmpc_atomic_fixed2_mul_cpt_fp
- ATOMIC_CMPXCHG_CPT_MIX(fixed2u, ushort, mul_cpt, 16, *, fp, _Quad, 2i, 1,
- KMP_ARCH_X86) // __kmpc_atomic_fixed2u_mul_cpt_fp
- ATOMIC_CMPXCHG_CPT_MIX(fixed2, short, div_cpt, 16, /, fp, _Quad, 2i, 1,
- KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_cpt_fp
- ATOMIC_CMPXCHG_CPT_MIX(fixed2u, ushort, div_cpt, 16, /, fp, _Quad, 2i, 1,
- KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_cpt_fp
- ATOMIC_CMPXCHG_CPT_MIX(fixed4, kmp_int32, add_cpt, 32, +, fp, _Quad, 4i, 3,
- 0) // __kmpc_atomic_fixed4_add_cpt_fp
- ATOMIC_CMPXCHG_CPT_MIX(fixed4u, kmp_uint32, add_cpt, 32, +, fp, _Quad, 4i, 3,
- 0) // __kmpc_atomic_fixed4u_add_cpt_fp
- ATOMIC_CMPXCHG_CPT_MIX(fixed4, kmp_int32, sub_cpt, 32, -, fp, _Quad, 4i, 3,
- 0) // __kmpc_atomic_fixed4_sub_cpt_fp
- ATOMIC_CMPXCHG_CPT_MIX(fixed4u, kmp_uint32, sub_cpt, 32, -, fp, _Quad, 4i, 3,
- 0) // __kmpc_atomic_fixed4u_sub_cpt_fp
- ATOMIC_CMPXCHG_CPT_MIX(fixed4, kmp_int32, mul_cpt, 32, *, fp, _Quad, 4i, 3,
- 0) // __kmpc_atomic_fixed4_mul_cpt_fp
- ATOMIC_CMPXCHG_CPT_MIX(fixed4u, kmp_uint32, mul_cpt, 32, *, fp, _Quad, 4i, 3,
- 0) // __kmpc_atomic_fixed4u_mul_cpt_fp
- ATOMIC_CMPXCHG_CPT_MIX(fixed4, kmp_int32, div_cpt, 32, /, fp, _Quad, 4i, 3,
- 0) // __kmpc_atomic_fixed4_div_cpt_fp
- ATOMIC_CMPXCHG_CPT_MIX(fixed4u, kmp_uint32, div_cpt, 32, /, fp, _Quad, 4i, 3,
- 0) // __kmpc_atomic_fixed4u_div_cpt_fp
- ATOMIC_CMPXCHG_CPT_MIX(fixed8, kmp_int64, add_cpt, 64, +, fp, _Quad, 8i, 7,
- KMP_ARCH_X86) // __kmpc_atomic_fixed8_add_cpt_fp
- ATOMIC_CMPXCHG_CPT_MIX(fixed8u, kmp_uint64, add_cpt, 64, +, fp, _Quad, 8i, 7,
- KMP_ARCH_X86) // __kmpc_atomic_fixed8u_add_cpt_fp
- ATOMIC_CMPXCHG_CPT_MIX(fixed8, kmp_int64, sub_cpt, 64, -, fp, _Quad, 8i, 7,
- KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_cpt_fp
- ATOMIC_CMPXCHG_CPT_MIX(fixed8u, kmp_uint64, sub_cpt, 64, -, fp, _Quad, 8i, 7,
- KMP_ARCH_X86) // __kmpc_atomic_fixed8u_sub_cpt_fp
- ATOMIC_CMPXCHG_CPT_MIX(fixed8, kmp_int64, mul_cpt, 64, *, fp, _Quad, 8i, 7,
- KMP_ARCH_X86) // __kmpc_atomic_fixed8_mul_cpt_fp
- ATOMIC_CMPXCHG_CPT_MIX(fixed8u, kmp_uint64, mul_cpt, 64, *, fp, _Quad, 8i, 7,
- KMP_ARCH_X86) // __kmpc_atomic_fixed8u_mul_cpt_fp
- ATOMIC_CMPXCHG_CPT_MIX(fixed8, kmp_int64, div_cpt, 64, /, fp, _Quad, 8i, 7,
- KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_cpt_fp
- ATOMIC_CMPXCHG_CPT_MIX(fixed8u, kmp_uint64, div_cpt, 64, /, fp, _Quad, 8i, 7,
- KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_cpt_fp
- ATOMIC_CMPXCHG_CPT_MIX(float4, kmp_real32, add_cpt, 32, +, fp, _Quad, 4r, 3,
- KMP_ARCH_X86) // __kmpc_atomic_float4_add_cpt_fp
- ATOMIC_CMPXCHG_CPT_MIX(float4, kmp_real32, sub_cpt, 32, -, fp, _Quad, 4r, 3,
- KMP_ARCH_X86) // __kmpc_atomic_float4_sub_cpt_fp
- ATOMIC_CMPXCHG_CPT_MIX(float4, kmp_real32, mul_cpt, 32, *, fp, _Quad, 4r, 3,
- KMP_ARCH_X86) // __kmpc_atomic_float4_mul_cpt_fp
- ATOMIC_CMPXCHG_CPT_MIX(float4, kmp_real32, div_cpt, 32, /, fp, _Quad, 4r, 3,
- KMP_ARCH_X86) // __kmpc_atomic_float4_div_cpt_fp
- ATOMIC_CMPXCHG_CPT_MIX(float8, kmp_real64, add_cpt, 64, +, fp, _Quad, 8r, 7,
- KMP_ARCH_X86) // __kmpc_atomic_float8_add_cpt_fp
- ATOMIC_CMPXCHG_CPT_MIX(float8, kmp_real64, sub_cpt, 64, -, fp, _Quad, 8r, 7,
- KMP_ARCH_X86) // __kmpc_atomic_float8_sub_cpt_fp
- ATOMIC_CMPXCHG_CPT_MIX(float8, kmp_real64, mul_cpt, 64, *, fp, _Quad, 8r, 7,
- KMP_ARCH_X86) // __kmpc_atomic_float8_mul_cpt_fp
- ATOMIC_CMPXCHG_CPT_MIX(float8, kmp_real64, div_cpt, 64, /, fp, _Quad, 8r, 7,
- KMP_ARCH_X86) // __kmpc_atomic_float8_div_cpt_fp
- ATOMIC_CRITICAL_CPT_MIX(float10, long double, add_cpt, +, fp, _Quad, 10r,
- 1) // __kmpc_atomic_float10_add_cpt_fp
- ATOMIC_CRITICAL_CPT_MIX(float10, long double, sub_cpt, -, fp, _Quad, 10r,
- 1) // __kmpc_atomic_float10_sub_cpt_fp
- ATOMIC_CRITICAL_CPT_MIX(float10, long double, mul_cpt, *, fp, _Quad, 10r,
- 1) // __kmpc_atomic_float10_mul_cpt_fp
- ATOMIC_CRITICAL_CPT_MIX(float10, long double, div_cpt, /, fp, _Quad, 10r,
- 1) // __kmpc_atomic_float10_div_cpt_fp
- #endif // KMP_HAVE_QUAD
- // ------------------------------------------------------------------------
- // Routines for C/C++ Reduction operators && and ||
- // -------------------------------------------------------------------------
- // Operation on *lhs, rhs bound by critical section
- // OP - operator (it's supposed to contain an assignment)
- // LCK_ID - lock identifier
- // Note: don't check gtid as it should always be valid
- // 1, 2-byte - expect valid parameter, other - check before this macro
- #define OP_CRITICAL_L_CPT(OP, LCK_ID) \
- __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
- \
- if (flag) { \
- new_value OP rhs; \
- (*lhs) = new_value; \
- } else { \
- new_value = (*lhs); \
- (*lhs) OP rhs; \
- } \
- \
- __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);
- // ------------------------------------------------------------------------
- #ifdef KMP_GOMP_COMPAT
- #define OP_GOMP_CRITICAL_L_CPT(OP, FLAG) \
- if ((FLAG) && (__kmp_atomic_mode == 2)) { \
- KMP_CHECK_GTID; \
- OP_CRITICAL_L_CPT(OP, 0); \
- return new_value; \
- }
- #else
- #define OP_GOMP_CRITICAL_L_CPT(OP, FLAG)
- #endif /* KMP_GOMP_COMPAT */
- // ------------------------------------------------------------------------
- // Need separate macros for &&, || because there is no combined assignment
- #define ATOMIC_CMPX_L_CPT(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \
- ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \
- TYPE new_value; \
- (void)new_value; \
- OP_GOMP_CRITICAL_L_CPT(= *lhs OP, GOMP_FLAG) \
- OP_CMPXCHG_CPT(TYPE, BITS, OP) \
- }
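- // Illustrative use of the logical capture routines below (sketch; loc, gtid
- // and flag assumed available): the update applied is `*lhs = *lhs && rhs`
- // (or ||), and `flag` picks which value is captured.
- //   char x, expr, v;
- //   v = __kmpc_atomic_fixed1_andl_cpt(loc, gtid, &x, expr, flag);
- //   // flag != 0:  { x = x && expr; v = x; }
- //   // flag == 0:  { v = x; x = x && expr; }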
- ATOMIC_CMPX_L_CPT(fixed1, andl_cpt, char, 8, &&,
- KMP_ARCH_X86) // __kmpc_atomic_fixed1_andl_cpt
- ATOMIC_CMPX_L_CPT(fixed1, orl_cpt, char, 8, ||,
- KMP_ARCH_X86) // __kmpc_atomic_fixed1_orl_cpt
- ATOMIC_CMPX_L_CPT(fixed2, andl_cpt, short, 16, &&,
- KMP_ARCH_X86) // __kmpc_atomic_fixed2_andl_cpt
- ATOMIC_CMPX_L_CPT(fixed2, orl_cpt, short, 16, ||,
- KMP_ARCH_X86) // __kmpc_atomic_fixed2_orl_cpt
- ATOMIC_CMPX_L_CPT(fixed4, andl_cpt, kmp_int32, 32, &&,
- 0) // __kmpc_atomic_fixed4_andl_cpt
- ATOMIC_CMPX_L_CPT(fixed4, orl_cpt, kmp_int32, 32, ||,
- 0) // __kmpc_atomic_fixed4_orl_cpt
- ATOMIC_CMPX_L_CPT(fixed8, andl_cpt, kmp_int64, 64, &&,
- KMP_ARCH_X86) // __kmpc_atomic_fixed8_andl_cpt
- ATOMIC_CMPX_L_CPT(fixed8, orl_cpt, kmp_int64, 64, ||,
- KMP_ARCH_X86) // __kmpc_atomic_fixed8_orl_cpt
- // -------------------------------------------------------------------------
- // Routines for Fortran operators that have no direct C counterpart:
- // MAX, MIN, .EQV., .NEQV.
- // Operators .AND., .OR. are covered by __kmpc_atomic_*_{andl,orl}_cpt
- // Intrinsics IAND, IOR, IEOR are covered by __kmpc_atomic_*_{andb,orb,xor}_cpt
- // -------------------------------------------------------------------------
- // MIN and MAX need separate macros
- // OP - comparison operator used to decide whether any action is needed
- #define MIN_MAX_CRITSECT_CPT(OP, LCK_ID) \
- __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
- \
- if (*lhs OP rhs) { /* still need actions? */ \
- old_value = *lhs; \
- *lhs = rhs; \
- if (flag) \
- new_value = rhs; \
- else \
- new_value = old_value; \
- } else { \
- new_value = *lhs; \
- } \
- __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
- return new_value;
- // -------------------------------------------------------------------------
- #ifdef KMP_GOMP_COMPAT
- #define GOMP_MIN_MAX_CRITSECT_CPT(OP, FLAG) \
- if ((FLAG) && (__kmp_atomic_mode == 2)) { \
- KMP_CHECK_GTID; \
- MIN_MAX_CRITSECT_CPT(OP, 0); \
- }
- #else
- #define GOMP_MIN_MAX_CRITSECT_CPT(OP, FLAG)
- #endif /* KMP_GOMP_COMPAT */
- // -------------------------------------------------------------------------
- #define MIN_MAX_CMPXCHG_CPT(TYPE, BITS, OP) \
- { \
- TYPE KMP_ATOMIC_VOLATILE temp_val; \
- /*TYPE old_value; */ \
- temp_val = *lhs; \
- old_value = temp_val; \
- while (old_value OP rhs && /* still need actions? */ \
- !KMP_COMPARE_AND_STORE_ACQ##BITS( \
- (kmp_int##BITS *)lhs, \
- *VOLATILE_CAST(kmp_int##BITS *) & old_value, \
- *VOLATILE_CAST(kmp_int##BITS *) & rhs)) { \
- temp_val = *lhs; \
- old_value = temp_val; \
- } \
- if (flag) \
- return rhs; \
- else \
- return old_value; \
- }
- // -------------------------------------------------------------------------
- // 1-byte, 2-byte operands - use critical section
- #define MIN_MAX_CRITICAL_CPT(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \
- ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \
- TYPE new_value, old_value; \
- if (*lhs OP rhs) { /* need actions? */ \
- GOMP_MIN_MAX_CRITSECT_CPT(OP, GOMP_FLAG) \
- MIN_MAX_CRITSECT_CPT(OP, LCK_ID) \
- } \
- return *lhs; \
- }
- #define MIN_MAX_COMPXCHG_CPT(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \
- ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \
- TYPE new_value, old_value; \
- (void)new_value; \
- if (*lhs OP rhs) { \
- GOMP_MIN_MAX_CRITSECT_CPT(OP, GOMP_FLAG) \
- MIN_MAX_CMPXCHG_CPT(TYPE, BITS, OP) \
- } \
- return *lhs; \
- }
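- // Illustrative use of the MIN/MAX capture routines below (sketch; loc, gtid
- // and flag assumed available).  For max the comparison operator passed in is
- // `<`, so the store happens only when rhs is larger:
- //   kmp_int32 x, rhs, v;
- //   v = __kmpc_atomic_fixed4_max_cpt(loc, gtid, &x, rhs, flag);
- //   // flag != 0:  { x = max(x, rhs); v = x; }
- //   // flag == 0:  { v = x; x = max(x, rhs); }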
- MIN_MAX_COMPXCHG_CPT(fixed1, max_cpt, char, 8, <,
- KMP_ARCH_X86) // __kmpc_atomic_fixed1_max_cpt
- MIN_MAX_COMPXCHG_CPT(fixed1, min_cpt, char, 8, >,
- KMP_ARCH_X86) // __kmpc_atomic_fixed1_min_cpt
- MIN_MAX_COMPXCHG_CPT(fixed2, max_cpt, short, 16, <,
- KMP_ARCH_X86) // __kmpc_atomic_fixed2_max_cpt
- MIN_MAX_COMPXCHG_CPT(fixed2, min_cpt, short, 16, >,
- KMP_ARCH_X86) // __kmpc_atomic_fixed2_min_cpt
- MIN_MAX_COMPXCHG_CPT(fixed4, max_cpt, kmp_int32, 32, <,
- 0) // __kmpc_atomic_fixed4_max_cpt
- MIN_MAX_COMPXCHG_CPT(fixed4, min_cpt, kmp_int32, 32, >,
- 0) // __kmpc_atomic_fixed4_min_cpt
- MIN_MAX_COMPXCHG_CPT(fixed8, max_cpt, kmp_int64, 64, <,
- KMP_ARCH_X86) // __kmpc_atomic_fixed8_max_cpt
- MIN_MAX_COMPXCHG_CPT(fixed8, min_cpt, kmp_int64, 64, >,
- KMP_ARCH_X86) // __kmpc_atomic_fixed8_min_cpt
- MIN_MAX_COMPXCHG_CPT(float4, max_cpt, kmp_real32, 32, <,
- KMP_ARCH_X86) // __kmpc_atomic_float4_max_cpt
- MIN_MAX_COMPXCHG_CPT(float4, min_cpt, kmp_real32, 32, >,
- KMP_ARCH_X86) // __kmpc_atomic_float4_min_cpt
- MIN_MAX_COMPXCHG_CPT(float8, max_cpt, kmp_real64, 64, <,
- KMP_ARCH_X86) // __kmpc_atomic_float8_max_cpt
- MIN_MAX_COMPXCHG_CPT(float8, min_cpt, kmp_real64, 64, >,
- KMP_ARCH_X86) // __kmpc_atomic_float8_min_cpt
- MIN_MAX_CRITICAL_CPT(float10, max_cpt, long double, <, 10r,
- 1) // __kmpc_atomic_float10_max_cpt
- MIN_MAX_CRITICAL_CPT(float10, min_cpt, long double, >, 10r,
- 1) // __kmpc_atomic_float10_min_cpt
- #if KMP_HAVE_QUAD
- MIN_MAX_CRITICAL_CPT(float16, max_cpt, QUAD_LEGACY, <, 16r,
- 1) // __kmpc_atomic_float16_max_cpt
- MIN_MAX_CRITICAL_CPT(float16, min_cpt, QUAD_LEGACY, >, 16r,
- 1) // __kmpc_atomic_float16_min_cpt
- #if (KMP_ARCH_X86)
- MIN_MAX_CRITICAL_CPT(float16, max_a16_cpt, Quad_a16_t, <, 16r,
- 1) // __kmpc_atomic_float16_max_a16_cpt
- MIN_MAX_CRITICAL_CPT(float16, min_a16_cpt, Quad_a16_t, >, 16r,
- 1) // __kmpc_atomic_float16_min_a16_cpt
- #endif // (KMP_ARCH_X86)
- #endif // KMP_HAVE_QUAD
- // ------------------------------------------------------------------------
- #ifdef KMP_GOMP_COMPAT
- #define OP_GOMP_CRITICAL_EQV_CPT(OP, FLAG) \
- if ((FLAG) && (__kmp_atomic_mode == 2)) { \
- KMP_CHECK_GTID; \
- OP_CRITICAL_CPT(OP, 0); \
- }
- #else
- #define OP_GOMP_CRITICAL_EQV_CPT(OP, FLAG)
- #endif /* KMP_GOMP_COMPAT */
- // ------------------------------------------------------------------------
- #define ATOMIC_CMPX_EQV_CPT(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \
- ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \
- TYPE new_value; \
- (void)new_value; \
- OP_GOMP_CRITICAL_EQV_CPT(^= (TYPE) ~, GOMP_FLAG) /* send assignment */ \
- OP_CMPXCHG_CPT(TYPE, BITS, OP) \
- }
- // ------------------------------------------------------------------------
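- // Note on the .EQV./.NEQV. routines below (sketch): .NEQV. reuses the plain
- // XOR capture macro, while .EQV. passes the two-token operator `^~`, so the
- // computed update is
- //   *lhs = *lhs ^ ~rhs;   // i.e. ~(*lhs ^ rhs), bitwise equivalence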
- ATOMIC_CMPXCHG_CPT(fixed1, neqv_cpt, kmp_int8, 8, ^,
- KMP_ARCH_X86) // __kmpc_atomic_fixed1_neqv_cpt
- ATOMIC_CMPXCHG_CPT(fixed2, neqv_cpt, kmp_int16, 16, ^,
- KMP_ARCH_X86) // __kmpc_atomic_fixed2_neqv_cpt
- ATOMIC_CMPXCHG_CPT(fixed4, neqv_cpt, kmp_int32, 32, ^,
- KMP_ARCH_X86) // __kmpc_atomic_fixed4_neqv_cpt
- ATOMIC_CMPXCHG_CPT(fixed8, neqv_cpt, kmp_int64, 64, ^,
- KMP_ARCH_X86) // __kmpc_atomic_fixed8_neqv_cpt
- ATOMIC_CMPX_EQV_CPT(fixed1, eqv_cpt, kmp_int8, 8, ^~,
- KMP_ARCH_X86) // __kmpc_atomic_fixed1_eqv_cpt
- ATOMIC_CMPX_EQV_CPT(fixed2, eqv_cpt, kmp_int16, 16, ^~,
- KMP_ARCH_X86) // __kmpc_atomic_fixed2_eqv_cpt
- ATOMIC_CMPX_EQV_CPT(fixed4, eqv_cpt, kmp_int32, 32, ^~,
- KMP_ARCH_X86) // __kmpc_atomic_fixed4_eqv_cpt
- ATOMIC_CMPX_EQV_CPT(fixed8, eqv_cpt, kmp_int64, 64, ^~,
- KMP_ARCH_X86) // __kmpc_atomic_fixed8_eqv_cpt
- // ------------------------------------------------------------------------
- // Routines for Extended types: long double, _Quad, complex flavours (use
- // critical section)
- // TYPE_ID, OP_ID, TYPE - detailed above
- // OP - operator
- // LCK_ID - lock identifier, used to possibly distinguish lock variable
- #define ATOMIC_CRITICAL_CPT(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \
- ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \
- TYPE new_value; \
- OP_GOMP_CRITICAL_CPT(TYPE, OP, GOMP_FLAG) /* send assignment */ \
- OP_UPDATE_CRITICAL_CPT(TYPE, OP, LCK_ID) /* send assignment */ \
- }
- // ------------------------------------------------------------------------
- // Workaround for cmplx4. Regular routines with return value don't work
- // on Win_32e. Let's return captured values through the additional parameter.
- #define OP_CRITICAL_CPT_WRK(OP, LCK_ID) \
- __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
- \
- if (flag) { \
- (*lhs) OP rhs; \
- (*out) = (*lhs); \
- } else { \
- (*out) = (*lhs); \
- (*lhs) OP rhs; \
- } \
- \
- __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
- return;
- // ------------------------------------------------------------------------
- #ifdef KMP_GOMP_COMPAT
- #define OP_GOMP_CRITICAL_CPT_WRK(OP, FLAG) \
- if ((FLAG) && (__kmp_atomic_mode == 2)) { \
- KMP_CHECK_GTID; \
- OP_CRITICAL_CPT_WRK(OP## =, 0); \
- }
- #else
- #define OP_GOMP_CRITICAL_CPT_WRK(OP, FLAG)
- #endif /* KMP_GOMP_COMPAT */
- // ------------------------------------------------------------------------
- #define ATOMIC_BEGIN_WRK(TYPE_ID, OP_ID, TYPE) \
- void __kmpc_atomic_##TYPE_ID##_##OP_ID(ident_t *id_ref, int gtid, TYPE *lhs, \
- TYPE rhs, TYPE *out, int flag) { \
- KMP_DEBUG_ASSERT(__kmp_init_serial); \
- KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid));
- // ------------------------------------------------------------------------
- #define ATOMIC_CRITICAL_CPT_WRK(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \
- ATOMIC_BEGIN_WRK(TYPE_ID, OP_ID, TYPE) \
- OP_GOMP_CRITICAL_CPT_WRK(OP, GOMP_FLAG) \
- OP_CRITICAL_CPT_WRK(OP## =, LCK_ID) \
- }
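- // Illustrative signature generated by the workaround below (sketch): the
- // captured value is written through `out` instead of being returned.
- //   void __kmpc_atomic_cmplx4_add_cpt(ident_t *id_ref, int gtid,
- //                                     kmp_cmplx32 *lhs, kmp_cmplx32 rhs,
- //                                     kmp_cmplx32 *out, int flag);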
- // The end of workaround for cmplx4
- /* ------------------------------------------------------------------------- */
- // routines for long double type
- ATOMIC_CRITICAL_CPT(float10, add_cpt, long double, +, 10r,
- 1) // __kmpc_atomic_float10_add_cpt
- ATOMIC_CRITICAL_CPT(float10, sub_cpt, long double, -, 10r,
- 1) // __kmpc_atomic_float10_sub_cpt
- ATOMIC_CRITICAL_CPT(float10, mul_cpt, long double, *, 10r,
- 1) // __kmpc_atomic_float10_mul_cpt
- ATOMIC_CRITICAL_CPT(float10, div_cpt, long double, /, 10r,
- 1) // __kmpc_atomic_float10_div_cpt
- #if KMP_HAVE_QUAD
- // routines for _Quad type
- ATOMIC_CRITICAL_CPT(float16, add_cpt, QUAD_LEGACY, +, 16r,
- 1) // __kmpc_atomic_float16_add_cpt
- ATOMIC_CRITICAL_CPT(float16, sub_cpt, QUAD_LEGACY, -, 16r,
- 1) // __kmpc_atomic_float16_sub_cpt
- ATOMIC_CRITICAL_CPT(float16, mul_cpt, QUAD_LEGACY, *, 16r,
- 1) // __kmpc_atomic_float16_mul_cpt
- ATOMIC_CRITICAL_CPT(float16, div_cpt, QUAD_LEGACY, /, 16r,
- 1) // __kmpc_atomic_float16_div_cpt
- #if (KMP_ARCH_X86)
- ATOMIC_CRITICAL_CPT(float16, add_a16_cpt, Quad_a16_t, +, 16r,
- 1) // __kmpc_atomic_float16_add_a16_cpt
- ATOMIC_CRITICAL_CPT(float16, sub_a16_cpt, Quad_a16_t, -, 16r,
- 1) // __kmpc_atomic_float16_sub_a16_cpt
- ATOMIC_CRITICAL_CPT(float16, mul_a16_cpt, Quad_a16_t, *, 16r,
- 1) // __kmpc_atomic_float16_mul_a16_cpt
- ATOMIC_CRITICAL_CPT(float16, div_a16_cpt, Quad_a16_t, /, 16r,
- 1) // __kmpc_atomic_float16_div_a16_cpt
- #endif // (KMP_ARCH_X86)
- #endif // KMP_HAVE_QUAD
- // routines for complex types
- // cmplx4 routines to return void
- ATOMIC_CRITICAL_CPT_WRK(cmplx4, add_cpt, kmp_cmplx32, +, 8c,
- 1) // __kmpc_atomic_cmplx4_add_cpt
- ATOMIC_CRITICAL_CPT_WRK(cmplx4, sub_cpt, kmp_cmplx32, -, 8c,
- 1) // __kmpc_atomic_cmplx4_sub_cpt
- ATOMIC_CRITICAL_CPT_WRK(cmplx4, mul_cpt, kmp_cmplx32, *, 8c,
- 1) // __kmpc_atomic_cmplx4_mul_cpt
- ATOMIC_CRITICAL_CPT_WRK(cmplx4, div_cpt, kmp_cmplx32, /, 8c,
- 1) // __kmpc_atomic_cmplx4_div_cpt
- ATOMIC_CRITICAL_CPT(cmplx8, add_cpt, kmp_cmplx64, +, 16c,
- 1) // __kmpc_atomic_cmplx8_add_cpt
- ATOMIC_CRITICAL_CPT(cmplx8, sub_cpt, kmp_cmplx64, -, 16c,
- 1) // __kmpc_atomic_cmplx8_sub_cpt
- ATOMIC_CRITICAL_CPT(cmplx8, mul_cpt, kmp_cmplx64, *, 16c,
- 1) // __kmpc_atomic_cmplx8_mul_cpt
- ATOMIC_CRITICAL_CPT(cmplx8, div_cpt, kmp_cmplx64, /, 16c,
- 1) // __kmpc_atomic_cmplx8_div_cpt
- ATOMIC_CRITICAL_CPT(cmplx10, add_cpt, kmp_cmplx80, +, 20c,
- 1) // __kmpc_atomic_cmplx10_add_cpt
- ATOMIC_CRITICAL_CPT(cmplx10, sub_cpt, kmp_cmplx80, -, 20c,
- 1) // __kmpc_atomic_cmplx10_sub_cpt
- ATOMIC_CRITICAL_CPT(cmplx10, mul_cpt, kmp_cmplx80, *, 20c,
- 1) // __kmpc_atomic_cmplx10_mul_cpt
- ATOMIC_CRITICAL_CPT(cmplx10, div_cpt, kmp_cmplx80, /, 20c,
- 1) // __kmpc_atomic_cmplx10_div_cpt
- #if KMP_HAVE_QUAD
- ATOMIC_CRITICAL_CPT(cmplx16, add_cpt, CPLX128_LEG, +, 32c,
- 1) // __kmpc_atomic_cmplx16_add_cpt
- ATOMIC_CRITICAL_CPT(cmplx16, sub_cpt, CPLX128_LEG, -, 32c,
- 1) // __kmpc_atomic_cmplx16_sub_cpt
- ATOMIC_CRITICAL_CPT(cmplx16, mul_cpt, CPLX128_LEG, *, 32c,
- 1) // __kmpc_atomic_cmplx16_mul_cpt
- ATOMIC_CRITICAL_CPT(cmplx16, div_cpt, CPLX128_LEG, /, 32c,
- 1) // __kmpc_atomic_cmplx16_div_cpt
- #if (KMP_ARCH_X86)
- ATOMIC_CRITICAL_CPT(cmplx16, add_a16_cpt, kmp_cmplx128_a16_t, +, 32c,
- 1) // __kmpc_atomic_cmplx16_add_a16_cpt
- ATOMIC_CRITICAL_CPT(cmplx16, sub_a16_cpt, kmp_cmplx128_a16_t, -, 32c,
- 1) // __kmpc_atomic_cmplx16_sub_a16_cpt
- ATOMIC_CRITICAL_CPT(cmplx16, mul_a16_cpt, kmp_cmplx128_a16_t, *, 32c,
- 1) // __kmpc_atomic_cmplx16_mul_a16_cpt
- ATOMIC_CRITICAL_CPT(cmplx16, div_a16_cpt, kmp_cmplx128_a16_t, /, 32c,
- 1) // __kmpc_atomic_cmplx16_div_a16_cpt
- #endif // (KMP_ARCH_X86)
- #endif // KMP_HAVE_QUAD
- // OpenMP 4.0: v = x = expr binop x; { v = x; x = expr binop x; } { x = expr
- // binop x; v = x; } for non-commutative operations.
- // Supported only on IA-32 architecture and Intel(R) 64
- // -------------------------------------------------------------------------
- // Operation on *lhs, rhs bound by critical section
- // OP - operator (it's supposed to contain an assignment)
- // LCK_ID - lock identifier
- // Note: don't check gtid as it should always be valid
- // 1, 2-byte - expect valid parameter, other - check before this macro
- #define OP_CRITICAL_CPT_REV(TYPE, OP, LCK_ID) \
- __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
- \
- if (flag) { \
- /*temp_val = (*lhs);*/ \
- (*lhs) = (TYPE)((rhs)OP(*lhs)); \
- new_value = (*lhs); \
- } else { \
- new_value = (*lhs); \
- (*lhs) = (TYPE)((rhs)OP(*lhs)); \
- } \
- __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
- return new_value;
- // ------------------------------------------------------------------------
- #ifdef KMP_GOMP_COMPAT
- #define OP_GOMP_CRITICAL_CPT_REV(TYPE, OP, FLAG) \
- if ((FLAG) && (__kmp_atomic_mode == 2)) { \
- KMP_CHECK_GTID; \
- OP_CRITICAL_CPT_REV(TYPE, OP, 0); \
- }
- #else
- #define OP_GOMP_CRITICAL_CPT_REV(TYPE, OP, FLAG)
- #endif /* KMP_GOMP_COMPAT */
- // ------------------------------------------------------------------------
- // Operation on *lhs, rhs using "compare_and_store" routine
- // TYPE - operands' type
- // BITS - size in bits, used to distinguish low level calls
- // OP - operator
- // Note: temp_val introduced in order to force the compiler to read
- // *lhs only once (w/o it the compiler reads *lhs twice)
- #define OP_CMPXCHG_CPT_REV(TYPE, BITS, OP) \
- { \
- TYPE KMP_ATOMIC_VOLATILE temp_val; \
- TYPE old_value, new_value; \
- temp_val = *lhs; \
- old_value = temp_val; \
- new_value = (TYPE)(rhs OP old_value); \
- while (!KMP_COMPARE_AND_STORE_ACQ##BITS( \
- (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) & old_value, \
- *VOLATILE_CAST(kmp_int##BITS *) & new_value)) { \
- temp_val = *lhs; \
- old_value = temp_val; \
- new_value = (TYPE)(rhs OP old_value); \
- } \
- if (flag) { \
- return new_value; \
- } else \
- return old_value; \
- }
- // -------------------------------------------------------------------------
- #define ATOMIC_CMPXCHG_CPT_REV(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \
- ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \
- TYPE new_value; \
- (void)new_value; \
- OP_GOMP_CRITICAL_CPT_REV(TYPE, OP, GOMP_FLAG) \
- OP_CMPXCHG_CPT_REV(TYPE, BITS, OP) \
- }
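- // Illustrative semantics of the *_cpt_rev routines below (sketch; loc, gtid
- // and flag assumed available): the operand order is reversed, i.e.
- // x = expr <op> x.
- //   kmp_int32 x, e, v;
- //   v = __kmpc_atomic_fixed4_sub_cpt_rev(loc, gtid, &x, e, flag);
- //   // flag != 0:  { x = e - x; v = x; }
- //   // flag == 0:  { v = x; x = e - x; }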
- ATOMIC_CMPXCHG_CPT_REV(fixed1, div_cpt_rev, kmp_int8, 8, /,
- KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_cpt_rev
- ATOMIC_CMPXCHG_CPT_REV(fixed1u, div_cpt_rev, kmp_uint8, 8, /,
- KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_cpt_rev
- ATOMIC_CMPXCHG_CPT_REV(fixed1, shl_cpt_rev, kmp_int8, 8, <<,
- KMP_ARCH_X86) // __kmpc_atomic_fixed1_shl_cpt_rev
- ATOMIC_CMPXCHG_CPT_REV(fixed1, shr_cpt_rev, kmp_int8, 8, >>,
- KMP_ARCH_X86) // __kmpc_atomic_fixed1_shr_cpt_rev
- ATOMIC_CMPXCHG_CPT_REV(fixed1u, shr_cpt_rev, kmp_uint8, 8, >>,
- KMP_ARCH_X86) // __kmpc_atomic_fixed1u_shr_cpt_rev
- ATOMIC_CMPXCHG_CPT_REV(fixed1, sub_cpt_rev, kmp_int8, 8, -,
- KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_cpt_rev
- ATOMIC_CMPXCHG_CPT_REV(fixed2, div_cpt_rev, kmp_int16, 16, /,
- KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_cpt_rev
- ATOMIC_CMPXCHG_CPT_REV(fixed2u, div_cpt_rev, kmp_uint16, 16, /,
- KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_cpt_rev
- ATOMIC_CMPXCHG_CPT_REV(fixed2, shl_cpt_rev, kmp_int16, 16, <<,
- KMP_ARCH_X86) // __kmpc_atomic_fixed2_shl_cpt_rev
- ATOMIC_CMPXCHG_CPT_REV(fixed2, shr_cpt_rev, kmp_int16, 16, >>,
- KMP_ARCH_X86) // __kmpc_atomic_fixed2_shr_cpt_rev
- ATOMIC_CMPXCHG_CPT_REV(fixed2u, shr_cpt_rev, kmp_uint16, 16, >>,
- KMP_ARCH_X86) // __kmpc_atomic_fixed2u_shr_cpt_rev
- ATOMIC_CMPXCHG_CPT_REV(fixed2, sub_cpt_rev, kmp_int16, 16, -,
- KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_cpt_rev
- ATOMIC_CMPXCHG_CPT_REV(fixed4, div_cpt_rev, kmp_int32, 32, /,
- KMP_ARCH_X86) // __kmpc_atomic_fixed4_div_cpt_rev
- ATOMIC_CMPXCHG_CPT_REV(fixed4u, div_cpt_rev, kmp_uint32, 32, /,
- KMP_ARCH_X86) // __kmpc_atomic_fixed4u_div_cpt_rev
- ATOMIC_CMPXCHG_CPT_REV(fixed4, shl_cpt_rev, kmp_int32, 32, <<,
- KMP_ARCH_X86) // __kmpc_atomic_fixed4_shl_cpt_rev
- ATOMIC_CMPXCHG_CPT_REV(fixed4, shr_cpt_rev, kmp_int32, 32, >>,
- KMP_ARCH_X86) // __kmpc_atomic_fixed4_shr_cpt_rev
- ATOMIC_CMPXCHG_CPT_REV(fixed4u, shr_cpt_rev, kmp_uint32, 32, >>,
- KMP_ARCH_X86) // __kmpc_atomic_fixed4u_shr_cpt_rev
- ATOMIC_CMPXCHG_CPT_REV(fixed4, sub_cpt_rev, kmp_int32, 32, -,
- KMP_ARCH_X86) // __kmpc_atomic_fixed4_sub_cpt_rev
- ATOMIC_CMPXCHG_CPT_REV(fixed8, div_cpt_rev, kmp_int64, 64, /,
- KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_cpt_rev
- ATOMIC_CMPXCHG_CPT_REV(fixed8u, div_cpt_rev, kmp_uint64, 64, /,
- KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_cpt_rev
- ATOMIC_CMPXCHG_CPT_REV(fixed8, shl_cpt_rev, kmp_int64, 64, <<,
- KMP_ARCH_X86) // __kmpc_atomic_fixed8_shl_cpt_rev
- ATOMIC_CMPXCHG_CPT_REV(fixed8, shr_cpt_rev, kmp_int64, 64, >>,
- KMP_ARCH_X86) // __kmpc_atomic_fixed8_shr_cpt_rev
- ATOMIC_CMPXCHG_CPT_REV(fixed8u, shr_cpt_rev, kmp_uint64, 64, >>,
- KMP_ARCH_X86) // __kmpc_atomic_fixed8u_shr_cpt_rev
- ATOMIC_CMPXCHG_CPT_REV(fixed8, sub_cpt_rev, kmp_int64, 64, -,
- KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_cpt_rev
- ATOMIC_CMPXCHG_CPT_REV(float4, div_cpt_rev, kmp_real32, 32, /,
- KMP_ARCH_X86) // __kmpc_atomic_float4_div_cpt_rev
- ATOMIC_CMPXCHG_CPT_REV(float4, sub_cpt_rev, kmp_real32, 32, -,
- KMP_ARCH_X86) // __kmpc_atomic_float4_sub_cpt_rev
- ATOMIC_CMPXCHG_CPT_REV(float8, div_cpt_rev, kmp_real64, 64, /,
- KMP_ARCH_X86) // __kmpc_atomic_float8_div_cpt_rev
- ATOMIC_CMPXCHG_CPT_REV(float8, sub_cpt_rev, kmp_real64, 64, -,
- KMP_ARCH_X86) // __kmpc_atomic_float8_sub_cpt_rev
- // TYPE_ID,OP_ID, TYPE, OP, GOMP_FLAG
- // ------------------------------------------------------------------------
- // Routines for Extended types: long double, _Quad, complex flavours (use
- // critical section)
- // TYPE_ID, OP_ID, TYPE - detailed above
- // OP - operator
- // LCK_ID - lock identifier, used to possibly distinguish lock variable
- #define ATOMIC_CRITICAL_CPT_REV(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \
- ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \
- TYPE new_value; \
- /*printf("__kmp_atomic_mode = %d\n", __kmp_atomic_mode);*/ \
- OP_GOMP_CRITICAL_CPT_REV(TYPE, OP, GOMP_FLAG) \
- OP_CRITICAL_CPT_REV(TYPE, OP, LCK_ID) \
- }
- /* ------------------------------------------------------------------------- */
- // routines for long double type
- ATOMIC_CRITICAL_CPT_REV(float10, sub_cpt_rev, long double, -, 10r,
- 1) // __kmpc_atomic_float10_sub_cpt_rev
- ATOMIC_CRITICAL_CPT_REV(float10, div_cpt_rev, long double, /, 10r,
- 1) // __kmpc_atomic_float10_div_cpt_rev
- #if KMP_HAVE_QUAD
- // routines for _Quad type
- ATOMIC_CRITICAL_CPT_REV(float16, sub_cpt_rev, QUAD_LEGACY, -, 16r,
- 1) // __kmpc_atomic_float16_sub_cpt_rev
- ATOMIC_CRITICAL_CPT_REV(float16, div_cpt_rev, QUAD_LEGACY, /, 16r,
- 1) // __kmpc_atomic_float16_div_cpt_rev
- #if (KMP_ARCH_X86)
- ATOMIC_CRITICAL_CPT_REV(float16, sub_a16_cpt_rev, Quad_a16_t, -, 16r,
- 1) // __kmpc_atomic_float16_sub_a16_cpt_rev
- ATOMIC_CRITICAL_CPT_REV(float16, div_a16_cpt_rev, Quad_a16_t, /, 16r,
- 1) // __kmpc_atomic_float16_div_a16_cpt_rev
- #endif // (KMP_ARCH_X86)
- #endif // KMP_HAVE_QUAD
- // routines for complex types
- // ------------------------------------------------------------------------
- // Workaround for cmplx4. Regular routines with return value don't work
- // on Win_32e. Let's return captured values through the additional parameter.
- #define OP_CRITICAL_CPT_REV_WRK(OP, LCK_ID) \
- __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
- \
- if (flag) { \
- (*lhs) = (rhs)OP(*lhs); \
- (*out) = (*lhs); \
- } else { \
- (*out) = (*lhs); \
- (*lhs) = (rhs)OP(*lhs); \
- } \
- \
- __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
- return;
- // ------------------------------------------------------------------------
- #ifdef KMP_GOMP_COMPAT
- #define OP_GOMP_CRITICAL_CPT_REV_WRK(OP, FLAG) \
- if ((FLAG) && (__kmp_atomic_mode == 2)) { \
- KMP_CHECK_GTID; \
- OP_CRITICAL_CPT_REV_WRK(OP, 0); \
- }
- #else
- #define OP_GOMP_CRITICAL_CPT_REV_WRK(OP, FLAG)
- #endif /* KMP_GOMP_COMPAT */
- // ------------------------------------------------------------------------
- #define ATOMIC_CRITICAL_CPT_REV_WRK(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, \
- GOMP_FLAG) \
- ATOMIC_BEGIN_WRK(TYPE_ID, OP_ID, TYPE) \
- OP_GOMP_CRITICAL_CPT_REV_WRK(OP, GOMP_FLAG) \
- OP_CRITICAL_CPT_REV_WRK(OP, LCK_ID) \
- }
- // The end of workaround for cmplx4
- // !!! TODO: check if we need to return void for cmplx4 routines
- // cmplx4 routines to return void
- ATOMIC_CRITICAL_CPT_REV_WRK(cmplx4, sub_cpt_rev, kmp_cmplx32, -, 8c,
- 1) // __kmpc_atomic_cmplx4_sub_cpt_rev
- ATOMIC_CRITICAL_CPT_REV_WRK(cmplx4, div_cpt_rev, kmp_cmplx32, /, 8c,
- 1) // __kmpc_atomic_cmplx4_div_cpt_rev
- ATOMIC_CRITICAL_CPT_REV(cmplx8, sub_cpt_rev, kmp_cmplx64, -, 16c,
- 1) // __kmpc_atomic_cmplx8_sub_cpt_rev
- ATOMIC_CRITICAL_CPT_REV(cmplx8, div_cpt_rev, kmp_cmplx64, /, 16c,
- 1) // __kmpc_atomic_cmplx8_div_cpt_rev
- ATOMIC_CRITICAL_CPT_REV(cmplx10, sub_cpt_rev, kmp_cmplx80, -, 20c,
- 1) // __kmpc_atomic_cmplx10_sub_cpt_rev
- ATOMIC_CRITICAL_CPT_REV(cmplx10, div_cpt_rev, kmp_cmplx80, /, 20c,
- 1) // __kmpc_atomic_cmplx10_div_cpt_rev
- #if KMP_HAVE_QUAD
- ATOMIC_CRITICAL_CPT_REV(cmplx16, sub_cpt_rev, CPLX128_LEG, -, 32c,
- 1) // __kmpc_atomic_cmplx16_sub_cpt_rev
- ATOMIC_CRITICAL_CPT_REV(cmplx16, div_cpt_rev, CPLX128_LEG, /, 32c,
- 1) // __kmpc_atomic_cmplx16_div_cpt_rev
- #if (KMP_ARCH_X86)
- ATOMIC_CRITICAL_CPT_REV(cmplx16, sub_a16_cpt_rev, kmp_cmplx128_a16_t, -, 32c,
- 1) // __kmpc_atomic_cmplx16_sub_a16_cpt_rev
- ATOMIC_CRITICAL_CPT_REV(cmplx16, div_a16_cpt_rev, kmp_cmplx128_a16_t, /, 32c,
- 1) // __kmpc_atomic_cmplx16_div_a16_cpt_rev
- #endif // (KMP_ARCH_X86)
- #endif // KMP_HAVE_QUAD
- // Capture reverse for mixed type: RHS=float16
- #if KMP_HAVE_QUAD
- // Beginning of a definition (provides name, parameters, debug trace)
- // TYPE_ID - operand type and size (fixed*, fixed*u for signed, unsigned
- // fixed)
- // OP_ID - operation identifier (add, sub, mul, ...)
- // TYPE - operands' type
- // -------------------------------------------------------------------------
- #define ATOMIC_CMPXCHG_CPT_REV_MIX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, \
- RTYPE, LCK_ID, MASK, GOMP_FLAG) \
- ATOMIC_BEGIN_CPT_MIX(TYPE_ID, OP_ID, TYPE, RTYPE_ID, RTYPE) \
- TYPE new_value; \
- (void)new_value; \
- OP_GOMP_CRITICAL_CPT_REV(TYPE, OP, GOMP_FLAG) \
- OP_CMPXCHG_CPT_REV(TYPE, BITS, OP) \
- }
- // -------------------------------------------------------------------------
- #define ATOMIC_CRITICAL_CPT_REV_MIX(TYPE_ID, TYPE, OP_ID, OP, RTYPE_ID, RTYPE, \
- LCK_ID, GOMP_FLAG) \
- ATOMIC_BEGIN_CPT_MIX(TYPE_ID, OP_ID, TYPE, RTYPE_ID, RTYPE) \
- TYPE new_value; \
- (void)new_value; \
- OP_GOMP_CRITICAL_CPT_REV(TYPE, OP, GOMP_FLAG) /* send assignment */ \
- OP_CRITICAL_CPT_REV(TYPE, OP, LCK_ID) /* send assignment */ \
- }
- ATOMIC_CMPXCHG_CPT_REV_MIX(fixed1, char, sub_cpt_rev, 8, -, fp, _Quad, 1i, 0,
- KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_cpt_rev_fp
- ATOMIC_CMPXCHG_CPT_REV_MIX(fixed1u, uchar, sub_cpt_rev, 8, -, fp, _Quad, 1i, 0,
- KMP_ARCH_X86) // __kmpc_atomic_fixed1u_sub_cpt_rev_fp
- ATOMIC_CMPXCHG_CPT_REV_MIX(fixed1, char, div_cpt_rev, 8, /, fp, _Quad, 1i, 0,
- KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_cpt_rev_fp
- ATOMIC_CMPXCHG_CPT_REV_MIX(fixed1u, uchar, div_cpt_rev, 8, /, fp, _Quad, 1i, 0,
- KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_cpt_rev_fp
- ATOMIC_CMPXCHG_CPT_REV_MIX(fixed2, short, sub_cpt_rev, 16, -, fp, _Quad, 2i, 1,
- KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_cpt_rev_fp
- ATOMIC_CMPXCHG_CPT_REV_MIX(fixed2u, ushort, sub_cpt_rev, 16, -, fp, _Quad, 2i,
- 1,
- KMP_ARCH_X86) // __kmpc_atomic_fixed2u_sub_cpt_rev_fp
- ATOMIC_CMPXCHG_CPT_REV_MIX(fixed2, short, div_cpt_rev, 16, /, fp, _Quad, 2i, 1,
- KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_cpt_rev_fp
- ATOMIC_CMPXCHG_CPT_REV_MIX(fixed2u, ushort, div_cpt_rev, 16, /, fp, _Quad, 2i,
- 1,
- KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_cpt_rev_fp
- ATOMIC_CMPXCHG_CPT_REV_MIX(fixed4, kmp_int32, sub_cpt_rev, 32, -, fp, _Quad, 4i,
- 3, 0) // __kmpc_atomic_fixed4_sub_cpt_rev_fp
- ATOMIC_CMPXCHG_CPT_REV_MIX(fixed4u, kmp_uint32, sub_cpt_rev, 32, -, fp, _Quad,
- 4i, 3, 0) // __kmpc_atomic_fixed4u_sub_cpt_rev_fp
- ATOMIC_CMPXCHG_CPT_REV_MIX(fixed4, kmp_int32, div_cpt_rev, 32, /, fp, _Quad, 4i,
- 3, 0) // __kmpc_atomic_fixed4_div_cpt_rev_fp
- ATOMIC_CMPXCHG_CPT_REV_MIX(fixed4u, kmp_uint32, div_cpt_rev, 32, /, fp, _Quad,
- 4i, 3, 0) // __kmpc_atomic_fixed4u_div_cpt_rev_fp
- ATOMIC_CMPXCHG_CPT_REV_MIX(fixed8, kmp_int64, sub_cpt_rev, 64, -, fp, _Quad, 8i,
- 7,
- KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_cpt_rev_fp
- ATOMIC_CMPXCHG_CPT_REV_MIX(fixed8u, kmp_uint64, sub_cpt_rev, 64, -, fp, _Quad,
- 8i, 7,
- KMP_ARCH_X86) // __kmpc_atomic_fixed8u_sub_cpt_rev_fp
- ATOMIC_CMPXCHG_CPT_REV_MIX(fixed8, kmp_int64, div_cpt_rev, 64, /, fp, _Quad, 8i,
- 7,
- KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_cpt_rev_fp
- ATOMIC_CMPXCHG_CPT_REV_MIX(fixed8u, kmp_uint64, div_cpt_rev, 64, /, fp, _Quad,
- 8i, 7,
- KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_cpt_rev_fp
- ATOMIC_CMPXCHG_CPT_REV_MIX(float4, kmp_real32, sub_cpt_rev, 32, -, fp, _Quad,
- 4r, 3,
- KMP_ARCH_X86) // __kmpc_atomic_float4_sub_cpt_rev_fp
- ATOMIC_CMPXCHG_CPT_REV_MIX(float4, kmp_real32, div_cpt_rev, 32, /, fp, _Quad,
- 4r, 3,
- KMP_ARCH_X86) // __kmpc_atomic_float4_div_cpt_rev_fp
- ATOMIC_CMPXCHG_CPT_REV_MIX(float8, kmp_real64, sub_cpt_rev, 64, -, fp, _Quad,
- 8r, 7,
- KMP_ARCH_X86) // __kmpc_atomic_float8_sub_cpt_rev_fp
- ATOMIC_CMPXCHG_CPT_REV_MIX(float8, kmp_real64, div_cpt_rev, 64, /, fp, _Quad,
- 8r, 7,
- KMP_ARCH_X86) // __kmpc_atomic_float8_div_cpt_rev_fp
- ATOMIC_CRITICAL_CPT_REV_MIX(float10, long double, sub_cpt_rev, -, fp, _Quad,
- 10r, 1) // __kmpc_atomic_float10_sub_cpt_rev_fp
- ATOMIC_CRITICAL_CPT_REV_MIX(float10, long double, div_cpt_rev, /, fp, _Quad,
- 10r, 1) // __kmpc_atomic_float10_div_cpt_rev_fp
- #endif // KMP_HAVE_QUAD
- // OpenMP 4.0 Capture-write (swap): {v = x; x = expr;}
- #define ATOMIC_BEGIN_SWP(TYPE_ID, TYPE) \
- TYPE __kmpc_atomic_##TYPE_ID##_swp(ident_t *id_ref, int gtid, TYPE *lhs, \
- TYPE rhs) { \
- KMP_DEBUG_ASSERT(__kmp_init_serial); \
- KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_swp: T#%d\n", gtid));
- #define CRITICAL_SWP(LCK_ID) \
- __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
- \
- old_value = (*lhs); \
- (*lhs) = rhs; \
- \
- __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
- return old_value;
- // ------------------------------------------------------------------------
- #ifdef KMP_GOMP_COMPAT
- #define GOMP_CRITICAL_SWP(FLAG) \
- if ((FLAG) && (__kmp_atomic_mode == 2)) { \
- KMP_CHECK_GTID; \
- CRITICAL_SWP(0); \
- }
- #else
- #define GOMP_CRITICAL_SWP(FLAG)
- #endif /* KMP_GOMP_COMPAT */
- #define ATOMIC_XCHG_SWP(TYPE_ID, TYPE, BITS, GOMP_FLAG) \
- ATOMIC_BEGIN_SWP(TYPE_ID, TYPE) \
- TYPE old_value; \
- GOMP_CRITICAL_SWP(GOMP_FLAG) \
- old_value = KMP_XCHG_FIXED##BITS(lhs, rhs); \
- return old_value; \
- }
- // ------------------------------------------------------------------------
- #define ATOMIC_XCHG_FLOAT_SWP(TYPE_ID, TYPE, BITS, GOMP_FLAG) \
- ATOMIC_BEGIN_SWP(TYPE_ID, TYPE) \
- TYPE old_value; \
- GOMP_CRITICAL_SWP(GOMP_FLAG) \
- old_value = KMP_XCHG_REAL##BITS(lhs, rhs); \
- return old_value; \
- }
- // ------------------------------------------------------------------------
- #define CMPXCHG_SWP(TYPE, BITS) \
- { \
- TYPE KMP_ATOMIC_VOLATILE temp_val; \
- TYPE old_value, new_value; \
- temp_val = *lhs; \
- old_value = temp_val; \
- new_value = rhs; \
- while (!KMP_COMPARE_AND_STORE_ACQ##BITS( \
- (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) & old_value, \
- *VOLATILE_CAST(kmp_int##BITS *) & new_value)) { \
- temp_val = *lhs; \
- old_value = temp_val; \
- new_value = rhs; \
- } \
- return old_value; \
- }
- // -------------------------------------------------------------------------
- #define ATOMIC_CMPXCHG_SWP(TYPE_ID, TYPE, BITS, GOMP_FLAG) \
- ATOMIC_BEGIN_SWP(TYPE_ID, TYPE) \
- TYPE old_value; \
- (void)old_value; \
- GOMP_CRITICAL_SWP(GOMP_FLAG) \
- CMPXCHG_SWP(TYPE, BITS) \
- }
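- // Illustrative use of the swap routines below (sketch; loc and gtid assumed
- // available), matching "{ v = x; x = expr; }":
- //   kmp_int32 x, expr, v;
- //   v = __kmpc_atomic_fixed4_swp(loc, gtid, &x, expr); // returns old x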
- ATOMIC_XCHG_SWP(fixed1, kmp_int8, 8, KMP_ARCH_X86) // __kmpc_atomic_fixed1_swp
- ATOMIC_XCHG_SWP(fixed2, kmp_int16, 16, KMP_ARCH_X86) // __kmpc_atomic_fixed2_swp
- ATOMIC_XCHG_SWP(fixed4, kmp_int32, 32, KMP_ARCH_X86) // __kmpc_atomic_fixed4_swp
- ATOMIC_XCHG_FLOAT_SWP(float4, kmp_real32, 32,
- KMP_ARCH_X86) // __kmpc_atomic_float4_swp
- #if (KMP_ARCH_X86)
- ATOMIC_CMPXCHG_SWP(fixed8, kmp_int64, 64,
- KMP_ARCH_X86) // __kmpc_atomic_fixed8_swp
- ATOMIC_CMPXCHG_SWP(float8, kmp_real64, 64,
- KMP_ARCH_X86) // __kmpc_atomic_float8_swp
- #else
- ATOMIC_XCHG_SWP(fixed8, kmp_int64, 64, KMP_ARCH_X86) // __kmpc_atomic_fixed8_swp
- ATOMIC_XCHG_FLOAT_SWP(float8, kmp_real64, 64,
- KMP_ARCH_X86) // __kmpc_atomic_float8_swp
- #endif // (KMP_ARCH_X86)
- // ------------------------------------------------------------------------
- // Routines for Extended types: long double, _Quad, complex flavours (use
- // critical section)
- #define ATOMIC_CRITICAL_SWP(TYPE_ID, TYPE, LCK_ID, GOMP_FLAG) \
- ATOMIC_BEGIN_SWP(TYPE_ID, TYPE) \
- TYPE old_value; \
- GOMP_CRITICAL_SWP(GOMP_FLAG) \
- CRITICAL_SWP(LCK_ID) \
- }
- // ------------------------------------------------------------------------
- // !!! TODO: check if we need to return void for cmplx4 routines
- // Workaround for cmplx4. Regular routines with return value don't work
- // on Win_32e. Let's return captured values through the additional parameter.
- #define ATOMIC_BEGIN_SWP_WRK(TYPE_ID, TYPE) \
- void __kmpc_atomic_##TYPE_ID##_swp(ident_t *id_ref, int gtid, TYPE *lhs, \
- TYPE rhs, TYPE *out) { \
- KMP_DEBUG_ASSERT(__kmp_init_serial); \
- KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_swp: T#%d\n", gtid));
- #define CRITICAL_SWP_WRK(LCK_ID) \
- __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
- \
- tmp = (*lhs); \
- (*lhs) = (rhs); \
- (*out) = tmp; \
- __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
- return;
- // ------------------------------------------------------------------------
- #ifdef KMP_GOMP_COMPAT
- #define GOMP_CRITICAL_SWP_WRK(FLAG) \
- if ((FLAG) && (__kmp_atomic_mode == 2)) { \
- KMP_CHECK_GTID; \
- CRITICAL_SWP_WRK(0); \
- }
- #else
- #define GOMP_CRITICAL_SWP_WRK(FLAG)
- #endif /* KMP_GOMP_COMPAT */
- // ------------------------------------------------------------------------
- #define ATOMIC_CRITICAL_SWP_WRK(TYPE_ID, TYPE, LCK_ID, GOMP_FLAG) \
- ATOMIC_BEGIN_SWP_WRK(TYPE_ID, TYPE) \
- TYPE tmp; \
- GOMP_CRITICAL_SWP_WRK(GOMP_FLAG) \
- CRITICAL_SWP_WRK(LCK_ID) \
- }
- // The end of workaround for cmplx4
- ATOMIC_CRITICAL_SWP(float10, long double, 10r, 1) // __kmpc_atomic_float10_swp
- #if KMP_HAVE_QUAD
- ATOMIC_CRITICAL_SWP(float16, QUAD_LEGACY, 16r, 1) // __kmpc_atomic_float16_swp
- #endif // KMP_HAVE_QUAD
- // cmplx4 routine to return void
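- // Illustrative signature for the cmplx4 swap below (sketch): the old value
- // comes back through `out` rather than as a return value.
- //   void __kmpc_atomic_cmplx4_swp(ident_t *id_ref, int gtid,
- //                                 kmp_cmplx32 *lhs, kmp_cmplx32 rhs,
- //                                 kmp_cmplx32 *out);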
- ATOMIC_CRITICAL_SWP_WRK(cmplx4, kmp_cmplx32, 8c, 1) // __kmpc_atomic_cmplx4_swp
- // ATOMIC_CRITICAL_SWP( cmplx4, kmp_cmplx32, 8c, 1 ) //
- // __kmpc_atomic_cmplx4_swp
- ATOMIC_CRITICAL_SWP(cmplx8, kmp_cmplx64, 16c, 1) // __kmpc_atomic_cmplx8_swp
- ATOMIC_CRITICAL_SWP(cmplx10, kmp_cmplx80, 20c, 1) // __kmpc_atomic_cmplx10_swp
- #if KMP_HAVE_QUAD
- ATOMIC_CRITICAL_SWP(cmplx16, CPLX128_LEG, 32c, 1) // __kmpc_atomic_cmplx16_swp
- #if (KMP_ARCH_X86)
- ATOMIC_CRITICAL_SWP(float16_a16, Quad_a16_t, 16r,
- 1) // __kmpc_atomic_float16_a16_swp
- ATOMIC_CRITICAL_SWP(cmplx16_a16, kmp_cmplx128_a16_t, 32c,
- 1) // __kmpc_atomic_cmplx16_a16_swp
- #endif // (KMP_ARCH_X86)
- #endif // KMP_HAVE_QUAD
- // End of OpenMP 4.0 Capture
- #endif // KMP_ARCH_X86 || KMP_ARCH_X86_64
- #undef OP_CRITICAL
- /* ------------------------------------------------------------------------ */
- /* Generic atomic routines */
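- // The callback `f` computes f(result, current_value, rhs_ptr).  Illustrative
- // (hypothetical) helper and call a compiler might emit for a 4-byte float
- // update `x += y` that has no specialized entry point:
- //   static void add_float_op(void *out, void *a, void *b) {
- //     *(float *)out = *(float *)a + *(float *)b;
- //   }
- //   __kmpc_atomic_4(loc, gtid, &x, &y, add_float_op);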
- void __kmpc_atomic_1(ident_t *id_ref, int gtid, void *lhs, void *rhs,
- void (*f)(void *, void *, void *)) {
- KMP_DEBUG_ASSERT(__kmp_init_serial);
- if (
- #if KMP_ARCH_X86 && defined(KMP_GOMP_COMPAT)
- FALSE /* must use lock */
- #else
- TRUE
- #endif // KMP_ARCH_X86 && defined(KMP_GOMP_COMPAT)
- ) {
- kmp_int8 old_value, new_value;
- old_value = *(kmp_int8 *)lhs;
- (*f)(&new_value, &old_value, rhs);
- /* TODO: Should this be acquire or release? */
- while (!KMP_COMPARE_AND_STORE_ACQ8((kmp_int8 *)lhs, *(kmp_int8 *)&old_value,
- *(kmp_int8 *)&new_value)) {
- KMP_CPU_PAUSE();
- old_value = *(kmp_int8 *)lhs;
- (*f)(&new_value, &old_value, rhs);
- }
- return;
- } else {
- // All 1-byte data is of integer data type.
- #ifdef KMP_GOMP_COMPAT
- if (__kmp_atomic_mode == 2) {
- __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
- } else
- #endif /* KMP_GOMP_COMPAT */
- __kmp_acquire_atomic_lock(&__kmp_atomic_lock_1i, gtid);
- (*f)(lhs, lhs, rhs);
- #ifdef KMP_GOMP_COMPAT
- if (__kmp_atomic_mode == 2) {
- __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
- } else
- #endif /* KMP_GOMP_COMPAT */
- __kmp_release_atomic_lock(&__kmp_atomic_lock_1i, gtid);
- }
- }
- void __kmpc_atomic_2(ident_t *id_ref, int gtid, void *lhs, void *rhs,
- void (*f)(void *, void *, void *)) {
- if (
- #if KMP_ARCH_X86 && defined(KMP_GOMP_COMPAT)
- FALSE /* must use lock */
- #elif KMP_ARCH_X86 || KMP_ARCH_X86_64
- TRUE /* no alignment problems */
- #else
- !((kmp_uintptr_t)lhs & 0x1) /* make sure address is 2-byte aligned */
- #endif // KMP_ARCH_X86 && defined(KMP_GOMP_COMPAT)
- ) {
- kmp_int16 old_value, new_value;
- old_value = *(kmp_int16 *)lhs;
- (*f)(&new_value, &old_value, rhs);
- /* TODO: Should this be acquire or release? */
- while (!KMP_COMPARE_AND_STORE_ACQ16(
- (kmp_int16 *)lhs, *(kmp_int16 *)&old_value, *(kmp_int16 *)&new_value)) {
- KMP_CPU_PAUSE();
- old_value = *(kmp_int16 *)lhs;
- (*f)(&new_value, &old_value, rhs);
- }
- return;
- } else {
- // All 2-byte data is of integer data type.
- #ifdef KMP_GOMP_COMPAT
- if (__kmp_atomic_mode == 2) {
- __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
- } else
- #endif /* KMP_GOMP_COMPAT */
- __kmp_acquire_atomic_lock(&__kmp_atomic_lock_2i, gtid);
- (*f)(lhs, lhs, rhs);
- #ifdef KMP_GOMP_COMPAT
- if (__kmp_atomic_mode == 2) {
- __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
- } else
- #endif /* KMP_GOMP_COMPAT */
- __kmp_release_atomic_lock(&__kmp_atomic_lock_2i, gtid);
- }
- }
- void __kmpc_atomic_4(ident_t *id_ref, int gtid, void *lhs, void *rhs,
- void (*f)(void *, void *, void *)) {
- KMP_DEBUG_ASSERT(__kmp_init_serial);
- if (
- // FIXME: On IA-32 architecture, gcc uses cmpxchg only for 4-byte ints.
- // Gomp compatibility is broken if this routine is called for floats.
- #if KMP_ARCH_X86 || KMP_ARCH_X86_64
- TRUE /* no alignment problems */
- #else
- !((kmp_uintptr_t)lhs & 0x3) /* make sure address is 4-byte aligned */
- #endif // KMP_ARCH_X86 || KMP_ARCH_X86_64
- ) {
- kmp_int32 old_value, new_value;
- old_value = *(kmp_int32 *)lhs;
- (*f)(&new_value, &old_value, rhs);
- /* TODO: Should this be acquire or release? */
- while (!KMP_COMPARE_AND_STORE_ACQ32(
- (kmp_int32 *)lhs, *(kmp_int32 *)&old_value, *(kmp_int32 *)&new_value)) {
- KMP_CPU_PAUSE();
- old_value = *(kmp_int32 *)lhs;
- (*f)(&new_value, &old_value, rhs);
- }
- return;
- } else {
- // Use __kmp_atomic_lock_4i for all 4-byte data,
- // even if it isn't of integer data type.
- #ifdef KMP_GOMP_COMPAT
- if (__kmp_atomic_mode == 2) {
- __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
- } else
- #endif /* KMP_GOMP_COMPAT */
- __kmp_acquire_atomic_lock(&__kmp_atomic_lock_4i, gtid);
- (*f)(lhs, lhs, rhs);
- #ifdef KMP_GOMP_COMPAT
- if (__kmp_atomic_mode == 2) {
- __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
- } else
- #endif /* KMP_GOMP_COMPAT */
- __kmp_release_atomic_lock(&__kmp_atomic_lock_4i, gtid);
- }
- }
- void __kmpc_atomic_8(ident_t *id_ref, int gtid, void *lhs, void *rhs,
- void (*f)(void *, void *, void *)) {
- KMP_DEBUG_ASSERT(__kmp_init_serial);
- if (
- #if KMP_ARCH_X86 && defined(KMP_GOMP_COMPAT)
- FALSE /* must use lock */
- #elif KMP_ARCH_X86 || KMP_ARCH_X86_64
- TRUE /* no alignment problems */
- #else
- !((kmp_uintptr_t)lhs & 0x7) /* make sure address is 8-byte aligned */
- #endif // KMP_ARCH_X86 && defined(KMP_GOMP_COMPAT)
- ) {
- kmp_int64 old_value, new_value;
- old_value = *(kmp_int64 *)lhs;
- (*f)(&new_value, &old_value, rhs);
- /* TODO: Should this be acquire or release? */
- while (!KMP_COMPARE_AND_STORE_ACQ64(
- (kmp_int64 *)lhs, *(kmp_int64 *)&old_value, *(kmp_int64 *)&new_value)) {
- KMP_CPU_PAUSE();
- old_value = *(kmp_int64 *)lhs;
- (*f)(&new_value, &old_value, rhs);
- }
- return;
- } else {
- // Use __kmp_atomic_lock_8i for all 8-byte data,
- // even if it isn't of integer data type.
- #ifdef KMP_GOMP_COMPAT
- if (__kmp_atomic_mode == 2) {
- __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
- } else
- #endif /* KMP_GOMP_COMPAT */
- __kmp_acquire_atomic_lock(&__kmp_atomic_lock_8i, gtid);
- (*f)(lhs, lhs, rhs);
- #ifdef KMP_GOMP_COMPAT
- if (__kmp_atomic_mode == 2) {
- __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
- } else
- #endif /* KMP_GOMP_COMPAT */
- __kmp_release_atomic_lock(&__kmp_atomic_lock_8i, gtid);
- }
- }
- #if KMP_ARCH_X86 || KMP_ARCH_X86_64
- void __kmpc_atomic_10(ident_t *id_ref, int gtid, void *lhs, void *rhs,
- void (*f)(void *, void *, void *)) {
- KMP_DEBUG_ASSERT(__kmp_init_serial);
- #ifdef KMP_GOMP_COMPAT
- if (__kmp_atomic_mode == 2) {
- __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
- } else
- #endif /* KMP_GOMP_COMPAT */
- __kmp_acquire_atomic_lock(&__kmp_atomic_lock_10r, gtid);
- (*f)(lhs, lhs, rhs);
- #ifdef KMP_GOMP_COMPAT
- if (__kmp_atomic_mode == 2) {
- __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
- } else
- #endif /* KMP_GOMP_COMPAT */
- __kmp_release_atomic_lock(&__kmp_atomic_lock_10r, gtid);
- }
- #endif // KMP_ARCH_X86 || KMP_ARCH_X86_64
- void __kmpc_atomic_16(ident_t *id_ref, int gtid, void *lhs, void *rhs,
- void (*f)(void *, void *, void *)) {
- KMP_DEBUG_ASSERT(__kmp_init_serial);
- #ifdef KMP_GOMP_COMPAT
- if (__kmp_atomic_mode == 2) {
- __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
- } else
- #endif /* KMP_GOMP_COMPAT */
- __kmp_acquire_atomic_lock(&__kmp_atomic_lock_16c, gtid);
- (*f)(lhs, lhs, rhs);
- #ifdef KMP_GOMP_COMPAT
- if (__kmp_atomic_mode == 2) {
- __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
- } else
- #endif /* KMP_GOMP_COMPAT */
- __kmp_release_atomic_lock(&__kmp_atomic_lock_16c, gtid);
- }
- #if KMP_ARCH_X86 || KMP_ARCH_X86_64
- void __kmpc_atomic_20(ident_t *id_ref, int gtid, void *lhs, void *rhs,
- void (*f)(void *, void *, void *)) {
- KMP_DEBUG_ASSERT(__kmp_init_serial);
- #ifdef KMP_GOMP_COMPAT
- if (__kmp_atomic_mode == 2) {
- __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
- } else
- #endif /* KMP_GOMP_COMPAT */
- __kmp_acquire_atomic_lock(&__kmp_atomic_lock_20c, gtid);
- (*f)(lhs, lhs, rhs);
- #ifdef KMP_GOMP_COMPAT
- if (__kmp_atomic_mode == 2) {
- __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
- } else
- #endif /* KMP_GOMP_COMPAT */
- __kmp_release_atomic_lock(&__kmp_atomic_lock_20c, gtid);
- }
- #endif // KMP_ARCH_X86 || KMP_ARCH_X86_64
- void __kmpc_atomic_32(ident_t *id_ref, int gtid, void *lhs, void *rhs,
- void (*f)(void *, void *, void *)) {
- KMP_DEBUG_ASSERT(__kmp_init_serial);
- #ifdef KMP_GOMP_COMPAT
- if (__kmp_atomic_mode == 2) {
- __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
- } else
- #endif /* KMP_GOMP_COMPAT */
- __kmp_acquire_atomic_lock(&__kmp_atomic_lock_32c, gtid);
- (*f)(lhs, lhs, rhs);
- #ifdef KMP_GOMP_COMPAT
- if (__kmp_atomic_mode == 2) {
- __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
- } else
- #endif /* KMP_GOMP_COMPAT */
- __kmp_release_atomic_lock(&__kmp_atomic_lock_32c, gtid);
- }
- // AC: same two routines as GOMP_atomic_start/end, but will be called by our
- // compiler; duplicated in order to avoid using third-party names in pure Intel code
- // TODO: consider adding GTID parameter after consultation with Ernesto/Xinmin.
- void __kmpc_atomic_start(void) {
- int gtid = __kmp_entry_gtid();
- KA_TRACE(20, ("__kmpc_atomic_start: T#%d\n", gtid));
- __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
- }
- void __kmpc_atomic_end(void) {
- int gtid = __kmp_get_gtid();
- KA_TRACE(20, ("__kmpc_atomic_end: T#%d\n", gtid));
- __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
- }
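- // Illustrative compiler-emitted bracketing (sketch) for an atomic update the
- // runtime has no dedicated entry point for:
- //   __kmpc_atomic_start();
- //   *p = *p <op> expr; // arbitrary update under the global atomic lock
- //   __kmpc_atomic_end();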
- #if KMP_ARCH_X86 || KMP_ARCH_X86_64
- // OpenMP 5.1 compare and swap
- /*!
- @param loc Source code location
- @param gtid Global thread id
- @param x Memory location to operate on
- @param e Expected value
- @param d Desired value
- @return Result of comparison
- Implements Compare And Swap atomic operation.
- Sample code:
- #pragma omp atomic compare update capture
- { r = x == e; if(r) { x = d; } }
- */
- bool __kmpc_atomic_bool_1_cas(ident_t *loc, int gtid, char *x, char e, char d) {
- return KMP_COMPARE_AND_STORE_ACQ8(x, e, d);
- }
- bool __kmpc_atomic_bool_2_cas(ident_t *loc, int gtid, short *x, short e,
- short d) {
- return KMP_COMPARE_AND_STORE_ACQ16(x, e, d);
- }
- bool __kmpc_atomic_bool_4_cas(ident_t *loc, int gtid, kmp_int32 *x, kmp_int32 e,
- kmp_int32 d) {
- return KMP_COMPARE_AND_STORE_ACQ32(x, e, d);
- }
- bool __kmpc_atomic_bool_8_cas(ident_t *loc, int gtid, kmp_int64 *x, kmp_int64 e,
- kmp_int64 d) {
- return KMP_COMPARE_AND_STORE_ACQ64(x, e, d);
- }
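- // Illustrative lowering (sketch; loc and gtid assumed available) of the
- // sample above for a 4-byte integer x:
- //   bool r = __kmpc_atomic_bool_4_cas(loc, gtid, &x, e, d);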
- /*!
- @param loc Source code location
- @param gtid Global thread id
- @param x Memory location to operate on
- @param e Expected value
- @param d Desired value
- @return Old value of x
- Implements Compare And Swap atomic operation.
- Sample code:
- #pragma omp atomic compare update capture
- { v = x; if (x == e) { x = d; } }
- */
- char __kmpc_atomic_val_1_cas(ident_t *loc, int gtid, char *x, char e, char d) {
- return KMP_COMPARE_AND_STORE_RET8(x, e, d);
- }
- short __kmpc_atomic_val_2_cas(ident_t *loc, int gtid, short *x, short e,
- short d) {
- return KMP_COMPARE_AND_STORE_RET16(x, e, d);
- }
- kmp_int32 __kmpc_atomic_val_4_cas(ident_t *loc, int gtid, kmp_int32 *x,
- kmp_int32 e, kmp_int32 d) {
- return KMP_COMPARE_AND_STORE_RET32(x, e, d);
- }
- kmp_int64 __kmpc_atomic_val_8_cas(ident_t *loc, int gtid, kmp_int64 *x,
- kmp_int64 e, kmp_int64 d) {
- return KMP_COMPARE_AND_STORE_RET64(x, e, d);
- }
- /*!
- @param loc Source code location
- @param gtid Global thread id
- @param x Memory location to operate on
- @param e Expected value
- @param d Desired value
- @param pv Captured value location
- @return Result of comparison
- Implements Compare And Swap + Capture atomic operation.
- v gets the old value of x if the comparison failed; it is left untouched otherwise.
- Sample code:
- #pragma omp atomic compare update capture
- { r = x == e; if(r) { x = d; } else { v = x; } }
- */
- bool __kmpc_atomic_bool_1_cas_cpt(ident_t *loc, int gtid, char *x, char e,
- char d, char *pv) {
- char old = KMP_COMPARE_AND_STORE_RET8(x, e, d);
- if (old == e)
- return true;
- KMP_ASSERT(pv != NULL);
- *pv = old;
- return false;
- }
- bool __kmpc_atomic_bool_2_cas_cpt(ident_t *loc, int gtid, short *x, short e,
- short d, short *pv) {
- short old = KMP_COMPARE_AND_STORE_RET16(x, e, d);
- if (old == e)
- return true;
- KMP_ASSERT(pv != NULL);
- *pv = old;
- return false;
- }
- bool __kmpc_atomic_bool_4_cas_cpt(ident_t *loc, int gtid, kmp_int32 *x,
- kmp_int32 e, kmp_int32 d, kmp_int32 *pv) {
- kmp_int32 old = KMP_COMPARE_AND_STORE_RET32(x, e, d);
- if (old == e)
- return true;
- KMP_ASSERT(pv != NULL);
- *pv = old;
- return false;
- }
- bool __kmpc_atomic_bool_8_cas_cpt(ident_t *loc, int gtid, kmp_int64 *x,
- kmp_int64 e, kmp_int64 d, kmp_int64 *pv) {
- kmp_int64 old = KMP_COMPARE_AND_STORE_RET64(x, e, d);
- if (old == e)
- return true;
- KMP_ASSERT(pv != NULL);
- *pv = old;
- return false;
- }
- /*!
- @param loc Source code location
- @param gtid Global thread id
- @param x Memory location to operate on
- @param e Expected value
- @param d Desired value
- @param pv Captured value location
- @return Old value of x
- Implements Compare And Swap + Capture atomic operation.
- v gets the new value of x.
- Sample code:
- #pragma omp atomic compare update capture
- { if (x == e) { x = d; }; v = x; }
- */
- char __kmpc_atomic_val_1_cas_cpt(ident_t *loc, int gtid, char *x, char e,
- char d, char *pv) {
- char old = KMP_COMPARE_AND_STORE_RET8(x, e, d);
- KMP_ASSERT(pv != NULL);
- *pv = old == e ? d : old;
- return old;
- }
- short __kmpc_atomic_val_2_cas_cpt(ident_t *loc, int gtid, short *x, short e,
- short d, short *pv) {
- short old = KMP_COMPARE_AND_STORE_RET16(x, e, d);
- KMP_ASSERT(pv != NULL);
- *pv = old == e ? d : old;
- return old;
- }
- kmp_int32 __kmpc_atomic_val_4_cas_cpt(ident_t *loc, int gtid, kmp_int32 *x,
- kmp_int32 e, kmp_int32 d, kmp_int32 *pv) {
- kmp_int32 old = KMP_COMPARE_AND_STORE_RET32(x, e, d);
- KMP_ASSERT(pv != NULL);
- *pv = old == e ? d : old;
- return old;
- }
- kmp_int64 __kmpc_atomic_val_8_cas_cpt(ident_t *loc, int gtid, kmp_int64 *x,
- kmp_int64 e, kmp_int64 d, kmp_int64 *pv) {
- kmp_int64 old = KMP_COMPARE_AND_STORE_RET64(x, e, d);
- KMP_ASSERT(pv != NULL);
- *pv = old == e ? d : old;
- return old;
- }
- // End OpenMP 5.1 compare + capture
- #endif // KMP_ARCH_X86 || KMP_ARCH_X86_64
- /*!
- @}
- */
- // end of file