  1. /*
  2. * kmp_atomic.cpp -- ATOMIC implementation routines
  3. */
  4. //===----------------------------------------------------------------------===//
  5. //
  6. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  7. // See https://llvm.org/LICENSE.txt for license information.
  8. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  9. //
  10. //===----------------------------------------------------------------------===//
  11. #include "kmp_atomic.h"
  12. #include "kmp.h" // TRUE, asm routines prototypes
  13. typedef unsigned char uchar;
  14. typedef unsigned short ushort;
  15. /*!
  16. @defgroup ATOMIC_OPS Atomic Operations
  17. These functions are used for implementing the many different varieties of atomic
  18. operations.
  19. The compiler is at liberty to inline atomic operations that are naturally
  20. supported by the target architecture. For instance on IA-32 architecture an
  21. atomic like this can be inlined
  22. @code
  23. static int s = 0;
  24. #pragma omp atomic
  25. s++;
  26. @endcode
  27. using the single instruction: `lock; incl s`
  28. However the runtime does provide entrypoints for these operations to support
  29. compilers that choose not to inline them. (For instance,
  30. `__kmpc_atomic_fixed4_add` could be used to perform the increment above.)
  31. The names of the functions are encoded by using the data type name and the
  32. operation name, as in these tables.
  33. Data Type | Data type encoding
  34. -----------|---------------
  35. int8_t | `fixed1`
  36. uint8_t | `fixed1u`
  37. int16_t | `fixed2`
  38. uint16_t | `fixed2u`
  39. int32_t | `fixed4`
  40. uint32_t | `fixed4u`
  41. int64_t | `fixed8`
  42. uint64_t | `fixed8u`
  43. float | `float4`
  44. double | `float8`
  45. long double (80-bit x87 extended float) | `float10`
  46. complex<float> | `cmplx4`
  47. complex<double> | `cmplx8`
  48. complex<float10> | `cmplx10`
  49. <br>
  50. Operation | Operation encoding
  51. ----------|-------------------
  52. + | add
  53. - | sub
  54. \* | mul
  55. / | div
  56. & | andb
  57. << | shl
  58. \>\> | shr
  59. \| | orb
  60. ^ | xor
  61. && | andl
  62. \|\| | orl
  63. maximum | max
  64. minimum | min
  65. .eqv. | eqv
  66. .neqv. | neqv
  67. <br>
  68. For non-commutative operations, `_rev` can also be added for the reversed
  69. operation. For the functions that capture the result, the suffix `_cpt` is
  70. added.
  71. Update Functions
  72. ================
  73. The general form of an atomic function that just performs an update (without a
  74. `capture`)
  75. @code
  76. void __kmpc_atomic_<datatype>_<operation>( ident_t *id_ref, int gtid, TYPE *
  77. lhs, TYPE rhs );
  78. @endcode
  79. @param id_ref a pointer to the source location
  80. @param gtid the global thread id
  81. @param lhs a pointer to the left operand
  82. @param rhs the right operand
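As an illustrative sketch (not part of the generated entry points), a compiler
that chooses not to inline the increment shown above could lower it to a call
such as
@code
// #pragma omp atomic
// s += 5;
__kmpc_atomic_fixed4_add(&loc, __kmpc_global_thread_num(&loc), &s, 5);
@endcode
where `loc` stands for the compiler-generated source location (`ident_t`) and
`s` is the `int` from the earlier example.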
  83. `capture` functions
  84. ===================
  85. The capture functions perform an atomic update and return a result, which is
  86. either the value before the capture, or that after. They take an additional
  87. argument to determine which result is returned.
  88. Their general form is therefore
  89. @code
  90. TYPE __kmpc_atomic_<datatype>_<operation>_cpt( ident_t *id_ref, int gtid, TYPE *
  91. lhs, TYPE rhs, int flag );
  92. @endcode
  93. @param id_ref a pointer to the source location
  94. @param gtid the global thread id
  95. @param lhs a pointer to the left operand
  96. @param rhs the right operand
  97. @param flag one if the result is to be captured *after* the operation, zero if
  98. captured *before*.
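For example (a sketch, reusing `s`, `loc` and `gtid` from above), capturing the
value after an atomic update could be lowered to
@code
// #pragma omp atomic capture
// { s += 5; v = s; }
int v = __kmpc_atomic_fixed4_add_cpt(&loc, gtid, &s, 5, /* flag = */ 1);
@endcode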
  99. The one exception to this is the `complex<float>` type, where the value
  100. is not returned; instead a pointer to the result is passed as an extra argument.
  101. These functions look like
  102. @code
  103. void __kmpc_atomic_cmplx4_<op>_cpt( ident_t *id_ref, int gtid, kmp_cmplx32 *
  104. lhs, kmp_cmplx32 rhs, kmp_cmplx32 * out, int flag );
  105. @endcode
  106. Read and Write Operations
  107. =========================
  108. The OpenMP<sup>*</sup> standard now supports atomic operations that simply
  109. ensure that the value is read or written atomically, with no modification
  110. performed. In many cases on IA-32 architecture these operations can be inlined
  111. since the architecture guarantees that no tearing occurs on aligned objects
  112. accessed with a single memory operation of up to 64 bits in size.
  113. The general form of the read operations is
  114. @code
  115. TYPE __kmpc_atomic_<type>_rd ( ident_t *id_ref, int gtid, TYPE * loc );
  116. @endcode
  117. For the write operations the form is
  118. @code
  119. void __kmpc_atomic_<type>_wr ( ident_t *id_ref, int gtid, TYPE * lhs, TYPE rhs
  120. );
  121. @endcode
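As an illustrative sketch, an atomic read and an atomic write of the `s` above
could be lowered to
@code
int v = __kmpc_atomic_fixed4_rd(&loc, gtid, &s); // #pragma omp atomic read
__kmpc_atomic_fixed4_wr(&loc, gtid, &s, 10);     // #pragma omp atomic write
@endcode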
  122. Full list of functions
  123. ======================
  124. This leads to the generation of 376 atomic functions, as follows.
  125. Functions for integers
  126. ---------------------
  127. There are versions here for integers of size 1, 2, 4 and 8 bytes, both signed
  128. and unsigned (where that matters).
  129. @code
  130. __kmpc_atomic_fixed1_add
  131. __kmpc_atomic_fixed1_add_cpt
  132. __kmpc_atomic_fixed1_add_fp
  133. __kmpc_atomic_fixed1_andb
  134. __kmpc_atomic_fixed1_andb_cpt
  135. __kmpc_atomic_fixed1_andl
  136. __kmpc_atomic_fixed1_andl_cpt
  137. __kmpc_atomic_fixed1_div
  138. __kmpc_atomic_fixed1_div_cpt
  139. __kmpc_atomic_fixed1_div_cpt_rev
  140. __kmpc_atomic_fixed1_div_float8
  141. __kmpc_atomic_fixed1_div_fp
  142. __kmpc_atomic_fixed1_div_rev
  143. __kmpc_atomic_fixed1_eqv
  144. __kmpc_atomic_fixed1_eqv_cpt
  145. __kmpc_atomic_fixed1_max
  146. __kmpc_atomic_fixed1_max_cpt
  147. __kmpc_atomic_fixed1_min
  148. __kmpc_atomic_fixed1_min_cpt
  149. __kmpc_atomic_fixed1_mul
  150. __kmpc_atomic_fixed1_mul_cpt
  151. __kmpc_atomic_fixed1_mul_float8
  152. __kmpc_atomic_fixed1_mul_fp
  153. __kmpc_atomic_fixed1_neqv
  154. __kmpc_atomic_fixed1_neqv_cpt
  155. __kmpc_atomic_fixed1_orb
  156. __kmpc_atomic_fixed1_orb_cpt
  157. __kmpc_atomic_fixed1_orl
  158. __kmpc_atomic_fixed1_orl_cpt
  159. __kmpc_atomic_fixed1_rd
  160. __kmpc_atomic_fixed1_shl
  161. __kmpc_atomic_fixed1_shl_cpt
  162. __kmpc_atomic_fixed1_shl_cpt_rev
  163. __kmpc_atomic_fixed1_shl_rev
  164. __kmpc_atomic_fixed1_shr
  165. __kmpc_atomic_fixed1_shr_cpt
  166. __kmpc_atomic_fixed1_shr_cpt_rev
  167. __kmpc_atomic_fixed1_shr_rev
  168. __kmpc_atomic_fixed1_sub
  169. __kmpc_atomic_fixed1_sub_cpt
  170. __kmpc_atomic_fixed1_sub_cpt_rev
  171. __kmpc_atomic_fixed1_sub_fp
  172. __kmpc_atomic_fixed1_sub_rev
  173. __kmpc_atomic_fixed1_swp
  174. __kmpc_atomic_fixed1_wr
  175. __kmpc_atomic_fixed1_xor
  176. __kmpc_atomic_fixed1_xor_cpt
  177. __kmpc_atomic_fixed1u_add_fp
  178. __kmpc_atomic_fixed1u_sub_fp
  179. __kmpc_atomic_fixed1u_mul_fp
  180. __kmpc_atomic_fixed1u_div
  181. __kmpc_atomic_fixed1u_div_cpt
  182. __kmpc_atomic_fixed1u_div_cpt_rev
  183. __kmpc_atomic_fixed1u_div_fp
  184. __kmpc_atomic_fixed1u_div_rev
  185. __kmpc_atomic_fixed1u_shr
  186. __kmpc_atomic_fixed1u_shr_cpt
  187. __kmpc_atomic_fixed1u_shr_cpt_rev
  188. __kmpc_atomic_fixed1u_shr_rev
  189. __kmpc_atomic_fixed2_add
  190. __kmpc_atomic_fixed2_add_cpt
  191. __kmpc_atomic_fixed2_add_fp
  192. __kmpc_atomic_fixed2_andb
  193. __kmpc_atomic_fixed2_andb_cpt
  194. __kmpc_atomic_fixed2_andl
  195. __kmpc_atomic_fixed2_andl_cpt
  196. __kmpc_atomic_fixed2_div
  197. __kmpc_atomic_fixed2_div_cpt
  198. __kmpc_atomic_fixed2_div_cpt_rev
  199. __kmpc_atomic_fixed2_div_float8
  200. __kmpc_atomic_fixed2_div_fp
  201. __kmpc_atomic_fixed2_div_rev
  202. __kmpc_atomic_fixed2_eqv
  203. __kmpc_atomic_fixed2_eqv_cpt
  204. __kmpc_atomic_fixed2_max
  205. __kmpc_atomic_fixed2_max_cpt
  206. __kmpc_atomic_fixed2_min
  207. __kmpc_atomic_fixed2_min_cpt
  208. __kmpc_atomic_fixed2_mul
  209. __kmpc_atomic_fixed2_mul_cpt
  210. __kmpc_atomic_fixed2_mul_float8
  211. __kmpc_atomic_fixed2_mul_fp
  212. __kmpc_atomic_fixed2_neqv
  213. __kmpc_atomic_fixed2_neqv_cpt
  214. __kmpc_atomic_fixed2_orb
  215. __kmpc_atomic_fixed2_orb_cpt
  216. __kmpc_atomic_fixed2_orl
  217. __kmpc_atomic_fixed2_orl_cpt
  218. __kmpc_atomic_fixed2_rd
  219. __kmpc_atomic_fixed2_shl
  220. __kmpc_atomic_fixed2_shl_cpt
  221. __kmpc_atomic_fixed2_shl_cpt_rev
  222. __kmpc_atomic_fixed2_shl_rev
  223. __kmpc_atomic_fixed2_shr
  224. __kmpc_atomic_fixed2_shr_cpt
  225. __kmpc_atomic_fixed2_shr_cpt_rev
  226. __kmpc_atomic_fixed2_shr_rev
  227. __kmpc_atomic_fixed2_sub
  228. __kmpc_atomic_fixed2_sub_cpt
  229. __kmpc_atomic_fixed2_sub_cpt_rev
  230. __kmpc_atomic_fixed2_sub_fp
  231. __kmpc_atomic_fixed2_sub_rev
  232. __kmpc_atomic_fixed2_swp
  233. __kmpc_atomic_fixed2_wr
  234. __kmpc_atomic_fixed2_xor
  235. __kmpc_atomic_fixed2_xor_cpt
  236. __kmpc_atomic_fixed2u_add_fp
  237. __kmpc_atomic_fixed2u_sub_fp
  238. __kmpc_atomic_fixed2u_mul_fp
  239. __kmpc_atomic_fixed2u_div
  240. __kmpc_atomic_fixed2u_div_cpt
  241. __kmpc_atomic_fixed2u_div_cpt_rev
  242. __kmpc_atomic_fixed2u_div_fp
  243. __kmpc_atomic_fixed2u_div_rev
  244. __kmpc_atomic_fixed2u_shr
  245. __kmpc_atomic_fixed2u_shr_cpt
  246. __kmpc_atomic_fixed2u_shr_cpt_rev
  247. __kmpc_atomic_fixed2u_shr_rev
  248. __kmpc_atomic_fixed4_add
  249. __kmpc_atomic_fixed4_add_cpt
  250. __kmpc_atomic_fixed4_add_fp
  251. __kmpc_atomic_fixed4_andb
  252. __kmpc_atomic_fixed4_andb_cpt
  253. __kmpc_atomic_fixed4_andl
  254. __kmpc_atomic_fixed4_andl_cpt
  255. __kmpc_atomic_fixed4_div
  256. __kmpc_atomic_fixed4_div_cpt
  257. __kmpc_atomic_fixed4_div_cpt_rev
  258. __kmpc_atomic_fixed4_div_float8
  259. __kmpc_atomic_fixed4_div_fp
  260. __kmpc_atomic_fixed4_div_rev
  261. __kmpc_atomic_fixed4_eqv
  262. __kmpc_atomic_fixed4_eqv_cpt
  263. __kmpc_atomic_fixed4_max
  264. __kmpc_atomic_fixed4_max_cpt
  265. __kmpc_atomic_fixed4_min
  266. __kmpc_atomic_fixed4_min_cpt
  267. __kmpc_atomic_fixed4_mul
  268. __kmpc_atomic_fixed4_mul_cpt
  269. __kmpc_atomic_fixed4_mul_float8
  270. __kmpc_atomic_fixed4_mul_fp
  271. __kmpc_atomic_fixed4_neqv
  272. __kmpc_atomic_fixed4_neqv_cpt
  273. __kmpc_atomic_fixed4_orb
  274. __kmpc_atomic_fixed4_orb_cpt
  275. __kmpc_atomic_fixed4_orl
  276. __kmpc_atomic_fixed4_orl_cpt
  277. __kmpc_atomic_fixed4_rd
  278. __kmpc_atomic_fixed4_shl
  279. __kmpc_atomic_fixed4_shl_cpt
  280. __kmpc_atomic_fixed4_shl_cpt_rev
  281. __kmpc_atomic_fixed4_shl_rev
  282. __kmpc_atomic_fixed4_shr
  283. __kmpc_atomic_fixed4_shr_cpt
  284. __kmpc_atomic_fixed4_shr_cpt_rev
  285. __kmpc_atomic_fixed4_shr_rev
  286. __kmpc_atomic_fixed4_sub
  287. __kmpc_atomic_fixed4_sub_cpt
  288. __kmpc_atomic_fixed4_sub_cpt_rev
  289. __kmpc_atomic_fixed4_sub_fp
  290. __kmpc_atomic_fixed4_sub_rev
  291. __kmpc_atomic_fixed4_swp
  292. __kmpc_atomic_fixed4_wr
  293. __kmpc_atomic_fixed4_xor
  294. __kmpc_atomic_fixed4_xor_cpt
  295. __kmpc_atomic_fixed4u_add_fp
  296. __kmpc_atomic_fixed4u_sub_fp
  297. __kmpc_atomic_fixed4u_mul_fp
  298. __kmpc_atomic_fixed4u_div
  299. __kmpc_atomic_fixed4u_div_cpt
  300. __kmpc_atomic_fixed4u_div_cpt_rev
  301. __kmpc_atomic_fixed4u_div_fp
  302. __kmpc_atomic_fixed4u_div_rev
  303. __kmpc_atomic_fixed4u_shr
  304. __kmpc_atomic_fixed4u_shr_cpt
  305. __kmpc_atomic_fixed4u_shr_cpt_rev
  306. __kmpc_atomic_fixed4u_shr_rev
  307. __kmpc_atomic_fixed8_add
  308. __kmpc_atomic_fixed8_add_cpt
  309. __kmpc_atomic_fixed8_add_fp
  310. __kmpc_atomic_fixed8_andb
  311. __kmpc_atomic_fixed8_andb_cpt
  312. __kmpc_atomic_fixed8_andl
  313. __kmpc_atomic_fixed8_andl_cpt
  314. __kmpc_atomic_fixed8_div
  315. __kmpc_atomic_fixed8_div_cpt
  316. __kmpc_atomic_fixed8_div_cpt_rev
  317. __kmpc_atomic_fixed8_div_float8
  318. __kmpc_atomic_fixed8_div_fp
  319. __kmpc_atomic_fixed8_div_rev
  320. __kmpc_atomic_fixed8_eqv
  321. __kmpc_atomic_fixed8_eqv_cpt
  322. __kmpc_atomic_fixed8_max
  323. __kmpc_atomic_fixed8_max_cpt
  324. __kmpc_atomic_fixed8_min
  325. __kmpc_atomic_fixed8_min_cpt
  326. __kmpc_atomic_fixed8_mul
  327. __kmpc_atomic_fixed8_mul_cpt
  328. __kmpc_atomic_fixed8_mul_float8
  329. __kmpc_atomic_fixed8_mul_fp
  330. __kmpc_atomic_fixed8_neqv
  331. __kmpc_atomic_fixed8_neqv_cpt
  332. __kmpc_atomic_fixed8_orb
  333. __kmpc_atomic_fixed8_orb_cpt
  334. __kmpc_atomic_fixed8_orl
  335. __kmpc_atomic_fixed8_orl_cpt
  336. __kmpc_atomic_fixed8_rd
  337. __kmpc_atomic_fixed8_shl
  338. __kmpc_atomic_fixed8_shl_cpt
  339. __kmpc_atomic_fixed8_shl_cpt_rev
  340. __kmpc_atomic_fixed8_shl_rev
  341. __kmpc_atomic_fixed8_shr
  342. __kmpc_atomic_fixed8_shr_cpt
  343. __kmpc_atomic_fixed8_shr_cpt_rev
  344. __kmpc_atomic_fixed8_shr_rev
  345. __kmpc_atomic_fixed8_sub
  346. __kmpc_atomic_fixed8_sub_cpt
  347. __kmpc_atomic_fixed8_sub_cpt_rev
  348. __kmpc_atomic_fixed8_sub_fp
  349. __kmpc_atomic_fixed8_sub_rev
  350. __kmpc_atomic_fixed8_swp
  351. __kmpc_atomic_fixed8_wr
  352. __kmpc_atomic_fixed8_xor
  353. __kmpc_atomic_fixed8_xor_cpt
  354. __kmpc_atomic_fixed8u_add_fp
  355. __kmpc_atomic_fixed8u_sub_fp
  356. __kmpc_atomic_fixed8u_mul_fp
  357. __kmpc_atomic_fixed8u_div
  358. __kmpc_atomic_fixed8u_div_cpt
  359. __kmpc_atomic_fixed8u_div_cpt_rev
  360. __kmpc_atomic_fixed8u_div_fp
  361. __kmpc_atomic_fixed8u_div_rev
  362. __kmpc_atomic_fixed8u_shr
  363. __kmpc_atomic_fixed8u_shr_cpt
  364. __kmpc_atomic_fixed8u_shr_cpt_rev
  365. __kmpc_atomic_fixed8u_shr_rev
  366. @endcode
  367. Functions for floating point
  368. ----------------------------
  369. There are versions here for floating point numbers of size 4, 8, 10 and 16
  370. bytes. (Ten-byte floats are the x87 80-bit format and are now rarely used.)
  371. @code
  372. __kmpc_atomic_float4_add
  373. __kmpc_atomic_float4_add_cpt
  374. __kmpc_atomic_float4_add_float8
  375. __kmpc_atomic_float4_add_fp
  376. __kmpc_atomic_float4_div
  377. __kmpc_atomic_float4_div_cpt
  378. __kmpc_atomic_float4_div_cpt_rev
  379. __kmpc_atomic_float4_div_float8
  380. __kmpc_atomic_float4_div_fp
  381. __kmpc_atomic_float4_div_rev
  382. __kmpc_atomic_float4_max
  383. __kmpc_atomic_float4_max_cpt
  384. __kmpc_atomic_float4_min
  385. __kmpc_atomic_float4_min_cpt
  386. __kmpc_atomic_float4_mul
  387. __kmpc_atomic_float4_mul_cpt
  388. __kmpc_atomic_float4_mul_float8
  389. __kmpc_atomic_float4_mul_fp
  390. __kmpc_atomic_float4_rd
  391. __kmpc_atomic_float4_sub
  392. __kmpc_atomic_float4_sub_cpt
  393. __kmpc_atomic_float4_sub_cpt_rev
  394. __kmpc_atomic_float4_sub_float8
  395. __kmpc_atomic_float4_sub_fp
  396. __kmpc_atomic_float4_sub_rev
  397. __kmpc_atomic_float4_swp
  398. __kmpc_atomic_float4_wr
  399. __kmpc_atomic_float8_add
  400. __kmpc_atomic_float8_add_cpt
  401. __kmpc_atomic_float8_add_fp
  402. __kmpc_atomic_float8_div
  403. __kmpc_atomic_float8_div_cpt
  404. __kmpc_atomic_float8_div_cpt_rev
  405. __kmpc_atomic_float8_div_fp
  406. __kmpc_atomic_float8_div_rev
  407. __kmpc_atomic_float8_max
  408. __kmpc_atomic_float8_max_cpt
  409. __kmpc_atomic_float8_min
  410. __kmpc_atomic_float8_min_cpt
  411. __kmpc_atomic_float8_mul
  412. __kmpc_atomic_float8_mul_cpt
  413. __kmpc_atomic_float8_mul_fp
  414. __kmpc_atomic_float8_rd
  415. __kmpc_atomic_float8_sub
  416. __kmpc_atomic_float8_sub_cpt
  417. __kmpc_atomic_float8_sub_cpt_rev
  418. __kmpc_atomic_float8_sub_fp
  419. __kmpc_atomic_float8_sub_rev
  420. __kmpc_atomic_float8_swp
  421. __kmpc_atomic_float8_wr
  422. __kmpc_atomic_float10_add
  423. __kmpc_atomic_float10_add_cpt
  424. __kmpc_atomic_float10_add_fp
  425. __kmpc_atomic_float10_div
  426. __kmpc_atomic_float10_div_cpt
  427. __kmpc_atomic_float10_div_cpt_rev
  428. __kmpc_atomic_float10_div_fp
  429. __kmpc_atomic_float10_div_rev
  430. __kmpc_atomic_float10_mul
  431. __kmpc_atomic_float10_mul_cpt
  432. __kmpc_atomic_float10_mul_fp
  433. __kmpc_atomic_float10_rd
  434. __kmpc_atomic_float10_sub
  435. __kmpc_atomic_float10_sub_cpt
  436. __kmpc_atomic_float10_sub_cpt_rev
  437. __kmpc_atomic_float10_sub_fp
  438. __kmpc_atomic_float10_sub_rev
  439. __kmpc_atomic_float10_swp
  440. __kmpc_atomic_float10_wr
  441. __kmpc_atomic_float16_add
  442. __kmpc_atomic_float16_add_cpt
  443. __kmpc_atomic_float16_div
  444. __kmpc_atomic_float16_div_cpt
  445. __kmpc_atomic_float16_div_cpt_rev
  446. __kmpc_atomic_float16_div_rev
  447. __kmpc_atomic_float16_max
  448. __kmpc_atomic_float16_max_cpt
  449. __kmpc_atomic_float16_min
  450. __kmpc_atomic_float16_min_cpt
  451. __kmpc_atomic_float16_mul
  452. __kmpc_atomic_float16_mul_cpt
  453. __kmpc_atomic_float16_rd
  454. __kmpc_atomic_float16_sub
  455. __kmpc_atomic_float16_sub_cpt
  456. __kmpc_atomic_float16_sub_cpt_rev
  457. __kmpc_atomic_float16_sub_rev
  458. __kmpc_atomic_float16_swp
  459. __kmpc_atomic_float16_wr
  460. @endcode
  461. Functions for Complex types
  462. ---------------------------
  463. Functions for complex types whose component floating point variables are of size
  464. 4, 8, 10 or 16 bytes. The names here are based on the size of the component float,
  465. *not* the size of the complex type. So `__kmpc_atomic_cmplx8_add` is an
  466. operation on a `complex<double>` or `complex(kind=8)`, *not* `complex<float>`.
  467. @code
  468. __kmpc_atomic_cmplx4_add
  469. __kmpc_atomic_cmplx4_add_cmplx8
  470. __kmpc_atomic_cmplx4_add_cpt
  471. __kmpc_atomic_cmplx4_div
  472. __kmpc_atomic_cmplx4_div_cmplx8
  473. __kmpc_atomic_cmplx4_div_cpt
  474. __kmpc_atomic_cmplx4_div_cpt_rev
  475. __kmpc_atomic_cmplx4_div_rev
  476. __kmpc_atomic_cmplx4_mul
  477. __kmpc_atomic_cmplx4_mul_cmplx8
  478. __kmpc_atomic_cmplx4_mul_cpt
  479. __kmpc_atomic_cmplx4_rd
  480. __kmpc_atomic_cmplx4_sub
  481. __kmpc_atomic_cmplx4_sub_cmplx8
  482. __kmpc_atomic_cmplx4_sub_cpt
  483. __kmpc_atomic_cmplx4_sub_cpt_rev
  484. __kmpc_atomic_cmplx4_sub_rev
  485. __kmpc_atomic_cmplx4_swp
  486. __kmpc_atomic_cmplx4_wr
  487. __kmpc_atomic_cmplx8_add
  488. __kmpc_atomic_cmplx8_add_cpt
  489. __kmpc_atomic_cmplx8_div
  490. __kmpc_atomic_cmplx8_div_cpt
  491. __kmpc_atomic_cmplx8_div_cpt_rev
  492. __kmpc_atomic_cmplx8_div_rev
  493. __kmpc_atomic_cmplx8_mul
  494. __kmpc_atomic_cmplx8_mul_cpt
  495. __kmpc_atomic_cmplx8_rd
  496. __kmpc_atomic_cmplx8_sub
  497. __kmpc_atomic_cmplx8_sub_cpt
  498. __kmpc_atomic_cmplx8_sub_cpt_rev
  499. __kmpc_atomic_cmplx8_sub_rev
  500. __kmpc_atomic_cmplx8_swp
  501. __kmpc_atomic_cmplx8_wr
  502. __kmpc_atomic_cmplx10_add
  503. __kmpc_atomic_cmplx10_add_cpt
  504. __kmpc_atomic_cmplx10_div
  505. __kmpc_atomic_cmplx10_div_cpt
  506. __kmpc_atomic_cmplx10_div_cpt_rev
  507. __kmpc_atomic_cmplx10_div_rev
  508. __kmpc_atomic_cmplx10_mul
  509. __kmpc_atomic_cmplx10_mul_cpt
  510. __kmpc_atomic_cmplx10_rd
  511. __kmpc_atomic_cmplx10_sub
  512. __kmpc_atomic_cmplx10_sub_cpt
  513. __kmpc_atomic_cmplx10_sub_cpt_rev
  514. __kmpc_atomic_cmplx10_sub_rev
  515. __kmpc_atomic_cmplx10_swp
  516. __kmpc_atomic_cmplx10_wr
  517. __kmpc_atomic_cmplx16_add
  518. __kmpc_atomic_cmplx16_add_cpt
  519. __kmpc_atomic_cmplx16_div
  520. __kmpc_atomic_cmplx16_div_cpt
  521. __kmpc_atomic_cmplx16_div_cpt_rev
  522. __kmpc_atomic_cmplx16_div_rev
  523. __kmpc_atomic_cmplx16_mul
  524. __kmpc_atomic_cmplx16_mul_cpt
  525. __kmpc_atomic_cmplx16_rd
  526. __kmpc_atomic_cmplx16_sub
  527. __kmpc_atomic_cmplx16_sub_cpt
  528. __kmpc_atomic_cmplx16_sub_cpt_rev
  529. __kmpc_atomic_cmplx16_swp
  530. __kmpc_atomic_cmplx16_wr
  531. @endcode
  532. */
  533. /*!
  534. @ingroup ATOMIC_OPS
  535. @{
  536. */
  537. /*
  538. * Global vars
  539. */
  540. #ifndef KMP_GOMP_COMPAT
  541. int __kmp_atomic_mode = 1; // Intel perf
  542. #else
  543. int __kmp_atomic_mode = 2; // GOMP compatibility
  544. #endif /* KMP_GOMP_COMPAT */
  545. KMP_ALIGN(128)
  546. // Control access to all user coded atomics in Gnu compat mode
  547. kmp_atomic_lock_t __kmp_atomic_lock;
  548. // Control access to all user coded atomics for 1-byte fixed data types
  549. kmp_atomic_lock_t __kmp_atomic_lock_1i;
  550. // Control access to all user coded atomics for 2-byte fixed data types
  551. kmp_atomic_lock_t __kmp_atomic_lock_2i;
  552. // Control access to all user coded atomics for 4-byte fixed data types
  553. kmp_atomic_lock_t __kmp_atomic_lock_4i;
  554. // Control access to all user coded atomics for kmp_real32 data type
  555. kmp_atomic_lock_t __kmp_atomic_lock_4r;
  556. // Control access to all user coded atomics for 8-byte fixed data types
  557. kmp_atomic_lock_t __kmp_atomic_lock_8i;
  558. // Control access to all user coded atomics for kmp_real64 data type
  559. kmp_atomic_lock_t __kmp_atomic_lock_8r;
  560. // Control access to all user coded atomics for complex byte data type
  561. kmp_atomic_lock_t __kmp_atomic_lock_8c;
  562. // Control access to all user coded atomics for long double data type
  563. kmp_atomic_lock_t __kmp_atomic_lock_10r;
  564. // Control access to all user coded atomics for _Quad data type
  565. kmp_atomic_lock_t __kmp_atomic_lock_16r;
  566. // Control access to all user coded atomics for double complex data type
  567. kmp_atomic_lock_t __kmp_atomic_lock_16c;
  568. // Control access to all user coded atomics for long double complex type
  569. kmp_atomic_lock_t __kmp_atomic_lock_20c;
  570. // Control access to all user coded atomics for _Quad complex data type
  571. kmp_atomic_lock_t __kmp_atomic_lock_32c;
  572. /* 2007-03-02:
  573. Without "volatile" specifier in OP_CMPXCHG and MIN_MAX_CMPXCHG we have a bug
  574. on *_32 and *_32e. This is just a temporary workaround for the problem. It
  575. seems the right solution is writing OP_CMPXCHG and MIN_MAX_CMPXCHG routines
  576. in assembler language. */
  577. #define KMP_ATOMIC_VOLATILE volatile
  578. #if (KMP_ARCH_X86) && KMP_HAVE_QUAD
  579. static inline Quad_a4_t operator+(Quad_a4_t &lhs, Quad_a4_t &rhs) {
  580. return lhs.q + rhs.q;
  581. }
  582. static inline Quad_a4_t operator-(Quad_a4_t &lhs, Quad_a4_t &rhs) {
  583. return lhs.q - rhs.q;
  584. }
  585. static inline Quad_a4_t operator*(Quad_a4_t &lhs, Quad_a4_t &rhs) {
  586. return lhs.q * rhs.q;
  587. }
  588. static inline Quad_a4_t operator/(Quad_a4_t &lhs, Quad_a4_t &rhs) {
  589. return lhs.q / rhs.q;
  590. }
  591. static inline bool operator<(Quad_a4_t &lhs, Quad_a4_t &rhs) {
  592. return lhs.q < rhs.q;
  593. }
  594. static inline bool operator>(Quad_a4_t &lhs, Quad_a4_t &rhs) {
  595. return lhs.q > rhs.q;
  596. }
  597. static inline Quad_a16_t operator+(Quad_a16_t &lhs, Quad_a16_t &rhs) {
  598. return lhs.q + rhs.q;
  599. }
  600. static inline Quad_a16_t operator-(Quad_a16_t &lhs, Quad_a16_t &rhs) {
  601. return lhs.q - rhs.q;
  602. }
  603. static inline Quad_a16_t operator*(Quad_a16_t &lhs, Quad_a16_t &rhs) {
  604. return lhs.q * rhs.q;
  605. }
  606. static inline Quad_a16_t operator/(Quad_a16_t &lhs, Quad_a16_t &rhs) {
  607. return lhs.q / rhs.q;
  608. }
  609. static inline bool operator<(Quad_a16_t &lhs, Quad_a16_t &rhs) {
  610. return lhs.q < rhs.q;
  611. }
  612. static inline bool operator>(Quad_a16_t &lhs, Quad_a16_t &rhs) {
  613. return lhs.q > rhs.q;
  614. }
  615. static inline kmp_cmplx128_a4_t operator+(kmp_cmplx128_a4_t &lhs,
  616. kmp_cmplx128_a4_t &rhs) {
  617. return lhs.q + rhs.q;
  618. }
  619. static inline kmp_cmplx128_a4_t operator-(kmp_cmplx128_a4_t &lhs,
  620. kmp_cmplx128_a4_t &rhs) {
  621. return lhs.q - rhs.q;
  622. }
  623. static inline kmp_cmplx128_a4_t operator*(kmp_cmplx128_a4_t &lhs,
  624. kmp_cmplx128_a4_t &rhs) {
  625. return lhs.q * rhs.q;
  626. }
  627. static inline kmp_cmplx128_a4_t operator/(kmp_cmplx128_a4_t &lhs,
  628. kmp_cmplx128_a4_t &rhs) {
  629. return lhs.q / rhs.q;
  630. }
  631. static inline kmp_cmplx128_a16_t operator+(kmp_cmplx128_a16_t &lhs,
  632. kmp_cmplx128_a16_t &rhs) {
  633. return lhs.q + rhs.q;
  634. }
  635. static inline kmp_cmplx128_a16_t operator-(kmp_cmplx128_a16_t &lhs,
  636. kmp_cmplx128_a16_t &rhs) {
  637. return lhs.q - rhs.q;
  638. }
  639. static inline kmp_cmplx128_a16_t operator*(kmp_cmplx128_a16_t &lhs,
  640. kmp_cmplx128_a16_t &rhs) {
  641. return lhs.q * rhs.q;
  642. }
  643. static inline kmp_cmplx128_a16_t operator/(kmp_cmplx128_a16_t &lhs,
  644. kmp_cmplx128_a16_t &rhs) {
  645. return lhs.q / rhs.q;
  646. }
  647. #endif // (KMP_ARCH_X86) && KMP_HAVE_QUAD
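// Note (inferred from the definitions above): these overloads let the generic
// update macros apply +, -, *, /, < and > directly to the aligned wrapper
// types by forwarding each operation to the underlying .q member.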
  648. // ATOMIC implementation routines -----------------------------------------
  649. // One routine for each operation and operand type.
  650. // All routine declarations look like
  651. // void __kmpc_atomic_RTYPE_OP( ident_t*, int, TYPE *lhs, TYPE rhs );
  652. #define KMP_CHECK_GTID \
  653. if (gtid == KMP_GTID_UNKNOWN) { \
  654. gtid = __kmp_entry_gtid(); \
  655. } // check and get gtid when needed
  656. // Beginning of a definition (provides name, parameters, debug trace)
  657. // TYPE_ID - operand type and size (fixed* for signed, fixed*u for unsigned
  658. // fixed-size integers)
  659. // OP_ID - operation identifier (add, sub, mul, ...)
  660. // TYPE - operands' type
  661. #define ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, RET_TYPE) \
  662. RET_TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID(ident_t *id_ref, int gtid, \
  663. TYPE *lhs, TYPE rhs) { \
  664. KMP_DEBUG_ASSERT(__kmp_init_serial); \
  665. KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid));
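// For illustration (a sketch, not emitted code): ATOMIC_BEGIN(fixed4, add,
// kmp_int32, void) opens the definition
//   void __kmpc_atomic_fixed4_add(ident_t *id_ref, int gtid,
//                                 kmp_int32 *lhs, kmp_int32 rhs) {
// followed by the debug assertion and KA_TRACE call; the body and the closing
// brace are supplied by whichever macro uses ATOMIC_BEGIN.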
  666. // ------------------------------------------------------------------------
  667. // Lock variables used for critical sections for various size operands
  668. #define ATOMIC_LOCK0 __kmp_atomic_lock // all types, for Gnu compat
  669. #define ATOMIC_LOCK1i __kmp_atomic_lock_1i // char
  670. #define ATOMIC_LOCK2i __kmp_atomic_lock_2i // short
  671. #define ATOMIC_LOCK4i __kmp_atomic_lock_4i // long int
  672. #define ATOMIC_LOCK4r __kmp_atomic_lock_4r // float
  673. #define ATOMIC_LOCK8i __kmp_atomic_lock_8i // long long int
  674. #define ATOMIC_LOCK8r __kmp_atomic_lock_8r // double
  675. #define ATOMIC_LOCK8c __kmp_atomic_lock_8c // float complex
  676. #define ATOMIC_LOCK10r __kmp_atomic_lock_10r // long double
  677. #define ATOMIC_LOCK16r __kmp_atomic_lock_16r // _Quad
  678. #define ATOMIC_LOCK16c __kmp_atomic_lock_16c // double complex
  679. #define ATOMIC_LOCK20c __kmp_atomic_lock_20c // long double complex
  680. #define ATOMIC_LOCK32c __kmp_atomic_lock_32c // _Quad complex
  681. // ------------------------------------------------------------------------
  682. // Operation on *lhs, rhs bound by critical section
  683. // OP - operator (it's supposed to contain an assignment)
  684. // LCK_ID - lock identifier
  685. // Note: don't check gtid as it should always be valid
  686. // 1- and 2-byte operands expect a valid gtid; others check it before this macro
  687. #define OP_CRITICAL(OP, LCK_ID) \
  688. __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
  689. \
  690. (*lhs) OP(rhs); \
  691. \
  692. __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);
  693. #define OP_UPDATE_CRITICAL(TYPE, OP, LCK_ID) \
  694. __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
  695. (*lhs) = (TYPE)((*lhs)OP rhs); \
  696. __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);
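// For illustration (a sketch): OP_UPDATE_CRITICAL(kmp_real64, +, 8r) expands to
//   __kmp_acquire_atomic_lock(&__kmp_atomic_lock_8r, gtid);
//   (*lhs) = (kmp_real64)((*lhs) + rhs);
//   __kmp_release_atomic_lock(&__kmp_atomic_lock_8r, gtid);
// i.e. the update is serialized through the lock selected by LCK_ID.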
  697. // ------------------------------------------------------------------------
  698. // For GNU compatibility, we may need to use a critical section,
  699. // even though it is not required by the ISA.
  700. //
  701. // On IA-32 architecture, all atomic operations except for fixed 4 byte add,
  702. // sub, and bitwise logical ops, and 1 & 2 byte logical ops use a common
  703. // critical section. On Intel(R) 64, all atomic operations are done with fetch
  704. // and add or compare and exchange. Therefore, the FLAG parameter to this
  705. macro is either KMP_ARCH_X86 or 0 (or 1, for Intel-specific extensions which
  706. // require a critical section, where we predict that they will be implemented
  707. // in the Gnu codegen by calling GOMP_atomic_start() / GOMP_atomic_end()).
  708. //
  709. // When the OP_GOMP_CRITICAL macro is used in a *CRITICAL* macro construct,
  710. // the FLAG parameter should always be 1. If we know that we will be using
  711. // a critical section, then we want to make certain that we use the generic
  712. lock __kmp_atomic_lock to protect the atomic update, and not one of the
  713. // locks that are specialized based upon the size or type of the data.
  714. //
  715. // If FLAG is 0, then we are relying on dead code elimination by the build
  716. // compiler to get rid of the useless block of code, and save a needless
  717. // branch at runtime.
  718. #ifdef KMP_GOMP_COMPAT
  719. #define OP_GOMP_CRITICAL(OP, FLAG) \
  720. if ((FLAG) && (__kmp_atomic_mode == 2)) { \
  721. KMP_CHECK_GTID; \
  722. OP_CRITICAL(OP, 0); \
  723. return; \
  724. }
  725. #define OP_UPDATE_GOMP_CRITICAL(TYPE, OP, FLAG) \
  726. if ((FLAG) && (__kmp_atomic_mode == 2)) { \
  727. KMP_CHECK_GTID; \
  728. OP_UPDATE_CRITICAL(TYPE, OP, 0); \
  729. return; \
  730. }
  731. #else
  732. #define OP_GOMP_CRITICAL(OP, FLAG)
  733. #define OP_UPDATE_GOMP_CRITICAL(TYPE, OP, FLAG)
  734. #endif /* KMP_GOMP_COMPAT */
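// For illustration (a sketch): in a KMP_GOMP_COMPAT build with
// __kmp_atomic_mode == 2 and a non-zero FLAG, the expansion above performs the
// update inside a critical section guarded by the generic __kmp_atomic_lock
// (LCK_ID 0) and returns, instead of falling through to the lock-free code
// that follows.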
  735. #if KMP_MIC
  736. #define KMP_DO_PAUSE _mm_delay_32(1)
  737. #else
  738. #define KMP_DO_PAUSE
  739. #endif /* KMP_MIC */
  740. // ------------------------------------------------------------------------
  741. // Operation on *lhs, rhs using "compare_and_store" routine
  742. // TYPE - operands' type
  743. // BITS - size in bits, used to distinguish low level calls
  744. // OP - operator
  745. #define OP_CMPXCHG(TYPE, BITS, OP) \
  746. { \
  747. TYPE old_value, new_value; \
  748. old_value = *(TYPE volatile *)lhs; \
  749. new_value = (TYPE)(old_value OP rhs); \
  750. while (!KMP_COMPARE_AND_STORE_ACQ##BITS( \
  751. (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) & old_value, \
  752. *VOLATILE_CAST(kmp_int##BITS *) & new_value)) { \
  753. KMP_DO_PAUSE; \
  754. \
  755. old_value = *(TYPE volatile *)lhs; \
  756. new_value = (TYPE)(old_value OP rhs); \
  757. } \
  758. }
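// For illustration (a sketch): OP_CMPXCHG(kmp_real32, 32, +) amounts to a
// classic CAS loop,
//   do {
//     old_value = *lhs;
//     new_value = old_value + rhs;
//   } while (!compare_and_store_32(lhs, old_value, new_value));
// i.e. the update is retried until no other thread modified *lhs in between.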
  759. #if USE_CMPXCHG_FIX
  760. // 2007-06-25:
  761. // workaround for C78287 (complex(kind=4) data type). lin_32, lin_32e, win_32
  763. and win_32e are affected (I verified the asm). The compiler ignores the volatile
  764. qualifier of the temp_val in the OP_CMPXCHG macro. This is a compiler bug.
  765. The related tracker is C76005, targeted to 11.0. I verified the asm of
  765. // the workaround.
  766. #define OP_CMPXCHG_WORKAROUND(TYPE, BITS, OP) \
  767. { \
  768. struct _sss { \
  769. TYPE cmp; \
  770. kmp_int##BITS *vvv; \
  771. }; \
  772. struct _sss old_value, new_value; \
  773. old_value.vvv = (kmp_int##BITS *)&old_value.cmp; \
  774. new_value.vvv = (kmp_int##BITS *)&new_value.cmp; \
  775. *old_value.vvv = *(volatile kmp_int##BITS *)lhs; \
  776. new_value.cmp = (TYPE)(old_value.cmp OP rhs); \
  777. while (!KMP_COMPARE_AND_STORE_ACQ##BITS( \
  778. (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) old_value.vvv, \
  779. *VOLATILE_CAST(kmp_int##BITS *) new_value.vvv)) { \
  780. KMP_DO_PAUSE; \
  781. \
  782. *old_value.vvv = *(volatile kmp_int##BITS *)lhs; \
  783. new_value.cmp = (TYPE)(old_value.cmp OP rhs); \
  784. } \
  785. }
  786. // end of the first part of the workaround for C78287
  787. #endif // USE_CMPXCHG_FIX
  788. #if KMP_OS_WINDOWS && KMP_ARCH_AARCH64
  789. // Undo explicit type casts to get MSVC ARM64 to build. Uses
  790. // OP_CMPXCHG_WORKAROUND definition for OP_CMPXCHG
  791. #undef OP_CMPXCHG
  792. #define OP_CMPXCHG(TYPE, BITS, OP) \
  793. { \
  794. struct _sss { \
  795. TYPE cmp; \
  796. kmp_int##BITS *vvv; \
  797. }; \
  798. struct _sss old_value, new_value; \
  799. old_value.vvv = (kmp_int##BITS *)&old_value.cmp; \
  800. new_value.vvv = (kmp_int##BITS *)&new_value.cmp; \
  801. *old_value.vvv = *(volatile kmp_int##BITS *)lhs; \
  802. new_value.cmp = old_value.cmp OP rhs; \
  803. while (!KMP_COMPARE_AND_STORE_ACQ##BITS( \
  804. (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) old_value.vvv, \
  805. *VOLATILE_CAST(kmp_int##BITS *) new_value.vvv)) { \
  806. KMP_DO_PAUSE; \
  807. \
  808. *old_value.vvv = *(volatile kmp_int##BITS *)lhs; \
  809. new_value.cmp = old_value.cmp OP rhs; \
  810. } \
  811. }
  812. #undef OP_UPDATE_CRITICAL
  813. #define OP_UPDATE_CRITICAL(TYPE, OP, LCK_ID) \
  814. __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
  815. (*lhs) = (*lhs)OP rhs; \
  816. __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);
  817. #endif // KMP_OS_WINDOWS && KMP_ARCH_AARCH64
  818. #if KMP_ARCH_X86 || KMP_ARCH_X86_64
  819. // ------------------------------------------------------------------------
  820. // X86 or X86_64: no alignment problems ====================================
  821. #define ATOMIC_FIXED_ADD(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, \
  822. GOMP_FLAG) \
  823. ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
  824. OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG) \
  825. /* OP used as a sign for subtraction: (lhs-rhs) --> (lhs+-rhs) */ \
  826. KMP_TEST_THEN_ADD##BITS(lhs, OP rhs); \
  827. }
  828. // -------------------------------------------------------------------------
  829. #define ATOMIC_CMPXCHG(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, \
  830. GOMP_FLAG) \
  831. ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
  832. OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG) \
  833. OP_CMPXCHG(TYPE, BITS, OP) \
  834. }
  835. #if USE_CMPXCHG_FIX
  836. // -------------------------------------------------------------------------
  837. // workaround for C78287 (complex(kind=4) data type)
  838. #define ATOMIC_CMPXCHG_WORKAROUND(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, \
  839. MASK, GOMP_FLAG) \
  840. ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
  841. OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG) \
  842. OP_CMPXCHG_WORKAROUND(TYPE, BITS, OP) \
  843. }
  844. // end of the second part of the workaround for C78287
  845. #endif // USE_CMPXCHG_FIX
  846. #else
  847. // -------------------------------------------------------------------------
  848. // Code for other architectures that don't handle unaligned accesses.
  849. #define ATOMIC_FIXED_ADD(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, \
  850. GOMP_FLAG) \
  851. ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
  852. OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG) \
  853. if (!((kmp_uintptr_t)lhs & 0x##MASK)) { \
  854. /* OP used as a sign for subtraction: (lhs-rhs) --> (lhs+-rhs) */ \
  855. KMP_TEST_THEN_ADD##BITS(lhs, OP rhs); \
  856. } else { \
  857. KMP_CHECK_GTID; \
  858. OP_UPDATE_CRITICAL(TYPE, OP, \
  859. LCK_ID) /* unaligned address - use critical */ \
  860. } \
  861. }
  862. // -------------------------------------------------------------------------
  863. #define ATOMIC_CMPXCHG(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, \
  864. GOMP_FLAG) \
  865. ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
  866. OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG) \
  867. if (!((kmp_uintptr_t)lhs & 0x##MASK)) { \
  868. OP_CMPXCHG(TYPE, BITS, OP) /* aligned address */ \
  869. } else { \
  870. KMP_CHECK_GTID; \
  871. OP_UPDATE_CRITICAL(TYPE, OP, \
  872. LCK_ID) /* unaligned address - use critical */ \
  873. } \
  874. }
  875. #if USE_CMPXCHG_FIX
  876. // -------------------------------------------------------------------------
  877. // workaround for C78287 (complex(kind=4) data type)
  878. #define ATOMIC_CMPXCHG_WORKAROUND(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, \
  879. MASK, GOMP_FLAG) \
  880. ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
  881. OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG) \
  882. if (!((kmp_uintptr_t)lhs & 0x##MASK)) { \
  883. OP_CMPXCHG(TYPE, BITS, OP) /* aligned address */ \
  884. } else { \
  885. KMP_CHECK_GTID; \
  886. OP_UPDATE_CRITICAL(TYPE, OP, \
  887. LCK_ID) /* unaligned address - use critical */ \
  888. } \
  889. }
  890. // end of the second part of the workaround for C78287
  891. #endif // USE_CMPXCHG_FIX
  892. #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
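// For illustration: on the non-x86 path above, MASK encodes the required
// alignment, e.g. 0x3 tests the low two address bits (4-byte alignment) and
// 0x7 the low three bits (8-byte alignment); misaligned operands fall back to
// the critical-section update.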
  893. // Routines for ATOMIC 4-byte operands addition and subtraction
  894. ATOMIC_FIXED_ADD(fixed4, add, kmp_int32, 32, +, 4i, 3,
  895. 0) // __kmpc_atomic_fixed4_add
  896. ATOMIC_FIXED_ADD(fixed4, sub, kmp_int32, 32, -, 4i, 3,
  897. 0) // __kmpc_atomic_fixed4_sub
  898. ATOMIC_CMPXCHG(float4, add, kmp_real32, 32, +, 4r, 3,
  899. KMP_ARCH_X86) // __kmpc_atomic_float4_add
  900. ATOMIC_CMPXCHG(float4, sub, kmp_real32, 32, -, 4r, 3,
  901. KMP_ARCH_X86) // __kmpc_atomic_float4_sub
  902. // Routines for ATOMIC 8-byte operands addition and subtraction
  903. ATOMIC_FIXED_ADD(fixed8, add, kmp_int64, 64, +, 8i, 7,
  904. KMP_ARCH_X86) // __kmpc_atomic_fixed8_add
  905. ATOMIC_FIXED_ADD(fixed8, sub, kmp_int64, 64, -, 8i, 7,
  906. KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub
  907. ATOMIC_CMPXCHG(float8, add, kmp_real64, 64, +, 8r, 7,
  908. KMP_ARCH_X86) // __kmpc_atomic_float8_add
  909. ATOMIC_CMPXCHG(float8, sub, kmp_real64, 64, -, 8r, 7,
  910. KMP_ARCH_X86) // __kmpc_atomic_float8_sub
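// For illustration (a sketch of the net effect, GOMP-compat branch omitted):
// ATOMIC_FIXED_ADD(fixed4, add, ...) above defines
//   void __kmpc_atomic_fixed4_add(ident_t *id_ref, int gtid,
//                                 kmp_int32 *lhs, kmp_int32 rhs) {
//     KMP_TEST_THEN_ADD32(lhs, +rhs); // atomic fetch-and-add
//   }
// whereas the ATOMIC_CMPXCHG expansions fall back to the OP_CMPXCHG CAS loop,
// since no single fetch-and-<op> instruction exists for those operations.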
  911. // ------------------------------------------------------------------------
  912. // Entries definition for integer operands
  913. // TYPE_ID - operands type and size (fixed4, float4)
  914. // OP_ID - operation identifier (add, sub, mul, ...)
  915. // TYPE - operand type
  916. // BITS - size in bits, used to distinguish low level calls
  917. // OP - operator (used in critical section)
  918. // LCK_ID - lock identifier, used to possibly distinguish lock variable
  919. // MASK - used for alignment check
  920. // TYPE_ID,OP_ID, TYPE, BITS,OP,LCK_ID,MASK,GOMP_FLAG
  921. // ------------------------------------------------------------------------
  922. // Routines for ATOMIC integer operands, other operators
  923. // ------------------------------------------------------------------------
  924. // TYPE_ID,OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG
  925. ATOMIC_CMPXCHG(fixed1, add, kmp_int8, 8, +, 1i, 0,
  926. KMP_ARCH_X86) // __kmpc_atomic_fixed1_add
  927. ATOMIC_CMPXCHG(fixed1, andb, kmp_int8, 8, &, 1i, 0,
  928. 0) // __kmpc_atomic_fixed1_andb
  929. ATOMIC_CMPXCHG(fixed1, div, kmp_int8, 8, /, 1i, 0,
  930. KMP_ARCH_X86) // __kmpc_atomic_fixed1_div
  931. ATOMIC_CMPXCHG(fixed1u, div, kmp_uint8, 8, /, 1i, 0,
  932. KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div
  933. ATOMIC_CMPXCHG(fixed1, mul, kmp_int8, 8, *, 1i, 0,
  934. KMP_ARCH_X86) // __kmpc_atomic_fixed1_mul
  935. ATOMIC_CMPXCHG(fixed1, orb, kmp_int8, 8, |, 1i, 0,
  936. 0) // __kmpc_atomic_fixed1_orb
  937. ATOMIC_CMPXCHG(fixed1, shl, kmp_int8, 8, <<, 1i, 0,
  938. KMP_ARCH_X86) // __kmpc_atomic_fixed1_shl
  939. ATOMIC_CMPXCHG(fixed1, shr, kmp_int8, 8, >>, 1i, 0,
  940. KMP_ARCH_X86) // __kmpc_atomic_fixed1_shr
  941. ATOMIC_CMPXCHG(fixed1u, shr, kmp_uint8, 8, >>, 1i, 0,
  942. KMP_ARCH_X86) // __kmpc_atomic_fixed1u_shr
  943. ATOMIC_CMPXCHG(fixed1, sub, kmp_int8, 8, -, 1i, 0,
  944. KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub
  945. ATOMIC_CMPXCHG(fixed1, xor, kmp_int8, 8, ^, 1i, 0,
  946. 0) // __kmpc_atomic_fixed1_xor
  947. ATOMIC_CMPXCHG(fixed2, add, kmp_int16, 16, +, 2i, 1,
  948. KMP_ARCH_X86) // __kmpc_atomic_fixed2_add
  949. ATOMIC_CMPXCHG(fixed2, andb, kmp_int16, 16, &, 2i, 1,
  950. 0) // __kmpc_atomic_fixed2_andb
  951. ATOMIC_CMPXCHG(fixed2, div, kmp_int16, 16, /, 2i, 1,
  952. KMP_ARCH_X86) // __kmpc_atomic_fixed2_div
  953. ATOMIC_CMPXCHG(fixed2u, div, kmp_uint16, 16, /, 2i, 1,
  954. KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div
  955. ATOMIC_CMPXCHG(fixed2, mul, kmp_int16, 16, *, 2i, 1,
  956. KMP_ARCH_X86) // __kmpc_atomic_fixed2_mul
  957. ATOMIC_CMPXCHG(fixed2, orb, kmp_int16, 16, |, 2i, 1,
  958. 0) // __kmpc_atomic_fixed2_orb
  959. ATOMIC_CMPXCHG(fixed2, shl, kmp_int16, 16, <<, 2i, 1,
  960. KMP_ARCH_X86) // __kmpc_atomic_fixed2_shl
  961. ATOMIC_CMPXCHG(fixed2, shr, kmp_int16, 16, >>, 2i, 1,
  962. KMP_ARCH_X86) // __kmpc_atomic_fixed2_shr
  963. ATOMIC_CMPXCHG(fixed2u, shr, kmp_uint16, 16, >>, 2i, 1,
  964. KMP_ARCH_X86) // __kmpc_atomic_fixed2u_shr
  965. ATOMIC_CMPXCHG(fixed2, sub, kmp_int16, 16, -, 2i, 1,
  966. KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub
  967. ATOMIC_CMPXCHG(fixed2, xor, kmp_int16, 16, ^, 2i, 1,
  968. 0) // __kmpc_atomic_fixed2_xor
  969. ATOMIC_CMPXCHG(fixed4, andb, kmp_int32, 32, &, 4i, 3,
  970. 0) // __kmpc_atomic_fixed4_andb
  971. ATOMIC_CMPXCHG(fixed4, div, kmp_int32, 32, /, 4i, 3,
  972. KMP_ARCH_X86) // __kmpc_atomic_fixed4_div
  973. ATOMIC_CMPXCHG(fixed4u, div, kmp_uint32, 32, /, 4i, 3,
  974. KMP_ARCH_X86) // __kmpc_atomic_fixed4u_div
  975. ATOMIC_CMPXCHG(fixed4, mul, kmp_int32, 32, *, 4i, 3,
  976. KMP_ARCH_X86) // __kmpc_atomic_fixed4_mul
  977. ATOMIC_CMPXCHG(fixed4, orb, kmp_int32, 32, |, 4i, 3,
  978. 0) // __kmpc_atomic_fixed4_orb
  979. ATOMIC_CMPXCHG(fixed4, shl, kmp_int32, 32, <<, 4i, 3,
  980. KMP_ARCH_X86) // __kmpc_atomic_fixed4_shl
  981. ATOMIC_CMPXCHG(fixed4, shr, kmp_int32, 32, >>, 4i, 3,
  982. KMP_ARCH_X86) // __kmpc_atomic_fixed4_shr
  983. ATOMIC_CMPXCHG(fixed4u, shr, kmp_uint32, 32, >>, 4i, 3,
  984. KMP_ARCH_X86) // __kmpc_atomic_fixed4u_shr
  985. ATOMIC_CMPXCHG(fixed4, xor, kmp_int32, 32, ^, 4i, 3,
  986. 0) // __kmpc_atomic_fixed4_xor
  987. ATOMIC_CMPXCHG(fixed8, andb, kmp_int64, 64, &, 8i, 7,
  988. KMP_ARCH_X86) // __kmpc_atomic_fixed8_andb
  989. ATOMIC_CMPXCHG(fixed8, div, kmp_int64, 64, /, 8i, 7,
  990. KMP_ARCH_X86) // __kmpc_atomic_fixed8_div
  991. ATOMIC_CMPXCHG(fixed8u, div, kmp_uint64, 64, /, 8i, 7,
  992. KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div
  993. ATOMIC_CMPXCHG(fixed8, mul, kmp_int64, 64, *, 8i, 7,
  994. KMP_ARCH_X86) // __kmpc_atomic_fixed8_mul
  995. ATOMIC_CMPXCHG(fixed8, orb, kmp_int64, 64, |, 8i, 7,
  996. KMP_ARCH_X86) // __kmpc_atomic_fixed8_orb
  997. ATOMIC_CMPXCHG(fixed8, shl, kmp_int64, 64, <<, 8i, 7,
  998. KMP_ARCH_X86) // __kmpc_atomic_fixed8_shl
  999. ATOMIC_CMPXCHG(fixed8, shr, kmp_int64, 64, >>, 8i, 7,
  1000. KMP_ARCH_X86) // __kmpc_atomic_fixed8_shr
  1001. ATOMIC_CMPXCHG(fixed8u, shr, kmp_uint64, 64, >>, 8i, 7,
  1002. KMP_ARCH_X86) // __kmpc_atomic_fixed8u_shr
  1003. ATOMIC_CMPXCHG(fixed8, xor, kmp_int64, 64, ^, 8i, 7,
  1004. KMP_ARCH_X86) // __kmpc_atomic_fixed8_xor
  1005. ATOMIC_CMPXCHG(float4, div, kmp_real32, 32, /, 4r, 3,
  1006. KMP_ARCH_X86) // __kmpc_atomic_float4_div
  1007. ATOMIC_CMPXCHG(float4, mul, kmp_real32, 32, *, 4r, 3,
  1008. KMP_ARCH_X86) // __kmpc_atomic_float4_mul
  1009. ATOMIC_CMPXCHG(float8, div, kmp_real64, 64, /, 8r, 7,
  1010. KMP_ARCH_X86) // __kmpc_atomic_float8_div
  1011. ATOMIC_CMPXCHG(float8, mul, kmp_real64, 64, *, 8r, 7,
  1012. KMP_ARCH_X86) // __kmpc_atomic_float8_mul
1013. // TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG
  1014. /* ------------------------------------------------------------------------ */
  1015. /* Routines for C/C++ Reduction operators && and || */
  1016. // ------------------------------------------------------------------------
1017. // Separate macros are needed for && and || because they have no compound-assignment form
1018. // TODO: eliminate the ATOMIC_CRIT_{L,EQV} macros as they are unused
  1019. #define ATOMIC_CRIT_L(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \
  1020. ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
  1021. OP_GOMP_CRITICAL(= *lhs OP, GOMP_FLAG) \
  1022. OP_CRITICAL(= *lhs OP, LCK_ID) \
  1023. }
  1024. #if KMP_ARCH_X86 || KMP_ARCH_X86_64
  1025. // ------------------------------------------------------------------------
  1026. // X86 or X86_64: no alignment problems ===================================
  1027. #define ATOMIC_CMPX_L(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, GOMP_FLAG) \
  1028. ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
  1029. OP_GOMP_CRITICAL(= *lhs OP, GOMP_FLAG) \
  1030. OP_CMPXCHG(TYPE, BITS, OP) \
  1031. }
  1032. #else
  1033. // ------------------------------------------------------------------------
  1034. // Code for other architectures that don't handle unaligned accesses.
  1035. #define ATOMIC_CMPX_L(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, GOMP_FLAG) \
  1036. ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
  1037. OP_GOMP_CRITICAL(= *lhs OP, GOMP_FLAG) \
  1038. if (!((kmp_uintptr_t)lhs & 0x##MASK)) { \
  1039. OP_CMPXCHG(TYPE, BITS, OP) /* aligned address */ \
  1040. } else { \
  1041. KMP_CHECK_GTID; \
  1042. OP_CRITICAL(= *lhs OP, LCK_ID) /* unaligned - use critical */ \
  1043. } \
  1044. }
  1045. #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
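// For illustration, a rough sketch of what ATOMIC_CMPX_L(fixed4, orl, kmp_int32, 32, ||, 4i, 3, 0)
// below expands to on x86/x86_64, assuming the ATOMIC_BEGIN and OP_CMPXCHG forms defined earlier
// in this file (the retry loop is shown in simplified form):
//
//   void __kmpc_atomic_fixed4_orl(ident_t *id_ref, int gtid, kmp_int32 *lhs, kmp_int32 rhs) {
//     kmp_int32 old_value, new_value;
//     do {
//       old_value = *lhs;
//       new_value = (kmp_int32)(old_value || rhs);
//     } while (!KMP_COMPARE_AND_STORE_ACQ32((kmp_int32 *)lhs, old_value, new_value));
//   }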
  1046. ATOMIC_CMPX_L(fixed1, andl, char, 8, &&, 1i, 0,
  1047. KMP_ARCH_X86) // __kmpc_atomic_fixed1_andl
  1048. ATOMIC_CMPX_L(fixed1, orl, char, 8, ||, 1i, 0,
  1049. KMP_ARCH_X86) // __kmpc_atomic_fixed1_orl
  1050. ATOMIC_CMPX_L(fixed2, andl, short, 16, &&, 2i, 1,
  1051. KMP_ARCH_X86) // __kmpc_atomic_fixed2_andl
  1052. ATOMIC_CMPX_L(fixed2, orl, short, 16, ||, 2i, 1,
  1053. KMP_ARCH_X86) // __kmpc_atomic_fixed2_orl
  1054. ATOMIC_CMPX_L(fixed4, andl, kmp_int32, 32, &&, 4i, 3,
  1055. 0) // __kmpc_atomic_fixed4_andl
  1056. ATOMIC_CMPX_L(fixed4, orl, kmp_int32, 32, ||, 4i, 3,
  1057. 0) // __kmpc_atomic_fixed4_orl
  1058. ATOMIC_CMPX_L(fixed8, andl, kmp_int64, 64, &&, 8i, 7,
  1059. KMP_ARCH_X86) // __kmpc_atomic_fixed8_andl
  1060. ATOMIC_CMPX_L(fixed8, orl, kmp_int64, 64, ||, 8i, 7,
  1061. KMP_ARCH_X86) // __kmpc_atomic_fixed8_orl
  1062. /* ------------------------------------------------------------------------- */
1063. /* Routines for Fortran operators that have no direct C counterpart: */
  1064. /* MAX, MIN, .EQV., .NEQV. */
  1065. /* Operators .AND., .OR. are covered by __kmpc_atomic_*_{andl,orl} */
  1066. /* Intrinsics IAND, IOR, IEOR are covered by __kmpc_atomic_*_{andb,orb,xor} */
  1067. // -------------------------------------------------------------------------
  1068. // MIN and MAX need separate macros
1069. // OP - comparison operator used to check whether any action is still needed
  1070. #define MIN_MAX_CRITSECT(OP, LCK_ID) \
  1071. __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
  1072. \
  1073. if (*lhs OP rhs) { /* still need actions? */ \
  1074. *lhs = rhs; \
  1075. } \
  1076. __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);
  1077. // -------------------------------------------------------------------------
  1078. #ifdef KMP_GOMP_COMPAT
  1079. #define GOMP_MIN_MAX_CRITSECT(OP, FLAG) \
  1080. if ((FLAG) && (__kmp_atomic_mode == 2)) { \
  1081. KMP_CHECK_GTID; \
  1082. MIN_MAX_CRITSECT(OP, 0); \
  1083. return; \
  1084. }
  1085. #else
  1086. #define GOMP_MIN_MAX_CRITSECT(OP, FLAG)
  1087. #endif /* KMP_GOMP_COMPAT */
  1088. // -------------------------------------------------------------------------
  1089. #define MIN_MAX_CMPXCHG(TYPE, BITS, OP) \
  1090. { \
  1091. TYPE KMP_ATOMIC_VOLATILE temp_val; \
  1092. TYPE old_value; \
  1093. temp_val = *lhs; \
  1094. old_value = temp_val; \
  1095. while (old_value OP rhs && /* still need actions? */ \
  1096. !KMP_COMPARE_AND_STORE_ACQ##BITS( \
  1097. (kmp_int##BITS *)lhs, \
  1098. *VOLATILE_CAST(kmp_int##BITS *) & old_value, \
  1099. *VOLATILE_CAST(kmp_int##BITS *) & rhs)) { \
  1100. temp_val = *lhs; \
  1101. old_value = temp_val; \
  1102. } \
  1103. }
  1104. // -------------------------------------------------------------------------
  1105. // 1-byte, 2-byte operands - use critical section
  1106. #define MIN_MAX_CRITICAL(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \
  1107. ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
  1108. if (*lhs OP rhs) { /* need actions? */ \
  1109. GOMP_MIN_MAX_CRITSECT(OP, GOMP_FLAG) \
  1110. MIN_MAX_CRITSECT(OP, LCK_ID) \
  1111. } \
  1112. }
  1113. #if KMP_ARCH_X86 || KMP_ARCH_X86_64
  1114. // -------------------------------------------------------------------------
  1115. // X86 or X86_64: no alignment problems ====================================
  1116. #define MIN_MAX_COMPXCHG(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, \
  1117. GOMP_FLAG) \
  1118. ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
  1119. if (*lhs OP rhs) { \
  1120. GOMP_MIN_MAX_CRITSECT(OP, GOMP_FLAG) \
  1121. MIN_MAX_CMPXCHG(TYPE, BITS, OP) \
  1122. } \
  1123. }
  1124. #else
  1125. // -------------------------------------------------------------------------
  1126. // Code for other architectures that don't handle unaligned accesses.
  1127. #define MIN_MAX_COMPXCHG(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, \
  1128. GOMP_FLAG) \
  1129. ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
  1130. if (*lhs OP rhs) { \
  1131. GOMP_MIN_MAX_CRITSECT(OP, GOMP_FLAG) \
  1132. if (!((kmp_uintptr_t)lhs & 0x##MASK)) { \
  1133. MIN_MAX_CMPXCHG(TYPE, BITS, OP) /* aligned address */ \
  1134. } else { \
  1135. KMP_CHECK_GTID; \
  1136. MIN_MAX_CRITSECT(OP, LCK_ID) /* unaligned address */ \
  1137. } \
  1138. } \
  1139. }
  1140. #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
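// For illustration, MIN_MAX_COMPXCHG(fixed4, max, kmp_int32, 32, <, 4i, 3, 0) below roughly
// expands to (the GOMP compatibility path is omitted because its flag is 0):
//
//   void __kmpc_atomic_fixed4_max(ident_t *id_ref, int gtid, kmp_int32 *lhs, kmp_int32 rhs) {
//     if (*lhs < rhs) {                         // any action needed at all?
//       kmp_int32 old_value = *lhs;
//       while (old_value < rhs &&               // re-check against the latest value
//              !KMP_COMPARE_AND_STORE_ACQ32((kmp_int32 *)lhs, old_value, rhs)) {
//         old_value = *lhs;
//       }
//     }
//   }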
  1141. MIN_MAX_COMPXCHG(fixed1, max, char, 8, <, 1i, 0,
  1142. KMP_ARCH_X86) // __kmpc_atomic_fixed1_max
  1143. MIN_MAX_COMPXCHG(fixed1, min, char, 8, >, 1i, 0,
  1144. KMP_ARCH_X86) // __kmpc_atomic_fixed1_min
  1145. MIN_MAX_COMPXCHG(fixed2, max, short, 16, <, 2i, 1,
  1146. KMP_ARCH_X86) // __kmpc_atomic_fixed2_max
  1147. MIN_MAX_COMPXCHG(fixed2, min, short, 16, >, 2i, 1,
  1148. KMP_ARCH_X86) // __kmpc_atomic_fixed2_min
  1149. MIN_MAX_COMPXCHG(fixed4, max, kmp_int32, 32, <, 4i, 3,
  1150. 0) // __kmpc_atomic_fixed4_max
  1151. MIN_MAX_COMPXCHG(fixed4, min, kmp_int32, 32, >, 4i, 3,
  1152. 0) // __kmpc_atomic_fixed4_min
  1153. MIN_MAX_COMPXCHG(fixed8, max, kmp_int64, 64, <, 8i, 7,
  1154. KMP_ARCH_X86) // __kmpc_atomic_fixed8_max
  1155. MIN_MAX_COMPXCHG(fixed8, min, kmp_int64, 64, >, 8i, 7,
  1156. KMP_ARCH_X86) // __kmpc_atomic_fixed8_min
  1157. MIN_MAX_COMPXCHG(float4, max, kmp_real32, 32, <, 4r, 3,
  1158. KMP_ARCH_X86) // __kmpc_atomic_float4_max
  1159. MIN_MAX_COMPXCHG(float4, min, kmp_real32, 32, >, 4r, 3,
  1160. KMP_ARCH_X86) // __kmpc_atomic_float4_min
  1161. MIN_MAX_COMPXCHG(float8, max, kmp_real64, 64, <, 8r, 7,
  1162. KMP_ARCH_X86) // __kmpc_atomic_float8_max
  1163. MIN_MAX_COMPXCHG(float8, min, kmp_real64, 64, >, 8r, 7,
  1164. KMP_ARCH_X86) // __kmpc_atomic_float8_min
  1165. #if KMP_ARCH_X86 || KMP_ARCH_X86_64
  1166. MIN_MAX_CRITICAL(float10, max, long double, <, 10r,
  1167. 1) // __kmpc_atomic_float10_max
  1168. MIN_MAX_CRITICAL(float10, min, long double, >, 10r,
  1169. 1) // __kmpc_atomic_float10_min
  1170. #endif // KMP_ARCH_X86 || KMP_ARCH_X86_64
  1171. #if KMP_HAVE_QUAD
  1172. MIN_MAX_CRITICAL(float16, max, QUAD_LEGACY, <, 16r,
  1173. 1) // __kmpc_atomic_float16_max
  1174. MIN_MAX_CRITICAL(float16, min, QUAD_LEGACY, >, 16r,
  1175. 1) // __kmpc_atomic_float16_min
  1176. #if (KMP_ARCH_X86)
  1177. MIN_MAX_CRITICAL(float16, max_a16, Quad_a16_t, <, 16r,
  1178. 1) // __kmpc_atomic_float16_max_a16
  1179. MIN_MAX_CRITICAL(float16, min_a16, Quad_a16_t, >, 16r,
  1180. 1) // __kmpc_atomic_float16_min_a16
  1181. #endif // (KMP_ARCH_X86)
  1182. #endif // KMP_HAVE_QUAD
  1183. // ------------------------------------------------------------------------
1184. // Separate macros are needed for .EQV. because the complement (~) is required
1185. // The OP argument is ignored for critical sections; ^= ~ is used instead
  1186. #define ATOMIC_CRIT_EQV(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \
  1187. ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
  1188. OP_GOMP_CRITICAL(^= (TYPE) ~, GOMP_FLAG) /* send assignment */ \
  1189. OP_CRITICAL(^= (TYPE) ~, LCK_ID) /* send assignment and complement */ \
  1190. }
  1191. // ------------------------------------------------------------------------
  1192. #if KMP_ARCH_X86 || KMP_ARCH_X86_64
  1193. // ------------------------------------------------------------------------
  1194. // X86 or X86_64: no alignment problems ===================================
  1195. #define ATOMIC_CMPX_EQV(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, \
  1196. GOMP_FLAG) \
  1197. ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
  1198. OP_GOMP_CRITICAL(^= (TYPE) ~, GOMP_FLAG) /* send assignment */ \
  1199. OP_CMPXCHG(TYPE, BITS, OP) \
  1200. }
  1201. // ------------------------------------------------------------------------
  1202. #else
  1203. // ------------------------------------------------------------------------
  1204. // Code for other architectures that don't handle unaligned accesses.
  1205. #define ATOMIC_CMPX_EQV(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, \
  1206. GOMP_FLAG) \
  1207. ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
  1208. OP_GOMP_CRITICAL(^= (TYPE) ~, GOMP_FLAG) \
  1209. if (!((kmp_uintptr_t)lhs & 0x##MASK)) { \
  1210. OP_CMPXCHG(TYPE, BITS, OP) /* aligned address */ \
  1211. } else { \
  1212. KMP_CHECK_GTID; \
  1213. OP_CRITICAL(^= (TYPE) ~, LCK_ID) /* unaligned address - use critical */ \
  1214. } \
  1215. }
  1216. #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
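// For illustration, the .EQV. entries below pass "^~" as OP, so on x86/x86_64
// ATOMIC_CMPX_EQV(fixed4, eqv, kmp_int32, 32, ^~, 4i, 3, KMP_ARCH_X86) roughly produces
// (GOMP compatibility path omitted):
//
//   void __kmpc_atomic_fixed4_eqv(ident_t *id_ref, int gtid, kmp_int32 *lhs, kmp_int32 rhs) {
//     kmp_int32 old_value, new_value;
//     do {
//       old_value = *lhs;
//       new_value = old_value ^ ~rhs;           // .EQV. == XOR with the complement
//     } while (!KMP_COMPARE_AND_STORE_ACQ32((kmp_int32 *)lhs, old_value, new_value));
//   }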
  1217. ATOMIC_CMPXCHG(fixed1, neqv, kmp_int8, 8, ^, 1i, 0,
  1218. KMP_ARCH_X86) // __kmpc_atomic_fixed1_neqv
  1219. ATOMIC_CMPXCHG(fixed2, neqv, kmp_int16, 16, ^, 2i, 1,
  1220. KMP_ARCH_X86) // __kmpc_atomic_fixed2_neqv
  1221. ATOMIC_CMPXCHG(fixed4, neqv, kmp_int32, 32, ^, 4i, 3,
  1222. KMP_ARCH_X86) // __kmpc_atomic_fixed4_neqv
  1223. ATOMIC_CMPXCHG(fixed8, neqv, kmp_int64, 64, ^, 8i, 7,
  1224. KMP_ARCH_X86) // __kmpc_atomic_fixed8_neqv
  1225. ATOMIC_CMPX_EQV(fixed1, eqv, kmp_int8, 8, ^~, 1i, 0,
  1226. KMP_ARCH_X86) // __kmpc_atomic_fixed1_eqv
  1227. ATOMIC_CMPX_EQV(fixed2, eqv, kmp_int16, 16, ^~, 2i, 1,
  1228. KMP_ARCH_X86) // __kmpc_atomic_fixed2_eqv
  1229. ATOMIC_CMPX_EQV(fixed4, eqv, kmp_int32, 32, ^~, 4i, 3,
  1230. KMP_ARCH_X86) // __kmpc_atomic_fixed4_eqv
  1231. ATOMIC_CMPX_EQV(fixed8, eqv, kmp_int64, 64, ^~, 8i, 7,
  1232. KMP_ARCH_X86) // __kmpc_atomic_fixed8_eqv
  1233. // ------------------------------------------------------------------------
  1234. // Routines for Extended types: long double, _Quad, complex flavours (use
  1235. // critical section)
  1236. // TYPE_ID, OP_ID, TYPE - detailed above
  1237. // OP - operator
  1238. // LCK_ID - lock identifier, used to possibly distinguish lock variable
  1239. #define ATOMIC_CRITICAL(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \
  1240. ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
  1241. OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG) /* send assignment */ \
  1242. OP_UPDATE_CRITICAL(TYPE, OP, LCK_ID) /* send assignment */ \
  1243. }
  1244. /* ------------------------------------------------------------------------- */
  1245. #if KMP_ARCH_X86 || KMP_ARCH_X86_64
  1246. // routines for long double type
  1247. ATOMIC_CRITICAL(float10, add, long double, +, 10r,
  1248. 1) // __kmpc_atomic_float10_add
  1249. ATOMIC_CRITICAL(float10, sub, long double, -, 10r,
  1250. 1) // __kmpc_atomic_float10_sub
  1251. ATOMIC_CRITICAL(float10, mul, long double, *, 10r,
  1252. 1) // __kmpc_atomic_float10_mul
  1253. ATOMIC_CRITICAL(float10, div, long double, /, 10r,
  1254. 1) // __kmpc_atomic_float10_div
  1255. #endif // KMP_ARCH_X86 || KMP_ARCH_X86_64
  1256. #if KMP_HAVE_QUAD
  1257. // routines for _Quad type
  1258. ATOMIC_CRITICAL(float16, add, QUAD_LEGACY, +, 16r,
  1259. 1) // __kmpc_atomic_float16_add
  1260. ATOMIC_CRITICAL(float16, sub, QUAD_LEGACY, -, 16r,
  1261. 1) // __kmpc_atomic_float16_sub
  1262. ATOMIC_CRITICAL(float16, mul, QUAD_LEGACY, *, 16r,
  1263. 1) // __kmpc_atomic_float16_mul
  1264. ATOMIC_CRITICAL(float16, div, QUAD_LEGACY, /, 16r,
  1265. 1) // __kmpc_atomic_float16_div
  1266. #if (KMP_ARCH_X86)
  1267. ATOMIC_CRITICAL(float16, add_a16, Quad_a16_t, +, 16r,
  1268. 1) // __kmpc_atomic_float16_add_a16
  1269. ATOMIC_CRITICAL(float16, sub_a16, Quad_a16_t, -, 16r,
  1270. 1) // __kmpc_atomic_float16_sub_a16
  1271. ATOMIC_CRITICAL(float16, mul_a16, Quad_a16_t, *, 16r,
  1272. 1) // __kmpc_atomic_float16_mul_a16
  1273. ATOMIC_CRITICAL(float16, div_a16, Quad_a16_t, /, 16r,
  1274. 1) // __kmpc_atomic_float16_div_a16
  1275. #endif // (KMP_ARCH_X86)
  1276. #endif // KMP_HAVE_QUAD
  1277. // routines for complex types
  1278. #if USE_CMPXCHG_FIX
  1279. // workaround for C78287 (complex(kind=4) data type)
  1280. ATOMIC_CMPXCHG_WORKAROUND(cmplx4, add, kmp_cmplx32, 64, +, 8c, 7,
  1281. 1) // __kmpc_atomic_cmplx4_add
  1282. ATOMIC_CMPXCHG_WORKAROUND(cmplx4, sub, kmp_cmplx32, 64, -, 8c, 7,
  1283. 1) // __kmpc_atomic_cmplx4_sub
  1284. ATOMIC_CMPXCHG_WORKAROUND(cmplx4, mul, kmp_cmplx32, 64, *, 8c, 7,
  1285. 1) // __kmpc_atomic_cmplx4_mul
  1286. ATOMIC_CMPXCHG_WORKAROUND(cmplx4, div, kmp_cmplx32, 64, /, 8c, 7,
  1287. 1) // __kmpc_atomic_cmplx4_div
  1288. // end of the workaround for C78287
  1289. #else
  1290. ATOMIC_CRITICAL(cmplx4, add, kmp_cmplx32, +, 8c, 1) // __kmpc_atomic_cmplx4_add
  1291. ATOMIC_CRITICAL(cmplx4, sub, kmp_cmplx32, -, 8c, 1) // __kmpc_atomic_cmplx4_sub
  1292. ATOMIC_CRITICAL(cmplx4, mul, kmp_cmplx32, *, 8c, 1) // __kmpc_atomic_cmplx4_mul
  1293. ATOMIC_CRITICAL(cmplx4, div, kmp_cmplx32, /, 8c, 1) // __kmpc_atomic_cmplx4_div
  1294. #endif // USE_CMPXCHG_FIX
  1295. ATOMIC_CRITICAL(cmplx8, add, kmp_cmplx64, +, 16c, 1) // __kmpc_atomic_cmplx8_add
  1296. ATOMIC_CRITICAL(cmplx8, sub, kmp_cmplx64, -, 16c, 1) // __kmpc_atomic_cmplx8_sub
  1297. ATOMIC_CRITICAL(cmplx8, mul, kmp_cmplx64, *, 16c, 1) // __kmpc_atomic_cmplx8_mul
  1298. ATOMIC_CRITICAL(cmplx8, div, kmp_cmplx64, /, 16c, 1) // __kmpc_atomic_cmplx8_div
  1299. #if KMP_ARCH_X86 || KMP_ARCH_X86_64
  1300. ATOMIC_CRITICAL(cmplx10, add, kmp_cmplx80, +, 20c,
  1301. 1) // __kmpc_atomic_cmplx10_add
  1302. ATOMIC_CRITICAL(cmplx10, sub, kmp_cmplx80, -, 20c,
  1303. 1) // __kmpc_atomic_cmplx10_sub
  1304. ATOMIC_CRITICAL(cmplx10, mul, kmp_cmplx80, *, 20c,
  1305. 1) // __kmpc_atomic_cmplx10_mul
  1306. ATOMIC_CRITICAL(cmplx10, div, kmp_cmplx80, /, 20c,
  1307. 1) // __kmpc_atomic_cmplx10_div
  1308. #endif // KMP_ARCH_X86 || KMP_ARCH_X86_64
  1309. #if KMP_HAVE_QUAD
  1310. ATOMIC_CRITICAL(cmplx16, add, CPLX128_LEG, +, 32c,
  1311. 1) // __kmpc_atomic_cmplx16_add
  1312. ATOMIC_CRITICAL(cmplx16, sub, CPLX128_LEG, -, 32c,
  1313. 1) // __kmpc_atomic_cmplx16_sub
  1314. ATOMIC_CRITICAL(cmplx16, mul, CPLX128_LEG, *, 32c,
  1315. 1) // __kmpc_atomic_cmplx16_mul
  1316. ATOMIC_CRITICAL(cmplx16, div, CPLX128_LEG, /, 32c,
  1317. 1) // __kmpc_atomic_cmplx16_div
  1318. #if (KMP_ARCH_X86)
  1319. ATOMIC_CRITICAL(cmplx16, add_a16, kmp_cmplx128_a16_t, +, 32c,
  1320. 1) // __kmpc_atomic_cmplx16_add_a16
  1321. ATOMIC_CRITICAL(cmplx16, sub_a16, kmp_cmplx128_a16_t, -, 32c,
  1322. 1) // __kmpc_atomic_cmplx16_sub_a16
  1323. ATOMIC_CRITICAL(cmplx16, mul_a16, kmp_cmplx128_a16_t, *, 32c,
  1324. 1) // __kmpc_atomic_cmplx16_mul_a16
  1325. ATOMIC_CRITICAL(cmplx16, div_a16, kmp_cmplx128_a16_t, /, 32c,
  1326. 1) // __kmpc_atomic_cmplx16_div_a16
  1327. #endif // (KMP_ARCH_X86)
  1328. #endif // KMP_HAVE_QUAD
  1329. // OpenMP 4.0: x = expr binop x for non-commutative operations.
  1330. // Supported only on IA-32 architecture and Intel(R) 64
  1331. #if KMP_ARCH_X86 || KMP_ARCH_X86_64
  1332. // ------------------------------------------------------------------------
  1333. // Operation on *lhs, rhs bound by critical section
  1334. // OP - operator (it's supposed to contain an assignment)
  1335. // LCK_ID - lock identifier
  1336. // Note: don't check gtid as it should always be valid
1337. // 1-, 2-byte operands - gtid is expected to be valid; other sizes - check before this macro
  1338. #define OP_CRITICAL_REV(TYPE, OP, LCK_ID) \
  1339. __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
  1340. \
  1341. (*lhs) = (TYPE)((rhs)OP(*lhs)); \
  1342. \
  1343. __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);
  1344. #ifdef KMP_GOMP_COMPAT
  1345. #define OP_GOMP_CRITICAL_REV(TYPE, OP, FLAG) \
  1346. if ((FLAG) && (__kmp_atomic_mode == 2)) { \
  1347. KMP_CHECK_GTID; \
  1348. OP_CRITICAL_REV(TYPE, OP, 0); \
  1349. return; \
  1350. }
  1351. #else
  1352. #define OP_GOMP_CRITICAL_REV(TYPE, OP, FLAG)
  1353. #endif /* KMP_GOMP_COMPAT */
1354. // Beginning of a definition (provides name, parameters, debug trace)
1355. // TYPE_ID - operand type and size (fixed* for signed, fixed*u for unsigned
1356. // fixed-size integers)
  1357. // OP_ID - operation identifier (add, sub, mul, ...)
  1358. // TYPE - operands' type
  1359. #define ATOMIC_BEGIN_REV(TYPE_ID, OP_ID, TYPE, RET_TYPE) \
  1360. RET_TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID##_rev(ident_t *id_ref, int gtid, \
  1361. TYPE *lhs, TYPE rhs) { \
  1362. KMP_DEBUG_ASSERT(__kmp_init_serial); \
  1363. KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID "_rev: T#%d\n", gtid));
  1364. // ------------------------------------------------------------------------
  1365. // Operation on *lhs, rhs using "compare_and_store" routine
  1366. // TYPE - operands' type
  1367. // BITS - size in bits, used to distinguish low level calls
  1368. // OP - operator
  1369. // Note: temp_val introduced in order to force the compiler to read
  1370. // *lhs only once (w/o it the compiler reads *lhs twice)
  1371. #define OP_CMPXCHG_REV(TYPE, BITS, OP) \
  1372. { \
  1373. TYPE KMP_ATOMIC_VOLATILE temp_val; \
  1374. TYPE old_value, new_value; \
  1375. temp_val = *lhs; \
  1376. old_value = temp_val; \
  1377. new_value = (TYPE)(rhs OP old_value); \
  1378. while (!KMP_COMPARE_AND_STORE_ACQ##BITS( \
  1379. (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) & old_value, \
  1380. *VOLATILE_CAST(kmp_int##BITS *) & new_value)) { \
  1381. KMP_DO_PAUSE; \
  1382. \
  1383. temp_val = *lhs; \
  1384. old_value = temp_val; \
  1385. new_value = (TYPE)(rhs OP old_value); \
  1386. } \
  1387. }
  1388. // -------------------------------------------------------------------------
  1389. #define ATOMIC_CMPXCHG_REV(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, GOMP_FLAG) \
  1390. ATOMIC_BEGIN_REV(TYPE_ID, OP_ID, TYPE, void) \
  1391. OP_GOMP_CRITICAL_REV(TYPE, OP, GOMP_FLAG) \
  1392. OP_CMPXCHG_REV(TYPE, BITS, OP) \
  1393. }
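// For illustration, ATOMIC_CMPXCHG_REV(fixed4, sub, kmp_int32, 32, -, 4i, KMP_ARCH_X86) below
// roughly yields (GOMP compatibility path and the retry pause omitted):
//
//   void __kmpc_atomic_fixed4_sub_rev(ident_t *id_ref, int gtid, kmp_int32 *lhs, kmp_int32 rhs) {
//     kmp_int32 old_value, new_value;
//     do {
//       old_value = *lhs;
//       new_value = rhs - old_value;            // note the reversed operand order
//     } while (!KMP_COMPARE_AND_STORE_ACQ32((kmp_int32 *)lhs, old_value, new_value));
//   }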
  1394. // ------------------------------------------------------------------------
  1395. // Entries definition for integer operands
  1396. // TYPE_ID - operands type and size (fixed4, float4)
  1397. // OP_ID - operation identifier (add, sub, mul, ...)
  1398. // TYPE - operand type
  1399. // BITS - size in bits, used to distinguish low level calls
  1400. // OP - operator (used in critical section)
  1401. // LCK_ID - lock identifier, used to possibly distinguish lock variable
1402. // TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, GOMP_FLAG
  1403. // ------------------------------------------------------------------------
  1404. // Routines for ATOMIC integer operands, other operators
  1405. // ------------------------------------------------------------------------
1406. // TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, GOMP_FLAG
  1407. ATOMIC_CMPXCHG_REV(fixed1, div, kmp_int8, 8, /, 1i,
  1408. KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_rev
  1409. ATOMIC_CMPXCHG_REV(fixed1u, div, kmp_uint8, 8, /, 1i,
  1410. KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_rev
  1411. ATOMIC_CMPXCHG_REV(fixed1, shl, kmp_int8, 8, <<, 1i,
  1412. KMP_ARCH_X86) // __kmpc_atomic_fixed1_shl_rev
  1413. ATOMIC_CMPXCHG_REV(fixed1, shr, kmp_int8, 8, >>, 1i,
  1414. KMP_ARCH_X86) // __kmpc_atomic_fixed1_shr_rev
  1415. ATOMIC_CMPXCHG_REV(fixed1u, shr, kmp_uint8, 8, >>, 1i,
  1416. KMP_ARCH_X86) // __kmpc_atomic_fixed1u_shr_rev
  1417. ATOMIC_CMPXCHG_REV(fixed1, sub, kmp_int8, 8, -, 1i,
  1418. KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_rev
  1419. ATOMIC_CMPXCHG_REV(fixed2, div, kmp_int16, 16, /, 2i,
  1420. KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_rev
  1421. ATOMIC_CMPXCHG_REV(fixed2u, div, kmp_uint16, 16, /, 2i,
  1422. KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_rev
  1423. ATOMIC_CMPXCHG_REV(fixed2, shl, kmp_int16, 16, <<, 2i,
  1424. KMP_ARCH_X86) // __kmpc_atomic_fixed2_shl_rev
  1425. ATOMIC_CMPXCHG_REV(fixed2, shr, kmp_int16, 16, >>, 2i,
  1426. KMP_ARCH_X86) // __kmpc_atomic_fixed2_shr_rev
  1427. ATOMIC_CMPXCHG_REV(fixed2u, shr, kmp_uint16, 16, >>, 2i,
  1428. KMP_ARCH_X86) // __kmpc_atomic_fixed2u_shr_rev
  1429. ATOMIC_CMPXCHG_REV(fixed2, sub, kmp_int16, 16, -, 2i,
  1430. KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_rev
  1431. ATOMIC_CMPXCHG_REV(fixed4, div, kmp_int32, 32, /, 4i,
  1432. KMP_ARCH_X86) // __kmpc_atomic_fixed4_div_rev
  1433. ATOMIC_CMPXCHG_REV(fixed4u, div, kmp_uint32, 32, /, 4i,
  1434. KMP_ARCH_X86) // __kmpc_atomic_fixed4u_div_rev
  1435. ATOMIC_CMPXCHG_REV(fixed4, shl, kmp_int32, 32, <<, 4i,
  1436. KMP_ARCH_X86) // __kmpc_atomic_fixed4_shl_rev
  1437. ATOMIC_CMPXCHG_REV(fixed4, shr, kmp_int32, 32, >>, 4i,
  1438. KMP_ARCH_X86) // __kmpc_atomic_fixed4_shr_rev
  1439. ATOMIC_CMPXCHG_REV(fixed4u, shr, kmp_uint32, 32, >>, 4i,
  1440. KMP_ARCH_X86) // __kmpc_atomic_fixed4u_shr_rev
  1441. ATOMIC_CMPXCHG_REV(fixed4, sub, kmp_int32, 32, -, 4i,
  1442. KMP_ARCH_X86) // __kmpc_atomic_fixed4_sub_rev
  1443. ATOMIC_CMPXCHG_REV(fixed8, div, kmp_int64, 64, /, 8i,
  1444. KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_rev
  1445. ATOMIC_CMPXCHG_REV(fixed8u, div, kmp_uint64, 64, /, 8i,
  1446. KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_rev
  1447. ATOMIC_CMPXCHG_REV(fixed8, shl, kmp_int64, 64, <<, 8i,
  1448. KMP_ARCH_X86) // __kmpc_atomic_fixed8_shl_rev
  1449. ATOMIC_CMPXCHG_REV(fixed8, shr, kmp_int64, 64, >>, 8i,
  1450. KMP_ARCH_X86) // __kmpc_atomic_fixed8_shr_rev
  1451. ATOMIC_CMPXCHG_REV(fixed8u, shr, kmp_uint64, 64, >>, 8i,
  1452. KMP_ARCH_X86) // __kmpc_atomic_fixed8u_shr_rev
  1453. ATOMIC_CMPXCHG_REV(fixed8, sub, kmp_int64, 64, -, 8i,
  1454. KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_rev
  1455. ATOMIC_CMPXCHG_REV(float4, div, kmp_real32, 32, /, 4r,
  1456. KMP_ARCH_X86) // __kmpc_atomic_float4_div_rev
  1457. ATOMIC_CMPXCHG_REV(float4, sub, kmp_real32, 32, -, 4r,
  1458. KMP_ARCH_X86) // __kmpc_atomic_float4_sub_rev
  1459. ATOMIC_CMPXCHG_REV(float8, div, kmp_real64, 64, /, 8r,
  1460. KMP_ARCH_X86) // __kmpc_atomic_float8_div_rev
  1461. ATOMIC_CMPXCHG_REV(float8, sub, kmp_real64, 64, -, 8r,
  1462. KMP_ARCH_X86) // __kmpc_atomic_float8_sub_rev
1463. // TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, GOMP_FLAG
  1464. // ------------------------------------------------------------------------
  1465. // Routines for Extended types: long double, _Quad, complex flavours (use
  1466. // critical section)
  1467. // TYPE_ID, OP_ID, TYPE - detailed above
  1468. // OP - operator
  1469. // LCK_ID - lock identifier, used to possibly distinguish lock variable
  1470. #define ATOMIC_CRITICAL_REV(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \
  1471. ATOMIC_BEGIN_REV(TYPE_ID, OP_ID, TYPE, void) \
  1472. OP_GOMP_CRITICAL_REV(TYPE, OP, GOMP_FLAG) \
  1473. OP_CRITICAL_REV(TYPE, OP, LCK_ID) \
  1474. }
  1475. /* ------------------------------------------------------------------------- */
  1476. // routines for long double type
  1477. ATOMIC_CRITICAL_REV(float10, sub, long double, -, 10r,
  1478. 1) // __kmpc_atomic_float10_sub_rev
  1479. ATOMIC_CRITICAL_REV(float10, div, long double, /, 10r,
  1480. 1) // __kmpc_atomic_float10_div_rev
  1481. #if KMP_HAVE_QUAD
  1482. // routines for _Quad type
  1483. ATOMIC_CRITICAL_REV(float16, sub, QUAD_LEGACY, -, 16r,
  1484. 1) // __kmpc_atomic_float16_sub_rev
  1485. ATOMIC_CRITICAL_REV(float16, div, QUAD_LEGACY, /, 16r,
  1486. 1) // __kmpc_atomic_float16_div_rev
  1487. #if (KMP_ARCH_X86)
  1488. ATOMIC_CRITICAL_REV(float16, sub_a16, Quad_a16_t, -, 16r,
  1489. 1) // __kmpc_atomic_float16_sub_a16_rev
  1490. ATOMIC_CRITICAL_REV(float16, div_a16, Quad_a16_t, /, 16r,
  1491. 1) // __kmpc_atomic_float16_div_a16_rev
  1492. #endif // KMP_ARCH_X86
  1493. #endif // KMP_HAVE_QUAD
  1494. // routines for complex types
  1495. ATOMIC_CRITICAL_REV(cmplx4, sub, kmp_cmplx32, -, 8c,
  1496. 1) // __kmpc_atomic_cmplx4_sub_rev
  1497. ATOMIC_CRITICAL_REV(cmplx4, div, kmp_cmplx32, /, 8c,
  1498. 1) // __kmpc_atomic_cmplx4_div_rev
  1499. ATOMIC_CRITICAL_REV(cmplx8, sub, kmp_cmplx64, -, 16c,
  1500. 1) // __kmpc_atomic_cmplx8_sub_rev
  1501. ATOMIC_CRITICAL_REV(cmplx8, div, kmp_cmplx64, /, 16c,
  1502. 1) // __kmpc_atomic_cmplx8_div_rev
  1503. ATOMIC_CRITICAL_REV(cmplx10, sub, kmp_cmplx80, -, 20c,
  1504. 1) // __kmpc_atomic_cmplx10_sub_rev
  1505. ATOMIC_CRITICAL_REV(cmplx10, div, kmp_cmplx80, /, 20c,
  1506. 1) // __kmpc_atomic_cmplx10_div_rev
  1507. #if KMP_HAVE_QUAD
  1508. ATOMIC_CRITICAL_REV(cmplx16, sub, CPLX128_LEG, -, 32c,
  1509. 1) // __kmpc_atomic_cmplx16_sub_rev
  1510. ATOMIC_CRITICAL_REV(cmplx16, div, CPLX128_LEG, /, 32c,
  1511. 1) // __kmpc_atomic_cmplx16_div_rev
  1512. #if (KMP_ARCH_X86)
  1513. ATOMIC_CRITICAL_REV(cmplx16, sub_a16, kmp_cmplx128_a16_t, -, 32c,
  1514. 1) // __kmpc_atomic_cmplx16_sub_a16_rev
  1515. ATOMIC_CRITICAL_REV(cmplx16, div_a16, kmp_cmplx128_a16_t, /, 32c,
  1516. 1) // __kmpc_atomic_cmplx16_div_a16_rev
  1517. #endif // KMP_ARCH_X86
  1518. #endif // KMP_HAVE_QUAD
  1519. #endif // KMP_ARCH_X86 || KMP_ARCH_X86_64
  1520. // End of OpenMP 4.0: x = expr binop x for non-commutative operations.
  1521. /* ------------------------------------------------------------------------ */
  1522. /* Routines for mixed types of LHS and RHS, when RHS is "larger" */
1523. /* Note: to reduce the total number of type combinations, it is assumed */
1524. /* that the compiler converts the RHS to the longest floating-point type, */
1525. /* i.e. _Quad, before calling any of these routines. */
1526. /* The conversion to _Quad is done by the compiler during the calculation, */
1527. /* and the conversion back to TYPE happens before the assignment, like: */
1528. /* *lhs = (TYPE)( (_Quad)(*lhs) OP rhs ) */
1529. /* A performance penalty is expected because of software emulation. */
  1530. /* ------------------------------------------------------------------------ */
  1531. #define ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE) \
  1532. void __kmpc_atomic_##TYPE_ID##_##OP_ID##_##RTYPE_ID( \
  1533. ident_t *id_ref, int gtid, TYPE *lhs, RTYPE rhs) { \
  1534. KMP_DEBUG_ASSERT(__kmp_init_serial); \
  1535. KA_TRACE(100, \
  1536. ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID "_" #RTYPE_ID ": T#%d\n", \
  1537. gtid));
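// For illustration, ATOMIC_BEGIN_MIX(float4, kmp_real32, add, float8, kmp_real64) opens the
// definition of
//   void __kmpc_atomic_float4_add_float8(ident_t *id_ref, int gtid, kmp_real32 *lhs,
//                                        kmp_real64 rhs)
// i.e. mixed-type entry points take the LHS type and the (wider) RHS type separately.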
  1538. // -------------------------------------------------------------------------
  1539. #define ATOMIC_CRITICAL_FP(TYPE_ID, TYPE, OP_ID, OP, RTYPE_ID, RTYPE, LCK_ID, \
  1540. GOMP_FLAG) \
  1541. ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE) \
  1542. OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG) /* send assignment */ \
  1543. OP_UPDATE_CRITICAL(TYPE, OP, LCK_ID) /* send assignment */ \
  1544. }
  1545. // -------------------------------------------------------------------------
  1546. #if KMP_ARCH_X86 || KMP_ARCH_X86_64
  1547. // -------------------------------------------------------------------------
  1548. // X86 or X86_64: no alignment problems ====================================
  1549. #define ATOMIC_CMPXCHG_MIX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, RTYPE, \
  1550. LCK_ID, MASK, GOMP_FLAG) \
  1551. ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE) \
  1552. OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG) \
  1553. OP_CMPXCHG(TYPE, BITS, OP) \
  1554. }
  1555. // -------------------------------------------------------------------------
  1556. #else
  1557. // ------------------------------------------------------------------------
  1558. // Code for other architectures that don't handle unaligned accesses.
  1559. #define ATOMIC_CMPXCHG_MIX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, RTYPE, \
  1560. LCK_ID, MASK, GOMP_FLAG) \
  1561. ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE) \
  1562. OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG) \
  1563. if (!((kmp_uintptr_t)lhs & 0x##MASK)) { \
  1564. OP_CMPXCHG(TYPE, BITS, OP) /* aligned address */ \
  1565. } else { \
  1566. KMP_CHECK_GTID; \
  1567. OP_UPDATE_CRITICAL(TYPE, OP, \
  1568. LCK_ID) /* unaligned address - use critical */ \
  1569. } \
  1570. }
  1571. #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
  1572. // -------------------------------------------------------------------------
  1573. #if KMP_ARCH_X86 || KMP_ARCH_X86_64
  1574. // -------------------------------------------------------------------------
  1575. #define ATOMIC_CMPXCHG_REV_MIX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, \
  1576. RTYPE, LCK_ID, MASK, GOMP_FLAG) \
  1577. ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE) \
  1578. OP_GOMP_CRITICAL_REV(TYPE, OP, GOMP_FLAG) \
  1579. OP_CMPXCHG_REV(TYPE, BITS, OP) \
  1580. }
  1581. #define ATOMIC_CRITICAL_REV_FP(TYPE_ID, TYPE, OP_ID, OP, RTYPE_ID, RTYPE, \
  1582. LCK_ID, GOMP_FLAG) \
  1583. ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE) \
  1584. OP_GOMP_CRITICAL_REV(TYPE, OP, GOMP_FLAG) \
  1585. OP_CRITICAL_REV(TYPE, OP, LCK_ID) \
  1586. }
  1587. #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
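// For illustration, ATOMIC_CMPXCHG_MIX(fixed4, kmp_int32, mul, 32, *, float8, kmp_real64, 4i, 3, 0)
// below roughly produces on x86/x86_64:
//
//   void __kmpc_atomic_fixed4_mul_float8(ident_t *id_ref, int gtid, kmp_int32 *lhs,
//                                        kmp_real64 rhs) {
//     kmp_int32 old_value, new_value;
//     do {
//       old_value = *lhs;
//       new_value = (kmp_int32)(old_value * rhs); // computed in the wider RHS type,
//                                                 // stored back as the LHS type
//     } while (!KMP_COMPARE_AND_STORE_ACQ32((kmp_int32 *)lhs, old_value, new_value));
//   }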
  1588. // RHS=float8
  1589. ATOMIC_CMPXCHG_MIX(fixed1, char, mul, 8, *, float8, kmp_real64, 1i, 0,
  1590. KMP_ARCH_X86) // __kmpc_atomic_fixed1_mul_float8
  1591. ATOMIC_CMPXCHG_MIX(fixed1, char, div, 8, /, float8, kmp_real64, 1i, 0,
  1592. KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_float8
  1593. ATOMIC_CMPXCHG_MIX(fixed2, short, mul, 16, *, float8, kmp_real64, 2i, 1,
  1594. KMP_ARCH_X86) // __kmpc_atomic_fixed2_mul_float8
  1595. ATOMIC_CMPXCHG_MIX(fixed2, short, div, 16, /, float8, kmp_real64, 2i, 1,
  1596. KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_float8
  1597. ATOMIC_CMPXCHG_MIX(fixed4, kmp_int32, mul, 32, *, float8, kmp_real64, 4i, 3,
  1598. 0) // __kmpc_atomic_fixed4_mul_float8
  1599. ATOMIC_CMPXCHG_MIX(fixed4, kmp_int32, div, 32, /, float8, kmp_real64, 4i, 3,
  1600. 0) // __kmpc_atomic_fixed4_div_float8
  1601. ATOMIC_CMPXCHG_MIX(fixed8, kmp_int64, mul, 64, *, float8, kmp_real64, 8i, 7,
  1602. KMP_ARCH_X86) // __kmpc_atomic_fixed8_mul_float8
  1603. ATOMIC_CMPXCHG_MIX(fixed8, kmp_int64, div, 64, /, float8, kmp_real64, 8i, 7,
  1604. KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_float8
  1605. ATOMIC_CMPXCHG_MIX(float4, kmp_real32, add, 32, +, float8, kmp_real64, 4r, 3,
  1606. KMP_ARCH_X86) // __kmpc_atomic_float4_add_float8
  1607. ATOMIC_CMPXCHG_MIX(float4, kmp_real32, sub, 32, -, float8, kmp_real64, 4r, 3,
  1608. KMP_ARCH_X86) // __kmpc_atomic_float4_sub_float8
  1609. ATOMIC_CMPXCHG_MIX(float4, kmp_real32, mul, 32, *, float8, kmp_real64, 4r, 3,
  1610. KMP_ARCH_X86) // __kmpc_atomic_float4_mul_float8
  1611. ATOMIC_CMPXCHG_MIX(float4, kmp_real32, div, 32, /, float8, kmp_real64, 4r, 3,
  1612. KMP_ARCH_X86) // __kmpc_atomic_float4_div_float8
  1613. // RHS=float16 (deprecated, to be removed when we are sure the compiler does not
  1614. // use them)
  1615. #if KMP_HAVE_QUAD
  1616. ATOMIC_CMPXCHG_MIX(fixed1, char, add, 8, +, fp, _Quad, 1i, 0,
  1617. KMP_ARCH_X86) // __kmpc_atomic_fixed1_add_fp
  1618. ATOMIC_CMPXCHG_MIX(fixed1u, uchar, add, 8, +, fp, _Quad, 1i, 0,
  1619. KMP_ARCH_X86) // __kmpc_atomic_fixed1u_add_fp
  1620. ATOMIC_CMPXCHG_MIX(fixed1, char, sub, 8, -, fp, _Quad, 1i, 0,
  1621. KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_fp
  1622. ATOMIC_CMPXCHG_MIX(fixed1u, uchar, sub, 8, -, fp, _Quad, 1i, 0,
  1623. KMP_ARCH_X86) // __kmpc_atomic_fixed1u_sub_fp
  1624. ATOMIC_CMPXCHG_MIX(fixed1, char, mul, 8, *, fp, _Quad, 1i, 0,
  1625. KMP_ARCH_X86) // __kmpc_atomic_fixed1_mul_fp
  1626. ATOMIC_CMPXCHG_MIX(fixed1u, uchar, mul, 8, *, fp, _Quad, 1i, 0,
  1627. KMP_ARCH_X86) // __kmpc_atomic_fixed1u_mul_fp
  1628. ATOMIC_CMPXCHG_MIX(fixed1, char, div, 8, /, fp, _Quad, 1i, 0,
  1629. KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_fp
  1630. ATOMIC_CMPXCHG_MIX(fixed1u, uchar, div, 8, /, fp, _Quad, 1i, 0,
  1631. KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_fp
  1632. ATOMIC_CMPXCHG_MIX(fixed2, short, add, 16, +, fp, _Quad, 2i, 1,
  1633. KMP_ARCH_X86) // __kmpc_atomic_fixed2_add_fp
  1634. ATOMIC_CMPXCHG_MIX(fixed2u, ushort, add, 16, +, fp, _Quad, 2i, 1,
  1635. KMP_ARCH_X86) // __kmpc_atomic_fixed2u_add_fp
  1636. ATOMIC_CMPXCHG_MIX(fixed2, short, sub, 16, -, fp, _Quad, 2i, 1,
  1637. KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_fp
  1638. ATOMIC_CMPXCHG_MIX(fixed2u, ushort, sub, 16, -, fp, _Quad, 2i, 1,
  1639. KMP_ARCH_X86) // __kmpc_atomic_fixed2u_sub_fp
  1640. ATOMIC_CMPXCHG_MIX(fixed2, short, mul, 16, *, fp, _Quad, 2i, 1,
  1641. KMP_ARCH_X86) // __kmpc_atomic_fixed2_mul_fp
  1642. ATOMIC_CMPXCHG_MIX(fixed2u, ushort, mul, 16, *, fp, _Quad, 2i, 1,
  1643. KMP_ARCH_X86) // __kmpc_atomic_fixed2u_mul_fp
  1644. ATOMIC_CMPXCHG_MIX(fixed2, short, div, 16, /, fp, _Quad, 2i, 1,
  1645. KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_fp
  1646. ATOMIC_CMPXCHG_MIX(fixed2u, ushort, div, 16, /, fp, _Quad, 2i, 1,
  1647. KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_fp
  1648. ATOMIC_CMPXCHG_MIX(fixed4, kmp_int32, add, 32, +, fp, _Quad, 4i, 3,
  1649. 0) // __kmpc_atomic_fixed4_add_fp
  1650. ATOMIC_CMPXCHG_MIX(fixed4u, kmp_uint32, add, 32, +, fp, _Quad, 4i, 3,
  1651. 0) // __kmpc_atomic_fixed4u_add_fp
  1652. ATOMIC_CMPXCHG_MIX(fixed4, kmp_int32, sub, 32, -, fp, _Quad, 4i, 3,
  1653. 0) // __kmpc_atomic_fixed4_sub_fp
  1654. ATOMIC_CMPXCHG_MIX(fixed4u, kmp_uint32, sub, 32, -, fp, _Quad, 4i, 3,
  1655. 0) // __kmpc_atomic_fixed4u_sub_fp
  1656. ATOMIC_CMPXCHG_MIX(fixed4, kmp_int32, mul, 32, *, fp, _Quad, 4i, 3,
  1657. 0) // __kmpc_atomic_fixed4_mul_fp
  1658. ATOMIC_CMPXCHG_MIX(fixed4u, kmp_uint32, mul, 32, *, fp, _Quad, 4i, 3,
  1659. 0) // __kmpc_atomic_fixed4u_mul_fp
  1660. ATOMIC_CMPXCHG_MIX(fixed4, kmp_int32, div, 32, /, fp, _Quad, 4i, 3,
  1661. 0) // __kmpc_atomic_fixed4_div_fp
  1662. ATOMIC_CMPXCHG_MIX(fixed4u, kmp_uint32, div, 32, /, fp, _Quad, 4i, 3,
  1663. 0) // __kmpc_atomic_fixed4u_div_fp
  1664. ATOMIC_CMPXCHG_MIX(fixed8, kmp_int64, add, 64, +, fp, _Quad, 8i, 7,
  1665. KMP_ARCH_X86) // __kmpc_atomic_fixed8_add_fp
  1666. ATOMIC_CMPXCHG_MIX(fixed8u, kmp_uint64, add, 64, +, fp, _Quad, 8i, 7,
  1667. KMP_ARCH_X86) // __kmpc_atomic_fixed8u_add_fp
  1668. ATOMIC_CMPXCHG_MIX(fixed8, kmp_int64, sub, 64, -, fp, _Quad, 8i, 7,
  1669. KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_fp
  1670. ATOMIC_CMPXCHG_MIX(fixed8u, kmp_uint64, sub, 64, -, fp, _Quad, 8i, 7,
  1671. KMP_ARCH_X86) // __kmpc_atomic_fixed8u_sub_fp
  1672. ATOMIC_CMPXCHG_MIX(fixed8, kmp_int64, mul, 64, *, fp, _Quad, 8i, 7,
  1673. KMP_ARCH_X86) // __kmpc_atomic_fixed8_mul_fp
  1674. ATOMIC_CMPXCHG_MIX(fixed8u, kmp_uint64, mul, 64, *, fp, _Quad, 8i, 7,
  1675. KMP_ARCH_X86) // __kmpc_atomic_fixed8u_mul_fp
  1676. ATOMIC_CMPXCHG_MIX(fixed8, kmp_int64, div, 64, /, fp, _Quad, 8i, 7,
  1677. KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_fp
  1678. ATOMIC_CMPXCHG_MIX(fixed8u, kmp_uint64, div, 64, /, fp, _Quad, 8i, 7,
  1679. KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_fp
  1680. ATOMIC_CMPXCHG_MIX(float4, kmp_real32, add, 32, +, fp, _Quad, 4r, 3,
  1681. KMP_ARCH_X86) // __kmpc_atomic_float4_add_fp
  1682. ATOMIC_CMPXCHG_MIX(float4, kmp_real32, sub, 32, -, fp, _Quad, 4r, 3,
  1683. KMP_ARCH_X86) // __kmpc_atomic_float4_sub_fp
  1684. ATOMIC_CMPXCHG_MIX(float4, kmp_real32, mul, 32, *, fp, _Quad, 4r, 3,
  1685. KMP_ARCH_X86) // __kmpc_atomic_float4_mul_fp
  1686. ATOMIC_CMPXCHG_MIX(float4, kmp_real32, div, 32, /, fp, _Quad, 4r, 3,
  1687. KMP_ARCH_X86) // __kmpc_atomic_float4_div_fp
  1688. ATOMIC_CMPXCHG_MIX(float8, kmp_real64, add, 64, +, fp, _Quad, 8r, 7,
  1689. KMP_ARCH_X86) // __kmpc_atomic_float8_add_fp
  1690. ATOMIC_CMPXCHG_MIX(float8, kmp_real64, sub, 64, -, fp, _Quad, 8r, 7,
  1691. KMP_ARCH_X86) // __kmpc_atomic_float8_sub_fp
  1692. ATOMIC_CMPXCHG_MIX(float8, kmp_real64, mul, 64, *, fp, _Quad, 8r, 7,
  1693. KMP_ARCH_X86) // __kmpc_atomic_float8_mul_fp
  1694. ATOMIC_CMPXCHG_MIX(float8, kmp_real64, div, 64, /, fp, _Quad, 8r, 7,
  1695. KMP_ARCH_X86) // __kmpc_atomic_float8_div_fp
  1696. #if KMP_ARCH_X86 || KMP_ARCH_X86_64
  1697. ATOMIC_CRITICAL_FP(float10, long double, add, +, fp, _Quad, 10r,
  1698. 1) // __kmpc_atomic_float10_add_fp
  1699. ATOMIC_CRITICAL_FP(float10, long double, sub, -, fp, _Quad, 10r,
  1700. 1) // __kmpc_atomic_float10_sub_fp
  1701. ATOMIC_CRITICAL_FP(float10, long double, mul, *, fp, _Quad, 10r,
  1702. 1) // __kmpc_atomic_float10_mul_fp
  1703. ATOMIC_CRITICAL_FP(float10, long double, div, /, fp, _Quad, 10r,
  1704. 1) // __kmpc_atomic_float10_div_fp
  1705. // Reverse operations
  1706. ATOMIC_CMPXCHG_REV_MIX(fixed1, char, sub_rev, 8, -, fp, _Quad, 1i, 0,
  1707. KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_rev_fp
  1708. ATOMIC_CMPXCHG_REV_MIX(fixed1u, uchar, sub_rev, 8, -, fp, _Quad, 1i, 0,
  1709. KMP_ARCH_X86) // __kmpc_atomic_fixed1u_sub_rev_fp
  1710. ATOMIC_CMPXCHG_REV_MIX(fixed1, char, div_rev, 8, /, fp, _Quad, 1i, 0,
  1711. KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_rev_fp
  1712. ATOMIC_CMPXCHG_REV_MIX(fixed1u, uchar, div_rev, 8, /, fp, _Quad, 1i, 0,
  1713. KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_rev_fp
  1714. ATOMIC_CMPXCHG_REV_MIX(fixed2, short, sub_rev, 16, -, fp, _Quad, 2i, 1,
  1715. KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_rev_fp
  1716. ATOMIC_CMPXCHG_REV_MIX(fixed2u, ushort, sub_rev, 16, -, fp, _Quad, 2i, 1,
  1717. KMP_ARCH_X86) // __kmpc_atomic_fixed2u_sub_rev_fp
  1718. ATOMIC_CMPXCHG_REV_MIX(fixed2, short, div_rev, 16, /, fp, _Quad, 2i, 1,
  1719. KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_rev_fp
  1720. ATOMIC_CMPXCHG_REV_MIX(fixed2u, ushort, div_rev, 16, /, fp, _Quad, 2i, 1,
  1721. KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_rev_fp
  1722. ATOMIC_CMPXCHG_REV_MIX(fixed4, kmp_int32, sub_rev, 32, -, fp, _Quad, 4i, 3,
  1723. 0) // __kmpc_atomic_fixed4_sub_rev_fp
  1724. ATOMIC_CMPXCHG_REV_MIX(fixed4u, kmp_uint32, sub_rev, 32, -, fp, _Quad, 4i, 3,
  1725. 0) // __kmpc_atomic_fixed4u_sub_rev_fp
  1726. ATOMIC_CMPXCHG_REV_MIX(fixed4, kmp_int32, div_rev, 32, /, fp, _Quad, 4i, 3,
  1727. 0) // __kmpc_atomic_fixed4_div_rev_fp
  1728. ATOMIC_CMPXCHG_REV_MIX(fixed4u, kmp_uint32, div_rev, 32, /, fp, _Quad, 4i, 3,
  1729. 0) // __kmpc_atomic_fixed4u_div_rev_fp
  1730. ATOMIC_CMPXCHG_REV_MIX(fixed8, kmp_int64, sub_rev, 64, -, fp, _Quad, 8i, 7,
  1731. KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_rev_fp
  1732. ATOMIC_CMPXCHG_REV_MIX(fixed8u, kmp_uint64, sub_rev, 64, -, fp, _Quad, 8i, 7,
  1733. KMP_ARCH_X86) // __kmpc_atomic_fixed8u_sub_rev_fp
  1734. ATOMIC_CMPXCHG_REV_MIX(fixed8, kmp_int64, div_rev, 64, /, fp, _Quad, 8i, 7,
  1735. KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_rev_fp
  1736. ATOMIC_CMPXCHG_REV_MIX(fixed8u, kmp_uint64, div_rev, 64, /, fp, _Quad, 8i, 7,
  1737. KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_rev_fp
  1738. ATOMIC_CMPXCHG_REV_MIX(float4, kmp_real32, sub_rev, 32, -, fp, _Quad, 4r, 3,
  1739. KMP_ARCH_X86) // __kmpc_atomic_float4_sub_rev_fp
  1740. ATOMIC_CMPXCHG_REV_MIX(float4, kmp_real32, div_rev, 32, /, fp, _Quad, 4r, 3,
  1741. KMP_ARCH_X86) // __kmpc_atomic_float4_div_rev_fp
  1742. ATOMIC_CMPXCHG_REV_MIX(float8, kmp_real64, sub_rev, 64, -, fp, _Quad, 8r, 7,
  1743. KMP_ARCH_X86) // __kmpc_atomic_float8_sub_rev_fp
  1744. ATOMIC_CMPXCHG_REV_MIX(float8, kmp_real64, div_rev, 64, /, fp, _Quad, 8r, 7,
  1745. KMP_ARCH_X86) // __kmpc_atomic_float8_div_rev_fp
  1746. ATOMIC_CRITICAL_REV_FP(float10, long double, sub_rev, -, fp, _Quad, 10r,
  1747. 1) // __kmpc_atomic_float10_sub_rev_fp
  1748. ATOMIC_CRITICAL_REV_FP(float10, long double, div_rev, /, fp, _Quad, 10r,
  1749. 1) // __kmpc_atomic_float10_div_rev_fp
  1750. #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
  1751. #endif // KMP_HAVE_QUAD
  1752. #if KMP_ARCH_X86 || KMP_ARCH_X86_64
  1753. // ------------------------------------------------------------------------
  1754. // X86 or X86_64: no alignment problems ====================================
  1755. #if USE_CMPXCHG_FIX
  1756. // workaround for C78287 (complex(kind=4) data type)
  1757. #define ATOMIC_CMPXCHG_CMPLX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, RTYPE, \
  1758. LCK_ID, MASK, GOMP_FLAG) \
  1759. ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE) \
  1760. OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG) \
  1761. OP_CMPXCHG_WORKAROUND(TYPE, BITS, OP) \
  1762. }
  1763. // end of the second part of the workaround for C78287
  1764. #else
  1765. #define ATOMIC_CMPXCHG_CMPLX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, RTYPE, \
  1766. LCK_ID, MASK, GOMP_FLAG) \
  1767. ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE) \
  1768. OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG) \
  1769. OP_CMPXCHG(TYPE, BITS, OP) \
  1770. }
  1771. #endif // USE_CMPXCHG_FIX
  1772. #else
  1773. // ------------------------------------------------------------------------
  1774. // Code for other architectures that don't handle unaligned accesses.
  1775. #define ATOMIC_CMPXCHG_CMPLX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, RTYPE, \
  1776. LCK_ID, MASK, GOMP_FLAG) \
  1777. ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE) \
  1778. OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG) \
  1779. if (!((kmp_uintptr_t)lhs & 0x##MASK)) { \
  1780. OP_CMPXCHG(TYPE, BITS, OP) /* aligned address */ \
  1781. } else { \
  1782. KMP_CHECK_GTID; \
  1783. OP_UPDATE_CRITICAL(TYPE, OP, \
  1784. LCK_ID) /* unaligned address - use critical */ \
  1785. } \
  1786. }
  1787. #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
  1788. ATOMIC_CMPXCHG_CMPLX(cmplx4, kmp_cmplx32, add, 64, +, cmplx8, kmp_cmplx64, 8c,
  1789. 7, KMP_ARCH_X86) // __kmpc_atomic_cmplx4_add_cmplx8
  1790. ATOMIC_CMPXCHG_CMPLX(cmplx4, kmp_cmplx32, sub, 64, -, cmplx8, kmp_cmplx64, 8c,
  1791. 7, KMP_ARCH_X86) // __kmpc_atomic_cmplx4_sub_cmplx8
  1792. ATOMIC_CMPXCHG_CMPLX(cmplx4, kmp_cmplx32, mul, 64, *, cmplx8, kmp_cmplx64, 8c,
  1793. 7, KMP_ARCH_X86) // __kmpc_atomic_cmplx4_mul_cmplx8
  1794. ATOMIC_CMPXCHG_CMPLX(cmplx4, kmp_cmplx32, div, 64, /, cmplx8, kmp_cmplx64, 8c,
  1795. 7, KMP_ARCH_X86) // __kmpc_atomic_cmplx4_div_cmplx8
  1796. // READ, WRITE, CAPTURE are supported only on IA-32 architecture and Intel(R) 64
  1797. #if KMP_ARCH_X86 || KMP_ARCH_X86_64
  1798. // ------------------------------------------------------------------------
  1799. // Atomic READ routines
  1800. // ------------------------------------------------------------------------
1801. // Beginning of a definition (provides name, parameters, debug trace)
1802. // TYPE_ID - operand type and size (fixed* for signed, fixed*u for unsigned
1803. // fixed-size integers)
  1804. // OP_ID - operation identifier (add, sub, mul, ...)
  1805. // TYPE - operands' type
  1806. #define ATOMIC_BEGIN_READ(TYPE_ID, OP_ID, TYPE, RET_TYPE) \
  1807. RET_TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID(ident_t *id_ref, int gtid, \
  1808. TYPE *loc) { \
  1809. KMP_DEBUG_ASSERT(__kmp_init_serial); \
  1810. KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid));
  1811. // ------------------------------------------------------------------------
  1812. // Operation on *lhs, rhs using "compare_and_store_ret" routine
  1813. // TYPE - operands' type
  1814. // BITS - size in bits, used to distinguish low level calls
  1815. // OP - operator
  1816. // Note: temp_val introduced in order to force the compiler to read
  1817. // *lhs only once (w/o it the compiler reads *lhs twice)
  1818. // TODO: check if it is still necessary
1819. // Return the old value regardless of the result of the "compare & swap" operation
  1820. #define OP_CMPXCHG_READ(TYPE, BITS, OP) \
  1821. { \
  1822. TYPE KMP_ATOMIC_VOLATILE temp_val; \
  1823. union f_i_union { \
  1824. TYPE f_val; \
  1825. kmp_int##BITS i_val; \
  1826. }; \
  1827. union f_i_union old_value; \
  1828. temp_val = *loc; \
  1829. old_value.f_val = temp_val; \
  1830. old_value.i_val = KMP_COMPARE_AND_STORE_RET##BITS( \
  1831. (kmp_int##BITS *)loc, \
  1832. *VOLATILE_CAST(kmp_int##BITS *) & old_value.i_val, \
  1833. *VOLATILE_CAST(kmp_int##BITS *) & old_value.i_val); \
  1834. new_value = old_value.f_val; \
  1835. return new_value; \
  1836. }
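// In effect, OP_CMPXCHG_READ performs an atomic read by issuing a compare-and-swap whose
// "new" value equals its "old" value: the location is never modified, but the hardware
// returns its current contents atomically. A rough sketch for a 32-bit float:
//
//   union { kmp_real32 f_val; kmp_int32 i_val; } old_value;
//   old_value.f_val = *loc;
//   old_value.i_val = KMP_COMPARE_AND_STORE_RET32((kmp_int32 *)loc, old_value.i_val,
//                                                 old_value.i_val);
//   return old_value.f_val;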
  1837. // -------------------------------------------------------------------------
  1838. // Operation on *lhs, rhs bound by critical section
  1839. // OP - operator (it's supposed to contain an assignment)
  1840. // LCK_ID - lock identifier
  1841. // Note: don't check gtid as it should always be valid
1842. // 1-, 2-byte operands - gtid is expected to be valid; other sizes - check before this macro
  1843. #define OP_CRITICAL_READ(OP, LCK_ID) \
  1844. __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
  1845. \
  1846. new_value = (*loc); \
  1847. \
  1848. __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);
  1849. // -------------------------------------------------------------------------
  1850. #ifdef KMP_GOMP_COMPAT
  1851. #define OP_GOMP_CRITICAL_READ(OP, FLAG) \
  1852. if ((FLAG) && (__kmp_atomic_mode == 2)) { \
  1853. KMP_CHECK_GTID; \
  1854. OP_CRITICAL_READ(OP, 0); \
  1855. return new_value; \
  1856. }
  1857. #else
  1858. #define OP_GOMP_CRITICAL_READ(OP, FLAG)
  1859. #endif /* KMP_GOMP_COMPAT */
  1860. // -------------------------------------------------------------------------
  1861. #define ATOMIC_FIXED_READ(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \
  1862. ATOMIC_BEGIN_READ(TYPE_ID, OP_ID, TYPE, TYPE) \
  1863. TYPE new_value; \
  1864. OP_GOMP_CRITICAL_READ(OP## =, GOMP_FLAG) \
  1865. new_value = KMP_TEST_THEN_ADD##BITS(loc, OP 0); \
  1866. return new_value; \
  1867. }
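// For illustration, ATOMIC_FIXED_READ(fixed4, rd, kmp_int32, 32, +, 0) below roughly yields:
//
//   kmp_int32 __kmpc_atomic_fixed4_rd(ident_t *id_ref, int gtid, kmp_int32 *loc) {
//     return KMP_TEST_THEN_ADD32(loc, +0);      // atomic fetch-and-add of zero == atomic load
//   }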
  1868. // -------------------------------------------------------------------------
  1869. #define ATOMIC_CMPXCHG_READ(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \
  1870. ATOMIC_BEGIN_READ(TYPE_ID, OP_ID, TYPE, TYPE) \
  1871. TYPE new_value; \
  1872. OP_GOMP_CRITICAL_READ(OP## =, GOMP_FLAG) \
  1873. OP_CMPXCHG_READ(TYPE, BITS, OP) \
  1874. }
  1875. // ------------------------------------------------------------------------
  1876. // Routines for Extended types: long double, _Quad, complex flavours (use
  1877. // critical section)
  1878. // TYPE_ID, OP_ID, TYPE - detailed above
  1879. // OP - operator
  1880. // LCK_ID - lock identifier, used to possibly distinguish lock variable
  1881. #define ATOMIC_CRITICAL_READ(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \
  1882. ATOMIC_BEGIN_READ(TYPE_ID, OP_ID, TYPE, TYPE) \
  1883. TYPE new_value; \
  1884. OP_GOMP_CRITICAL_READ(OP## =, GOMP_FLAG) /* send assignment */ \
  1885. OP_CRITICAL_READ(OP, LCK_ID) /* send assignment */ \
  1886. return new_value; \
  1887. }
  1888. // ------------------------------------------------------------------------
1889. // Fix for cmplx4 read (CQ220361) on Windows* OS. The regular routine that
1890. // returns the value does not work there, so the read value is returned
1891. // through an additional output parameter instead.
  1892. #if (KMP_OS_WINDOWS)
  1893. #define OP_CRITICAL_READ_WRK(OP, LCK_ID) \
  1894. __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
  1895. \
  1896. (*out) = (*loc); \
  1897. \
  1898. __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);
  1899. // ------------------------------------------------------------------------
  1900. #ifdef KMP_GOMP_COMPAT
  1901. #define OP_GOMP_CRITICAL_READ_WRK(OP, FLAG) \
  1902. if ((FLAG) && (__kmp_atomic_mode == 2)) { \
  1903. KMP_CHECK_GTID; \
  1904. OP_CRITICAL_READ_WRK(OP, 0); \
  1905. }
  1906. #else
  1907. #define OP_GOMP_CRITICAL_READ_WRK(OP, FLAG)
  1908. #endif /* KMP_GOMP_COMPAT */
  1909. // ------------------------------------------------------------------------
  1910. #define ATOMIC_BEGIN_READ_WRK(TYPE_ID, OP_ID, TYPE) \
  1911. void __kmpc_atomic_##TYPE_ID##_##OP_ID(TYPE *out, ident_t *id_ref, int gtid, \
  1912. TYPE *loc) { \
  1913. KMP_DEBUG_ASSERT(__kmp_init_serial); \
  1914. KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid));
  1915. // ------------------------------------------------------------------------
  1916. #define ATOMIC_CRITICAL_READ_WRK(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \
  1917. ATOMIC_BEGIN_READ_WRK(TYPE_ID, OP_ID, TYPE) \
  1918. OP_GOMP_CRITICAL_READ_WRK(OP## =, GOMP_FLAG) /* send assignment */ \
  1919. OP_CRITICAL_READ_WRK(OP, LCK_ID) /* send assignment */ \
  1920. }
  1921. #endif // KMP_OS_WINDOWS
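// For illustration, on Windows* OS the cmplx4 read below is therefore emitted with an output
// parameter,
//   void __kmpc_atomic_cmplx4_rd(kmp_cmplx32 *out, ident_t *id_ref, int gtid, kmp_cmplx32 *loc);
// while on other systems it keeps the usual form returning the value,
//   kmp_cmplx32 __kmpc_atomic_cmplx4_rd(ident_t *id_ref, int gtid, kmp_cmplx32 *loc);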
  1922. // ------------------------------------------------------------------------
1923. // TYPE_ID, OP_ID, TYPE, OP, GOMP_FLAG
  1924. ATOMIC_FIXED_READ(fixed4, rd, kmp_int32, 32, +, 0) // __kmpc_atomic_fixed4_rd
  1925. ATOMIC_FIXED_READ(fixed8, rd, kmp_int64, 64, +,
  1926. KMP_ARCH_X86) // __kmpc_atomic_fixed8_rd
  1927. ATOMIC_CMPXCHG_READ(float4, rd, kmp_real32, 32, +,
  1928. KMP_ARCH_X86) // __kmpc_atomic_float4_rd
  1929. ATOMIC_CMPXCHG_READ(float8, rd, kmp_real64, 64, +,
  1930. KMP_ARCH_X86) // __kmpc_atomic_float8_rd
1931. // !!! TODO: Remove lock operations for "char" since single-byte accesses cannot be torn
  1932. ATOMIC_CMPXCHG_READ(fixed1, rd, kmp_int8, 8, +,
  1933. KMP_ARCH_X86) // __kmpc_atomic_fixed1_rd
  1934. ATOMIC_CMPXCHG_READ(fixed2, rd, kmp_int16, 16, +,
  1935. KMP_ARCH_X86) // __kmpc_atomic_fixed2_rd
  1936. ATOMIC_CRITICAL_READ(float10, rd, long double, +, 10r,
  1937. 1) // __kmpc_atomic_float10_rd
  1938. #if KMP_HAVE_QUAD
  1939. ATOMIC_CRITICAL_READ(float16, rd, QUAD_LEGACY, +, 16r,
  1940. 1) // __kmpc_atomic_float16_rd
  1941. #endif // KMP_HAVE_QUAD
  1942. // Fix for CQ220361 on Windows* OS
  1943. #if (KMP_OS_WINDOWS)
  1944. ATOMIC_CRITICAL_READ_WRK(cmplx4, rd, kmp_cmplx32, +, 8c,
  1945. 1) // __kmpc_atomic_cmplx4_rd
  1946. #else
  1947. ATOMIC_CRITICAL_READ(cmplx4, rd, kmp_cmplx32, +, 8c,
  1948. 1) // __kmpc_atomic_cmplx4_rd
  1949. #endif // (KMP_OS_WINDOWS)
  1950. ATOMIC_CRITICAL_READ(cmplx8, rd, kmp_cmplx64, +, 16c,
  1951. 1) // __kmpc_atomic_cmplx8_rd
  1952. ATOMIC_CRITICAL_READ(cmplx10, rd, kmp_cmplx80, +, 20c,
  1953. 1) // __kmpc_atomic_cmplx10_rd
  1954. #if KMP_HAVE_QUAD
  1955. ATOMIC_CRITICAL_READ(cmplx16, rd, CPLX128_LEG, +, 32c,
  1956. 1) // __kmpc_atomic_cmplx16_rd
  1957. #if (KMP_ARCH_X86)
  1958. ATOMIC_CRITICAL_READ(float16, a16_rd, Quad_a16_t, +, 16r,
  1959. 1) // __kmpc_atomic_float16_a16_rd
  1960. ATOMIC_CRITICAL_READ(cmplx16, a16_rd, kmp_cmplx128_a16_t, +, 32c,
  1961. 1) // __kmpc_atomic_cmplx16_a16_rd
  1962. #endif // (KMP_ARCH_X86)
  1963. #endif // KMP_HAVE_QUAD
  1964. // ------------------------------------------------------------------------
  1965. // Atomic WRITE routines
  1966. #define ATOMIC_XCHG_WR(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \
  1967. ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
  1968. OP_GOMP_CRITICAL(OP, GOMP_FLAG) \
  1969. KMP_XCHG_FIXED##BITS(lhs, rhs); \
  1970. }
  1971. // ------------------------------------------------------------------------
  1972. #define ATOMIC_XCHG_FLOAT_WR(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \
  1973. ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
  1974. OP_GOMP_CRITICAL(OP, GOMP_FLAG) \
  1975. KMP_XCHG_REAL##BITS(lhs, rhs); \
  1976. }
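// For illustration, ATOMIC_XCHG_WR(fixed4, wr, kmp_int32, 32, =, KMP_ARCH_X86) below roughly
// yields (GOMP compatibility path omitted):
//
//   void __kmpc_atomic_fixed4_wr(ident_t *id_ref, int gtid, kmp_int32 *lhs, kmp_int32 rhs) {
//     KMP_XCHG_FIXED32(lhs, rhs);               // atomic exchange; the old value is discarded
//   }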
  1977. // ------------------------------------------------------------------------
  1978. // Operation on *lhs, rhs using "compare_and_store" routine
  1979. // TYPE - operands' type
  1980. // BITS - size in bits, used to distinguish low level calls
  1981. // OP - operator
  1982. // Note: temp_val introduced in order to force the compiler to read
  1983. // *lhs only once (w/o it the compiler reads *lhs twice)
  1984. #define OP_CMPXCHG_WR(TYPE, BITS, OP) \
  1985. { \
  1986. TYPE KMP_ATOMIC_VOLATILE temp_val; \
  1987. TYPE old_value, new_value; \
  1988. temp_val = *lhs; \
  1989. old_value = temp_val; \
  1990. new_value = rhs; \
  1991. while (!KMP_COMPARE_AND_STORE_ACQ##BITS( \
  1992. (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) & old_value, \
  1993. *VOLATILE_CAST(kmp_int##BITS *) & new_value)) { \
  1994. temp_val = *lhs; \
  1995. old_value = temp_val; \
  1996. new_value = rhs; \
  1997. } \
  1998. }
  1999. // -------------------------------------------------------------------------
  2000. #define ATOMIC_CMPXCHG_WR(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \
  2001. ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
  2002. OP_GOMP_CRITICAL(OP, GOMP_FLAG) \
  2003. OP_CMPXCHG_WR(TYPE, BITS, OP) \
  2004. }
  2005. // ------------------------------------------------------------------------
  2006. // Routines for Extended types: long double, _Quad, complex flavours (use
  2007. // critical section)
  2008. // TYPE_ID, OP_ID, TYPE - detailed above
  2009. // OP - operator
  2010. // LCK_ID - lock identifier, used to possibly distinguish lock variable
  2011. #define ATOMIC_CRITICAL_WR(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \
  2012. ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
  2013. OP_GOMP_CRITICAL(OP, GOMP_FLAG) /* send assignment */ \
  2014. OP_CRITICAL(OP, LCK_ID) /* send assignment */ \
  2015. }
  2016. // -------------------------------------------------------------------------
  2017. ATOMIC_XCHG_WR(fixed1, wr, kmp_int8, 8, =,
  2018. KMP_ARCH_X86) // __kmpc_atomic_fixed1_wr
  2019. ATOMIC_XCHG_WR(fixed2, wr, kmp_int16, 16, =,
  2020. KMP_ARCH_X86) // __kmpc_atomic_fixed2_wr
  2021. ATOMIC_XCHG_WR(fixed4, wr, kmp_int32, 32, =,
  2022. KMP_ARCH_X86) // __kmpc_atomic_fixed4_wr
  2023. #if (KMP_ARCH_X86)
  2024. ATOMIC_CMPXCHG_WR(fixed8, wr, kmp_int64, 64, =,
  2025. KMP_ARCH_X86) // __kmpc_atomic_fixed8_wr
  2026. #else
  2027. ATOMIC_XCHG_WR(fixed8, wr, kmp_int64, 64, =,
  2028. KMP_ARCH_X86) // __kmpc_atomic_fixed8_wr
  2029. #endif // (KMP_ARCH_X86)
  2030. ATOMIC_XCHG_FLOAT_WR(float4, wr, kmp_real32, 32, =,
  2031. KMP_ARCH_X86) // __kmpc_atomic_float4_wr
  2032. #if (KMP_ARCH_X86)
  2033. ATOMIC_CMPXCHG_WR(float8, wr, kmp_real64, 64, =,
  2034. KMP_ARCH_X86) // __kmpc_atomic_float8_wr
  2035. #else
  2036. ATOMIC_XCHG_FLOAT_WR(float8, wr, kmp_real64, 64, =,
  2037. KMP_ARCH_X86) // __kmpc_atomic_float8_wr
  2038. #endif // (KMP_ARCH_X86)
  2039. ATOMIC_CRITICAL_WR(float10, wr, long double, =, 10r,
  2040. 1) // __kmpc_atomic_float10_wr
  2041. #if KMP_HAVE_QUAD
  2042. ATOMIC_CRITICAL_WR(float16, wr, QUAD_LEGACY, =, 16r,
  2043. 1) // __kmpc_atomic_float16_wr
  2044. #endif // KMP_HAVE_QUAD
  2045. ATOMIC_CRITICAL_WR(cmplx4, wr, kmp_cmplx32, =, 8c, 1) // __kmpc_atomic_cmplx4_wr
  2046. ATOMIC_CRITICAL_WR(cmplx8, wr, kmp_cmplx64, =, 16c,
  2047. 1) // __kmpc_atomic_cmplx8_wr
  2048. ATOMIC_CRITICAL_WR(cmplx10, wr, kmp_cmplx80, =, 20c,
  2049. 1) // __kmpc_atomic_cmplx10_wr
  2050. #if KMP_HAVE_QUAD
  2051. ATOMIC_CRITICAL_WR(cmplx16, wr, CPLX128_LEG, =, 32c,
  2052. 1) // __kmpc_atomic_cmplx16_wr
  2053. #if (KMP_ARCH_X86)
  2054. ATOMIC_CRITICAL_WR(float16, a16_wr, Quad_a16_t, =, 16r,
  2055. 1) // __kmpc_atomic_float16_a16_wr
  2056. ATOMIC_CRITICAL_WR(cmplx16, a16_wr, kmp_cmplx128_a16_t, =, 32c,
  2057. 1) // __kmpc_atomic_cmplx16_a16_wr
  2058. #endif // (KMP_ARCH_X86)
  2059. #endif // KMP_HAVE_QUAD
  2060. // ------------------------------------------------------------------------
  2061. // Atomic CAPTURE routines
2062. // Beginning of a definition (provides name, parameters, debug trace)
2063. // TYPE_ID - operand type and size (fixed* for signed, fixed*u for unsigned
2064. // fixed-size integers)
  2065. // OP_ID - operation identifier (add, sub, mul, ...)
  2066. // TYPE - operands' type
  2067. #define ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, RET_TYPE) \
  2068. RET_TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID(ident_t *id_ref, int gtid, \
  2069. TYPE *lhs, TYPE rhs, int flag) { \
  2070. KMP_DEBUG_ASSERT(__kmp_init_serial); \
  2071. KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid));
  2072. // -------------------------------------------------------------------------
  2073. // Operation on *lhs, rhs bound by critical section
  2074. // OP - operator (it's supposed to contain an assignment)
  2075. // LCK_ID - lock identifier
  2076. // Note: don't check gtid as it should always be valid
  2077. // 1, 2-byte - expect valid parameter, other - check before this macro
  2078. #define OP_CRITICAL_CPT(OP, LCK_ID) \
  2079. __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
  2080. \
  2081. if (flag) { \
  2082. (*lhs) OP rhs; \
  2083. new_value = (*lhs); \
  2084. } else { \
  2085. new_value = (*lhs); \
  2086. (*lhs) OP rhs; \
  2087. } \
  2088. \
  2089. __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
  2090. return new_value;
  2091. #define OP_UPDATE_CRITICAL_CPT(TYPE, OP, LCK_ID) \
  2092. __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
  2093. \
  2094. if (flag) { \
  2095. (*lhs) = (TYPE)((*lhs)OP rhs); \
  2096. new_value = (*lhs); \
  2097. } else { \
  2098. new_value = (*lhs); \
  2099. (*lhs) = (TYPE)((*lhs)OP rhs); \
  2100. } \
  2101. \
  2102. __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
  2103. return new_value;
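// Illustrative sketch (not compiled) of the flag handling shared by OP_CRITICAL_CPT
// and OP_UPDATE_CRITICAL_CPT above: flag != 0 captures the value *after* the update,
// flag == 0 the value *before* it; the real macros do this between the
// __kmp_acquire_atomic_lock / __kmp_release_atomic_lock calls. Names are placeholders.
#if 0
static int example_capture_ordering(int *lhs, int rhs, int flag) {
  int captured;
  /* ... atomic lock acquired here in the real macros ... */
  if (flag) {
    *lhs += rhs;     // update first
    captured = *lhs; // then capture the new value
  } else {
    captured = *lhs; // capture the old value
    *lhs += rhs;     // then update
  }
  /* ... atomic lock released here in the real macros ... */
  return captured;
}
#endif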
  2104. // ------------------------------------------------------------------------
  2105. #ifdef KMP_GOMP_COMPAT
  2106. #define OP_GOMP_CRITICAL_CPT(TYPE, OP, FLAG) \
  2107. if ((FLAG) && (__kmp_atomic_mode == 2)) { \
  2108. KMP_CHECK_GTID; \
  2109. OP_UPDATE_CRITICAL_CPT(TYPE, OP, 0); \
  2110. }
  2111. #else
  2112. #define OP_GOMP_CRITICAL_CPT(TYPE, OP, FLAG)
  2113. #endif /* KMP_GOMP_COMPAT */
  2114. // ------------------------------------------------------------------------
  2115. // Operation on *lhs, rhs using "compare_and_store" routine
  2116. // TYPE - operands' type
  2117. // BITS - size in bits, used to distinguish low level calls
  2118. // OP - operator
  2119. // Note: temp_val introduced in order to force the compiler to read
  2120. // *lhs only once (w/o it the compiler reads *lhs twice)
  2121. #define OP_CMPXCHG_CPT(TYPE, BITS, OP) \
  2122. { \
  2123. TYPE KMP_ATOMIC_VOLATILE temp_val; \
  2124. TYPE old_value, new_value; \
  2125. temp_val = *lhs; \
  2126. old_value = temp_val; \
  2127. new_value = (TYPE)(old_value OP rhs); \
  2128. while (!KMP_COMPARE_AND_STORE_ACQ##BITS( \
  2129. (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) & old_value, \
  2130. *VOLATILE_CAST(kmp_int##BITS *) & new_value)) { \
  2131. temp_val = *lhs; \
  2132. old_value = temp_val; \
  2133. new_value = (TYPE)(old_value OP rhs); \
  2134. } \
  2135. if (flag) { \
  2136. return new_value; \
  2137. } else \
  2138. return old_value; \
  2139. }
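// Illustrative sketch (not compiled) of the retry pattern OP_CMPXCHG_CPT generates,
// with a GCC/Clang builtin standing in for KMP_COMPARE_AND_STORE_ACQ##BITS and '+'
// standing in for OP; the function and variable names are placeholders.
#if 0
static kmp_int32 example_cmpxchg_capture(kmp_int32 *lhs, kmp_int32 rhs, int flag) {
  kmp_int32 old_value = *lhs;          // single read of *lhs, like temp_val above
  kmp_int32 new_value = old_value + rhs;
  while (!__atomic_compare_exchange_n(lhs, &old_value, new_value,
                                      /* weak */ 0, __ATOMIC_ACQUIRE,
                                      __ATOMIC_RELAXED)) {
    // the failed exchange reloaded old_value; recompute and retry
    new_value = old_value + rhs;
  }
  return flag ? new_value : old_value; // capture new vs. old value
}
#endif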
  2140. // -------------------------------------------------------------------------
  2141. #define ATOMIC_CMPXCHG_CPT(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \
  2142. ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \
  2143. TYPE new_value; \
  2144. (void)new_value; \
  2145. OP_GOMP_CRITICAL_CPT(TYPE, OP, GOMP_FLAG) \
  2146. OP_CMPXCHG_CPT(TYPE, BITS, OP) \
  2147. }
  2148. // -------------------------------------------------------------------------
  2149. #define ATOMIC_FIXED_ADD_CPT(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \
  2150. ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \
  2151. TYPE old_value, new_value; \
  2152. (void)new_value; \
  2153. OP_GOMP_CRITICAL_CPT(TYPE, OP, GOMP_FLAG) \
  2154. /* OP used as a sign for subtraction: (lhs-rhs) --> (lhs+-rhs) */ \
  2155. old_value = KMP_TEST_THEN_ADD##BITS(lhs, OP rhs); \
  2156. if (flag) { \
  2157. return old_value OP rhs; \
  2158. } else \
  2159. return old_value; \
  2160. }
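// Roughly what ATOMIC_FIXED_ADD_CPT(fixed4, add_cpt, kmp_int32, 32, +, 0) expands to
// on the fast path (illustrative sketch, not compiled; the GOMP critical branch and
// the generated __kmpc_atomic_* name and trace are omitted):
#if 0
static kmp_int32 example_fixed4_add_cpt(kmp_int32 *lhs, kmp_int32 rhs, int flag) {
  kmp_int32 old_value = KMP_TEST_THEN_ADD32(lhs, rhs); // atomic fetch-and-add
  return flag ? old_value + rhs  // value after the update
              : old_value;       // value before the update
}
#endif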
  2161. // -------------------------------------------------------------------------
  2162. ATOMIC_FIXED_ADD_CPT(fixed4, add_cpt, kmp_int32, 32, +,
  2163. 0) // __kmpc_atomic_fixed4_add_cpt
  2164. ATOMIC_FIXED_ADD_CPT(fixed4, sub_cpt, kmp_int32, 32, -,
  2165. 0) // __kmpc_atomic_fixed4_sub_cpt
  2166. ATOMIC_FIXED_ADD_CPT(fixed8, add_cpt, kmp_int64, 64, +,
  2167. KMP_ARCH_X86) // __kmpc_atomic_fixed8_add_cpt
  2168. ATOMIC_FIXED_ADD_CPT(fixed8, sub_cpt, kmp_int64, 64, -,
  2169. KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_cpt
  2170. ATOMIC_CMPXCHG_CPT(float4, add_cpt, kmp_real32, 32, +,
  2171. KMP_ARCH_X86) // __kmpc_atomic_float4_add_cpt
  2172. ATOMIC_CMPXCHG_CPT(float4, sub_cpt, kmp_real32, 32, -,
  2173. KMP_ARCH_X86) // __kmpc_atomic_float4_sub_cpt
  2174. ATOMIC_CMPXCHG_CPT(float8, add_cpt, kmp_real64, 64, +,
  2175. KMP_ARCH_X86) // __kmpc_atomic_float8_add_cpt
  2176. ATOMIC_CMPXCHG_CPT(float8, sub_cpt, kmp_real64, 64, -,
  2177. KMP_ARCH_X86) // __kmpc_atomic_float8_sub_cpt
  2178. // ------------------------------------------------------------------------
  2179. // Entries definition for integer operands
  2180. // TYPE_ID - operands type and size (fixed4, float4)
  2181. // OP_ID - operation identifier (add, sub, mul, ...)
  2182. // TYPE - operand type
  2183. // BITS - size in bits, used to distinguish low level calls
  2184. // OP - operator (used in critical section)
  2185. // TYPE_ID,OP_ID, TYPE, BITS,OP,GOMP_FLAG
  2186. // ------------------------------------------------------------------------
  2187. // Routines for ATOMIC integer operands, other operators
  2188. // ------------------------------------------------------------------------
  2189. // TYPE_ID,OP_ID, TYPE, OP, GOMP_FLAG
  2190. ATOMIC_CMPXCHG_CPT(fixed1, add_cpt, kmp_int8, 8, +,
  2191. KMP_ARCH_X86) // __kmpc_atomic_fixed1_add_cpt
  2192. ATOMIC_CMPXCHG_CPT(fixed1, andb_cpt, kmp_int8, 8, &,
  2193. 0) // __kmpc_atomic_fixed1_andb_cpt
  2194. ATOMIC_CMPXCHG_CPT(fixed1, div_cpt, kmp_int8, 8, /,
  2195. KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_cpt
  2196. ATOMIC_CMPXCHG_CPT(fixed1u, div_cpt, kmp_uint8, 8, /,
  2197. KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_cpt
  2198. ATOMIC_CMPXCHG_CPT(fixed1, mul_cpt, kmp_int8, 8, *,
  2199. KMP_ARCH_X86) // __kmpc_atomic_fixed1_mul_cpt
  2200. ATOMIC_CMPXCHG_CPT(fixed1, orb_cpt, kmp_int8, 8, |,
  2201. 0) // __kmpc_atomic_fixed1_orb_cpt
  2202. ATOMIC_CMPXCHG_CPT(fixed1, shl_cpt, kmp_int8, 8, <<,
  2203. KMP_ARCH_X86) // __kmpc_atomic_fixed1_shl_cpt
  2204. ATOMIC_CMPXCHG_CPT(fixed1, shr_cpt, kmp_int8, 8, >>,
  2205. KMP_ARCH_X86) // __kmpc_atomic_fixed1_shr_cpt
  2206. ATOMIC_CMPXCHG_CPT(fixed1u, shr_cpt, kmp_uint8, 8, >>,
  2207. KMP_ARCH_X86) // __kmpc_atomic_fixed1u_shr_cpt
  2208. ATOMIC_CMPXCHG_CPT(fixed1, sub_cpt, kmp_int8, 8, -,
  2209. KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_cpt
  2210. ATOMIC_CMPXCHG_CPT(fixed1, xor_cpt, kmp_int8, 8, ^,
  2211. 0) // __kmpc_atomic_fixed1_xor_cpt
  2212. ATOMIC_CMPXCHG_CPT(fixed2, add_cpt, kmp_int16, 16, +,
  2213. KMP_ARCH_X86) // __kmpc_atomic_fixed2_add_cpt
  2214. ATOMIC_CMPXCHG_CPT(fixed2, andb_cpt, kmp_int16, 16, &,
  2215. 0) // __kmpc_atomic_fixed2_andb_cpt
  2216. ATOMIC_CMPXCHG_CPT(fixed2, div_cpt, kmp_int16, 16, /,
  2217. KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_cpt
  2218. ATOMIC_CMPXCHG_CPT(fixed2u, div_cpt, kmp_uint16, 16, /,
  2219. KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_cpt
  2220. ATOMIC_CMPXCHG_CPT(fixed2, mul_cpt, kmp_int16, 16, *,
  2221. KMP_ARCH_X86) // __kmpc_atomic_fixed2_mul_cpt
  2222. ATOMIC_CMPXCHG_CPT(fixed2, orb_cpt, kmp_int16, 16, |,
  2223. 0) // __kmpc_atomic_fixed2_orb_cpt
  2224. ATOMIC_CMPXCHG_CPT(fixed2, shl_cpt, kmp_int16, 16, <<,
  2225. KMP_ARCH_X86) // __kmpc_atomic_fixed2_shl_cpt
  2226. ATOMIC_CMPXCHG_CPT(fixed2, shr_cpt, kmp_int16, 16, >>,
  2227. KMP_ARCH_X86) // __kmpc_atomic_fixed2_shr_cpt
  2228. ATOMIC_CMPXCHG_CPT(fixed2u, shr_cpt, kmp_uint16, 16, >>,
  2229. KMP_ARCH_X86) // __kmpc_atomic_fixed2u_shr_cpt
  2230. ATOMIC_CMPXCHG_CPT(fixed2, sub_cpt, kmp_int16, 16, -,
  2231. KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_cpt
  2232. ATOMIC_CMPXCHG_CPT(fixed2, xor_cpt, kmp_int16, 16, ^,
  2233. 0) // __kmpc_atomic_fixed2_xor_cpt
  2234. ATOMIC_CMPXCHG_CPT(fixed4, andb_cpt, kmp_int32, 32, &,
  2235. 0) // __kmpc_atomic_fixed4_andb_cpt
  2236. ATOMIC_CMPXCHG_CPT(fixed4, div_cpt, kmp_int32, 32, /,
  2237. KMP_ARCH_X86) // __kmpc_atomic_fixed4_div_cpt
  2238. ATOMIC_CMPXCHG_CPT(fixed4u, div_cpt, kmp_uint32, 32, /,
  2239. KMP_ARCH_X86) // __kmpc_atomic_fixed4u_div_cpt
  2240. ATOMIC_CMPXCHG_CPT(fixed4, mul_cpt, kmp_int32, 32, *,
  2241. KMP_ARCH_X86) // __kmpc_atomic_fixed4_mul_cpt
  2242. ATOMIC_CMPXCHG_CPT(fixed4, orb_cpt, kmp_int32, 32, |,
  2243. 0) // __kmpc_atomic_fixed4_orb_cpt
  2244. ATOMIC_CMPXCHG_CPT(fixed4, shl_cpt, kmp_int32, 32, <<,
  2245. KMP_ARCH_X86) // __kmpc_atomic_fixed4_shl_cpt
  2246. ATOMIC_CMPXCHG_CPT(fixed4, shr_cpt, kmp_int32, 32, >>,
  2247. KMP_ARCH_X86) // __kmpc_atomic_fixed4_shr_cpt
  2248. ATOMIC_CMPXCHG_CPT(fixed4u, shr_cpt, kmp_uint32, 32, >>,
  2249. KMP_ARCH_X86) // __kmpc_atomic_fixed4u_shr_cpt
  2250. ATOMIC_CMPXCHG_CPT(fixed4, xor_cpt, kmp_int32, 32, ^,
  2251. 0) // __kmpc_atomic_fixed4_xor_cpt
  2252. ATOMIC_CMPXCHG_CPT(fixed8, andb_cpt, kmp_int64, 64, &,
  2253. KMP_ARCH_X86) // __kmpc_atomic_fixed8_andb_cpt
  2254. ATOMIC_CMPXCHG_CPT(fixed8, div_cpt, kmp_int64, 64, /,
  2255. KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_cpt
  2256. ATOMIC_CMPXCHG_CPT(fixed8u, div_cpt, kmp_uint64, 64, /,
  2257. KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_cpt
  2258. ATOMIC_CMPXCHG_CPT(fixed8, mul_cpt, kmp_int64, 64, *,
  2259. KMP_ARCH_X86) // __kmpc_atomic_fixed8_mul_cpt
  2260. ATOMIC_CMPXCHG_CPT(fixed8, orb_cpt, kmp_int64, 64, |,
  2261. KMP_ARCH_X86) // __kmpc_atomic_fixed8_orb_cpt
  2262. ATOMIC_CMPXCHG_CPT(fixed8, shl_cpt, kmp_int64, 64, <<,
  2263. KMP_ARCH_X86) // __kmpc_atomic_fixed8_shl_cpt
  2264. ATOMIC_CMPXCHG_CPT(fixed8, shr_cpt, kmp_int64, 64, >>,
  2265. KMP_ARCH_X86) // __kmpc_atomic_fixed8_shr_cpt
  2266. ATOMIC_CMPXCHG_CPT(fixed8u, shr_cpt, kmp_uint64, 64, >>,
  2267. KMP_ARCH_X86) // __kmpc_atomic_fixed8u_shr_cpt
  2268. ATOMIC_CMPXCHG_CPT(fixed8, xor_cpt, kmp_int64, 64, ^,
  2269. KMP_ARCH_X86) // __kmpc_atomic_fixed8_xor_cpt
  2270. ATOMIC_CMPXCHG_CPT(float4, div_cpt, kmp_real32, 32, /,
  2271. KMP_ARCH_X86) // __kmpc_atomic_float4_div_cpt
  2272. ATOMIC_CMPXCHG_CPT(float4, mul_cpt, kmp_real32, 32, *,
  2273. KMP_ARCH_X86) // __kmpc_atomic_float4_mul_cpt
  2274. ATOMIC_CMPXCHG_CPT(float8, div_cpt, kmp_real64, 64, /,
  2275. KMP_ARCH_X86) // __kmpc_atomic_float8_div_cpt
  2276. ATOMIC_CMPXCHG_CPT(float8, mul_cpt, kmp_real64, 64, *,
  2277. KMP_ARCH_X86) // __kmpc_atomic_float8_mul_cpt
  2278. // TYPE_ID,OP_ID, TYPE, OP, GOMP_FLAG
  2279. // CAPTURE routines for mixed types RHS=float16
  2280. #if KMP_HAVE_QUAD
2281. // Beginning of a definition (provides name, parameters, debug trace)
  2282. // TYPE_ID - operands type and size (fixed*, fixed*u for signed, unsigned
  2283. // fixed)
  2284. // OP_ID - operation identifier (add, sub, mul, ...)
  2285. // TYPE - operands' type
  2286. #define ATOMIC_BEGIN_CPT_MIX(TYPE_ID, OP_ID, TYPE, RTYPE_ID, RTYPE) \
  2287. TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID##_##RTYPE_ID( \
  2288. ident_t *id_ref, int gtid, TYPE *lhs, RTYPE rhs, int flag) { \
  2289. KMP_DEBUG_ASSERT(__kmp_init_serial); \
  2290. KA_TRACE(100, \
  2291. ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID "_" #RTYPE_ID ": T#%d\n", \
  2292. gtid));
  2293. // -------------------------------------------------------------------------
  2294. #define ATOMIC_CMPXCHG_CPT_MIX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, \
  2295. RTYPE, LCK_ID, MASK, GOMP_FLAG) \
  2296. ATOMIC_BEGIN_CPT_MIX(TYPE_ID, OP_ID, TYPE, RTYPE_ID, RTYPE) \
  2297. TYPE new_value; \
  2298. (void)new_value; \
  2299. OP_GOMP_CRITICAL_CPT(TYPE, OP, GOMP_FLAG) \
  2300. OP_CMPXCHG_CPT(TYPE, BITS, OP) \
  2301. }
  2302. // -------------------------------------------------------------------------
  2303. #define ATOMIC_CRITICAL_CPT_MIX(TYPE_ID, TYPE, OP_ID, OP, RTYPE_ID, RTYPE, \
  2304. LCK_ID, GOMP_FLAG) \
  2305. ATOMIC_BEGIN_CPT_MIX(TYPE_ID, OP_ID, TYPE, RTYPE_ID, RTYPE) \
  2306. TYPE new_value; \
  2307. (void)new_value; \
  2308. OP_GOMP_CRITICAL_CPT(TYPE, OP, GOMP_FLAG) /* send assignment */ \
  2309. OP_UPDATE_CRITICAL_CPT(TYPE, OP, LCK_ID) /* send assignment */ \
  2310. }
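// For reference (illustrative only): the mixed-type capture entries instantiated
// below keep the LHS type and take the RHS as _Quad, e.g.
// ATOMIC_CMPXCHG_CPT_MIX(fixed1, char, add_cpt, ...) produces a routine shaped like:
//   char __kmpc_atomic_fixed1_add_cpt_fp(ident_t *id_ref, int gtid, char *lhs,
//                                        _Quad rhs, int flag);
// Inside OP_CMPXCHG_CPT / OP_UPDATE_CRITICAL_CPT the expression "old_value OP rhs"
// is evaluated after the usual promotion and the result is cast back to the LHS type.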
  2311. ATOMIC_CMPXCHG_CPT_MIX(fixed1, char, add_cpt, 8, +, fp, _Quad, 1i, 0,
  2312. KMP_ARCH_X86) // __kmpc_atomic_fixed1_add_cpt_fp
  2313. ATOMIC_CMPXCHG_CPT_MIX(fixed1u, uchar, add_cpt, 8, +, fp, _Quad, 1i, 0,
  2314. KMP_ARCH_X86) // __kmpc_atomic_fixed1u_add_cpt_fp
  2315. ATOMIC_CMPXCHG_CPT_MIX(fixed1, char, sub_cpt, 8, -, fp, _Quad, 1i, 0,
  2316. KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_cpt_fp
  2317. ATOMIC_CMPXCHG_CPT_MIX(fixed1u, uchar, sub_cpt, 8, -, fp, _Quad, 1i, 0,
  2318. KMP_ARCH_X86) // __kmpc_atomic_fixed1u_sub_cpt_fp
  2319. ATOMIC_CMPXCHG_CPT_MIX(fixed1, char, mul_cpt, 8, *, fp, _Quad, 1i, 0,
  2320. KMP_ARCH_X86) // __kmpc_atomic_fixed1_mul_cpt_fp
  2321. ATOMIC_CMPXCHG_CPT_MIX(fixed1u, uchar, mul_cpt, 8, *, fp, _Quad, 1i, 0,
  2322. KMP_ARCH_X86) // __kmpc_atomic_fixed1u_mul_cpt_fp
  2323. ATOMIC_CMPXCHG_CPT_MIX(fixed1, char, div_cpt, 8, /, fp, _Quad, 1i, 0,
  2324. KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_cpt_fp
  2325. ATOMIC_CMPXCHG_CPT_MIX(fixed1u, uchar, div_cpt, 8, /, fp, _Quad, 1i, 0,
  2326. KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_cpt_fp
  2327. ATOMIC_CMPXCHG_CPT_MIX(fixed2, short, add_cpt, 16, +, fp, _Quad, 2i, 1,
  2328. KMP_ARCH_X86) // __kmpc_atomic_fixed2_add_cpt_fp
  2329. ATOMIC_CMPXCHG_CPT_MIX(fixed2u, ushort, add_cpt, 16, +, fp, _Quad, 2i, 1,
  2330. KMP_ARCH_X86) // __kmpc_atomic_fixed2u_add_cpt_fp
  2331. ATOMIC_CMPXCHG_CPT_MIX(fixed2, short, sub_cpt, 16, -, fp, _Quad, 2i, 1,
  2332. KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_cpt_fp
  2333. ATOMIC_CMPXCHG_CPT_MIX(fixed2u, ushort, sub_cpt, 16, -, fp, _Quad, 2i, 1,
  2334. KMP_ARCH_X86) // __kmpc_atomic_fixed2u_sub_cpt_fp
  2335. ATOMIC_CMPXCHG_CPT_MIX(fixed2, short, mul_cpt, 16, *, fp, _Quad, 2i, 1,
  2336. KMP_ARCH_X86) // __kmpc_atomic_fixed2_mul_cpt_fp
  2337. ATOMIC_CMPXCHG_CPT_MIX(fixed2u, ushort, mul_cpt, 16, *, fp, _Quad, 2i, 1,
  2338. KMP_ARCH_X86) // __kmpc_atomic_fixed2u_mul_cpt_fp
  2339. ATOMIC_CMPXCHG_CPT_MIX(fixed2, short, div_cpt, 16, /, fp, _Quad, 2i, 1,
  2340. KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_cpt_fp
  2341. ATOMIC_CMPXCHG_CPT_MIX(fixed2u, ushort, div_cpt, 16, /, fp, _Quad, 2i, 1,
  2342. KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_cpt_fp
  2343. ATOMIC_CMPXCHG_CPT_MIX(fixed4, kmp_int32, add_cpt, 32, +, fp, _Quad, 4i, 3,
  2344. 0) // __kmpc_atomic_fixed4_add_cpt_fp
  2345. ATOMIC_CMPXCHG_CPT_MIX(fixed4u, kmp_uint32, add_cpt, 32, +, fp, _Quad, 4i, 3,
  2346. 0) // __kmpc_atomic_fixed4u_add_cpt_fp
  2347. ATOMIC_CMPXCHG_CPT_MIX(fixed4, kmp_int32, sub_cpt, 32, -, fp, _Quad, 4i, 3,
  2348. 0) // __kmpc_atomic_fixed4_sub_cpt_fp
  2349. ATOMIC_CMPXCHG_CPT_MIX(fixed4u, kmp_uint32, sub_cpt, 32, -, fp, _Quad, 4i, 3,
  2350. 0) // __kmpc_atomic_fixed4u_sub_cpt_fp
  2351. ATOMIC_CMPXCHG_CPT_MIX(fixed4, kmp_int32, mul_cpt, 32, *, fp, _Quad, 4i, 3,
  2352. 0) // __kmpc_atomic_fixed4_mul_cpt_fp
  2353. ATOMIC_CMPXCHG_CPT_MIX(fixed4u, kmp_uint32, mul_cpt, 32, *, fp, _Quad, 4i, 3,
  2354. 0) // __kmpc_atomic_fixed4u_mul_cpt_fp
  2355. ATOMIC_CMPXCHG_CPT_MIX(fixed4, kmp_int32, div_cpt, 32, /, fp, _Quad, 4i, 3,
  2356. 0) // __kmpc_atomic_fixed4_div_cpt_fp
  2357. ATOMIC_CMPXCHG_CPT_MIX(fixed4u, kmp_uint32, div_cpt, 32, /, fp, _Quad, 4i, 3,
  2358. 0) // __kmpc_atomic_fixed4u_div_cpt_fp
  2359. ATOMIC_CMPXCHG_CPT_MIX(fixed8, kmp_int64, add_cpt, 64, +, fp, _Quad, 8i, 7,
  2360. KMP_ARCH_X86) // __kmpc_atomic_fixed8_add_cpt_fp
  2361. ATOMIC_CMPXCHG_CPT_MIX(fixed8u, kmp_uint64, add_cpt, 64, +, fp, _Quad, 8i, 7,
  2362. KMP_ARCH_X86) // __kmpc_atomic_fixed8u_add_cpt_fp
  2363. ATOMIC_CMPXCHG_CPT_MIX(fixed8, kmp_int64, sub_cpt, 64, -, fp, _Quad, 8i, 7,
  2364. KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_cpt_fp
  2365. ATOMIC_CMPXCHG_CPT_MIX(fixed8u, kmp_uint64, sub_cpt, 64, -, fp, _Quad, 8i, 7,
  2366. KMP_ARCH_X86) // __kmpc_atomic_fixed8u_sub_cpt_fp
  2367. ATOMIC_CMPXCHG_CPT_MIX(fixed8, kmp_int64, mul_cpt, 64, *, fp, _Quad, 8i, 7,
  2368. KMP_ARCH_X86) // __kmpc_atomic_fixed8_mul_cpt_fp
  2369. ATOMIC_CMPXCHG_CPT_MIX(fixed8u, kmp_uint64, mul_cpt, 64, *, fp, _Quad, 8i, 7,
  2370. KMP_ARCH_X86) // __kmpc_atomic_fixed8u_mul_cpt_fp
  2371. ATOMIC_CMPXCHG_CPT_MIX(fixed8, kmp_int64, div_cpt, 64, /, fp, _Quad, 8i, 7,
  2372. KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_cpt_fp
  2373. ATOMIC_CMPXCHG_CPT_MIX(fixed8u, kmp_uint64, div_cpt, 64, /, fp, _Quad, 8i, 7,
  2374. KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_cpt_fp
  2375. ATOMIC_CMPXCHG_CPT_MIX(float4, kmp_real32, add_cpt, 32, +, fp, _Quad, 4r, 3,
  2376. KMP_ARCH_X86) // __kmpc_atomic_float4_add_cpt_fp
  2377. ATOMIC_CMPXCHG_CPT_MIX(float4, kmp_real32, sub_cpt, 32, -, fp, _Quad, 4r, 3,
  2378. KMP_ARCH_X86) // __kmpc_atomic_float4_sub_cpt_fp
  2379. ATOMIC_CMPXCHG_CPT_MIX(float4, kmp_real32, mul_cpt, 32, *, fp, _Quad, 4r, 3,
  2380. KMP_ARCH_X86) // __kmpc_atomic_float4_mul_cpt_fp
  2381. ATOMIC_CMPXCHG_CPT_MIX(float4, kmp_real32, div_cpt, 32, /, fp, _Quad, 4r, 3,
  2382. KMP_ARCH_X86) // __kmpc_atomic_float4_div_cpt_fp
  2383. ATOMIC_CMPXCHG_CPT_MIX(float8, kmp_real64, add_cpt, 64, +, fp, _Quad, 8r, 7,
  2384. KMP_ARCH_X86) // __kmpc_atomic_float8_add_cpt_fp
  2385. ATOMIC_CMPXCHG_CPT_MIX(float8, kmp_real64, sub_cpt, 64, -, fp, _Quad, 8r, 7,
  2386. KMP_ARCH_X86) // __kmpc_atomic_float8_sub_cpt_fp
  2387. ATOMIC_CMPXCHG_CPT_MIX(float8, kmp_real64, mul_cpt, 64, *, fp, _Quad, 8r, 7,
  2388. KMP_ARCH_X86) // __kmpc_atomic_float8_mul_cpt_fp
  2389. ATOMIC_CMPXCHG_CPT_MIX(float8, kmp_real64, div_cpt, 64, /, fp, _Quad, 8r, 7,
  2390. KMP_ARCH_X86) // __kmpc_atomic_float8_div_cpt_fp
  2391. ATOMIC_CRITICAL_CPT_MIX(float10, long double, add_cpt, +, fp, _Quad, 10r,
  2392. 1) // __kmpc_atomic_float10_add_cpt_fp
  2393. ATOMIC_CRITICAL_CPT_MIX(float10, long double, sub_cpt, -, fp, _Quad, 10r,
  2394. 1) // __kmpc_atomic_float10_sub_cpt_fp
  2395. ATOMIC_CRITICAL_CPT_MIX(float10, long double, mul_cpt, *, fp, _Quad, 10r,
  2396. 1) // __kmpc_atomic_float10_mul_cpt_fp
  2397. ATOMIC_CRITICAL_CPT_MIX(float10, long double, div_cpt, /, fp, _Quad, 10r,
  2398. 1) // __kmpc_atomic_float10_div_cpt_fp
  2399. #endif // KMP_HAVE_QUAD
  2400. // ------------------------------------------------------------------------
  2401. // Routines for C/C++ Reduction operators && and ||
  2402. // -------------------------------------------------------------------------
  2403. // Operation on *lhs, rhs bound by critical section
  2404. // OP - operator (it's supposed to contain an assignment)
  2405. // LCK_ID - lock identifier
  2406. // Note: don't check gtid as it should always be valid
  2407. // 1, 2-byte - expect valid parameter, other - check before this macro
  2408. #define OP_CRITICAL_L_CPT(OP, LCK_ID) \
  2409. __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
  2410. \
  2411. if (flag) { \
  2412. new_value OP rhs; \
  2413. (*lhs) = new_value; \
  2414. } else { \
  2415. new_value = (*lhs); \
  2416. (*lhs) OP rhs; \
  2417. } \
  2418. \
  2419. __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);
  2420. // ------------------------------------------------------------------------
  2421. #ifdef KMP_GOMP_COMPAT
  2422. #define OP_GOMP_CRITICAL_L_CPT(OP, FLAG) \
  2423. if ((FLAG) && (__kmp_atomic_mode == 2)) { \
  2424. KMP_CHECK_GTID; \
  2425. OP_CRITICAL_L_CPT(OP, 0); \
  2426. return new_value; \
  2427. }
  2428. #else
  2429. #define OP_GOMP_CRITICAL_L_CPT(OP, FLAG)
  2430. #endif /* KMP_GOMP_COMPAT */
  2431. // ------------------------------------------------------------------------
  2432. // Need separate macros for &&, || because there is no combined assignment
  2433. #define ATOMIC_CMPX_L_CPT(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \
  2434. ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \
  2435. TYPE new_value; \
  2436. (void)new_value; \
  2437. OP_GOMP_CRITICAL_L_CPT(= *lhs OP, GOMP_FLAG) \
  2438. OP_CMPXCHG_CPT(TYPE, BITS, OP) \
  2439. }
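// Illustrative note: C has no compound assignment for && and ||, so the GOMP path
// above receives OP spelled as "= *lhs &&" (or "= *lhs ||"); inside
// OP_CRITICAL_L_CPT the line "new_value OP rhs;" therefore reads, for example,
// "new_value = *lhs && rhs;" before the result is stored back into *lhs.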
  2440. ATOMIC_CMPX_L_CPT(fixed1, andl_cpt, char, 8, &&,
  2441. KMP_ARCH_X86) // __kmpc_atomic_fixed1_andl_cpt
  2442. ATOMIC_CMPX_L_CPT(fixed1, orl_cpt, char, 8, ||,
  2443. KMP_ARCH_X86) // __kmpc_atomic_fixed1_orl_cpt
  2444. ATOMIC_CMPX_L_CPT(fixed2, andl_cpt, short, 16, &&,
  2445. KMP_ARCH_X86) // __kmpc_atomic_fixed2_andl_cpt
  2446. ATOMIC_CMPX_L_CPT(fixed2, orl_cpt, short, 16, ||,
  2447. KMP_ARCH_X86) // __kmpc_atomic_fixed2_orl_cpt
  2448. ATOMIC_CMPX_L_CPT(fixed4, andl_cpt, kmp_int32, 32, &&,
  2449. 0) // __kmpc_atomic_fixed4_andl_cpt
  2450. ATOMIC_CMPX_L_CPT(fixed4, orl_cpt, kmp_int32, 32, ||,
  2451. 0) // __kmpc_atomic_fixed4_orl_cpt
  2452. ATOMIC_CMPX_L_CPT(fixed8, andl_cpt, kmp_int64, 64, &&,
  2453. KMP_ARCH_X86) // __kmpc_atomic_fixed8_andl_cpt
  2454. ATOMIC_CMPX_L_CPT(fixed8, orl_cpt, kmp_int64, 64, ||,
  2455. KMP_ARCH_X86) // __kmpc_atomic_fixed8_orl_cpt
  2456. // -------------------------------------------------------------------------
2457. // Routines for Fortran operators that have no direct C counterpart:
  2458. // MAX, MIN, .EQV., .NEQV.
  2459. // Operators .AND., .OR. are covered by __kmpc_atomic_*_{andl,orl}_cpt
  2460. // Intrinsics IAND, IOR, IEOR are covered by __kmpc_atomic_*_{andb,orb,xor}_cpt
  2461. // -------------------------------------------------------------------------
  2462. // MIN and MAX need separate macros
2463. // OP - comparison operator used to check whether any action is needed
  2464. #define MIN_MAX_CRITSECT_CPT(OP, LCK_ID) \
  2465. __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
  2466. \
  2467. if (*lhs OP rhs) { /* still need actions? */ \
  2468. old_value = *lhs; \
  2469. *lhs = rhs; \
  2470. if (flag) \
  2471. new_value = rhs; \
  2472. else \
  2473. new_value = old_value; \
  2474. } else { \
  2475. new_value = *lhs; \
  2476. } \
  2477. __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
  2478. return new_value;
  2479. // -------------------------------------------------------------------------
  2480. #ifdef KMP_GOMP_COMPAT
  2481. #define GOMP_MIN_MAX_CRITSECT_CPT(OP, FLAG) \
  2482. if ((FLAG) && (__kmp_atomic_mode == 2)) { \
  2483. KMP_CHECK_GTID; \
  2484. MIN_MAX_CRITSECT_CPT(OP, 0); \
  2485. }
  2486. #else
  2487. #define GOMP_MIN_MAX_CRITSECT_CPT(OP, FLAG)
  2488. #endif /* KMP_GOMP_COMPAT */
  2489. // -------------------------------------------------------------------------
  2490. #define MIN_MAX_CMPXCHG_CPT(TYPE, BITS, OP) \
  2491. { \
  2492. TYPE KMP_ATOMIC_VOLATILE temp_val; \
  2493. /*TYPE old_value; */ \
  2494. temp_val = *lhs; \
  2495. old_value = temp_val; \
  2496. while (old_value OP rhs && /* still need actions? */ \
  2497. !KMP_COMPARE_AND_STORE_ACQ##BITS( \
  2498. (kmp_int##BITS *)lhs, \
  2499. *VOLATILE_CAST(kmp_int##BITS *) & old_value, \
  2500. *VOLATILE_CAST(kmp_int##BITS *) & rhs)) { \
  2501. temp_val = *lhs; \
  2502. old_value = temp_val; \
  2503. } \
  2504. if (flag) \
  2505. return rhs; \
  2506. else \
  2507. return old_value; \
  2508. }
  2509. // -------------------------------------------------------------------------
  2510. // 1-byte, 2-byte operands - use critical section
  2511. #define MIN_MAX_CRITICAL_CPT(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \
  2512. ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \
  2513. TYPE new_value, old_value; \
  2514. if (*lhs OP rhs) { /* need actions? */ \
  2515. GOMP_MIN_MAX_CRITSECT_CPT(OP, GOMP_FLAG) \
  2516. MIN_MAX_CRITSECT_CPT(OP, LCK_ID) \
  2517. } \
  2518. return *lhs; \
  2519. }
  2520. #define MIN_MAX_COMPXCHG_CPT(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \
  2521. ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \
  2522. TYPE new_value, old_value; \
  2523. (void)new_value; \
  2524. if (*lhs OP rhs) { \
  2525. GOMP_MIN_MAX_CRITSECT_CPT(OP, GOMP_FLAG) \
  2526. MIN_MAX_CMPXCHG_CPT(TYPE, BITS, OP) \
  2527. } \
  2528. return *lhs; \
  2529. }
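// Simplified sketch (not compiled) of the min/max capture above, shown for a "max"
// entry where OP is '<' (replace *lhs only if *lhs < rhs); the builtin stands in
// for KMP_COMPARE_AND_STORE_ACQ##BITS and the names are placeholders:
#if 0
static kmp_int32 example_max_cpt(kmp_int32 *lhs, kmp_int32 rhs, int flag) {
  kmp_int32 old_value = *lhs;
  if (old_value < rhs) {                       // any action needed at all?
    while (old_value < rhs &&                  // still need to replace?
           !__atomic_compare_exchange_n(lhs, &old_value, rhs, 0,
                                        __ATOMIC_ACQUIRE, __ATOMIC_RELAXED)) {
      // old_value was reloaded by the failed exchange; re-test and retry
    }
    return flag ? rhs : old_value;             // captured new vs. old value
  }
  return *lhs;                                 // nothing to do
}
#endif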
  2530. MIN_MAX_COMPXCHG_CPT(fixed1, max_cpt, char, 8, <,
  2531. KMP_ARCH_X86) // __kmpc_atomic_fixed1_max_cpt
  2532. MIN_MAX_COMPXCHG_CPT(fixed1, min_cpt, char, 8, >,
  2533. KMP_ARCH_X86) // __kmpc_atomic_fixed1_min_cpt
  2534. MIN_MAX_COMPXCHG_CPT(fixed2, max_cpt, short, 16, <,
  2535. KMP_ARCH_X86) // __kmpc_atomic_fixed2_max_cpt
  2536. MIN_MAX_COMPXCHG_CPT(fixed2, min_cpt, short, 16, >,
  2537. KMP_ARCH_X86) // __kmpc_atomic_fixed2_min_cpt
  2538. MIN_MAX_COMPXCHG_CPT(fixed4, max_cpt, kmp_int32, 32, <,
  2539. 0) // __kmpc_atomic_fixed4_max_cpt
  2540. MIN_MAX_COMPXCHG_CPT(fixed4, min_cpt, kmp_int32, 32, >,
  2541. 0) // __kmpc_atomic_fixed4_min_cpt
  2542. MIN_MAX_COMPXCHG_CPT(fixed8, max_cpt, kmp_int64, 64, <,
  2543. KMP_ARCH_X86) // __kmpc_atomic_fixed8_max_cpt
  2544. MIN_MAX_COMPXCHG_CPT(fixed8, min_cpt, kmp_int64, 64, >,
  2545. KMP_ARCH_X86) // __kmpc_atomic_fixed8_min_cpt
  2546. MIN_MAX_COMPXCHG_CPT(float4, max_cpt, kmp_real32, 32, <,
  2547. KMP_ARCH_X86) // __kmpc_atomic_float4_max_cpt
  2548. MIN_MAX_COMPXCHG_CPT(float4, min_cpt, kmp_real32, 32, >,
  2549. KMP_ARCH_X86) // __kmpc_atomic_float4_min_cpt
  2550. MIN_MAX_COMPXCHG_CPT(float8, max_cpt, kmp_real64, 64, <,
  2551. KMP_ARCH_X86) // __kmpc_atomic_float8_max_cpt
  2552. MIN_MAX_COMPXCHG_CPT(float8, min_cpt, kmp_real64, 64, >,
  2553. KMP_ARCH_X86) // __kmpc_atomic_float8_min_cpt
  2554. MIN_MAX_CRITICAL_CPT(float10, max_cpt, long double, <, 10r,
  2555. 1) // __kmpc_atomic_float10_max_cpt
  2556. MIN_MAX_CRITICAL_CPT(float10, min_cpt, long double, >, 10r,
  2557. 1) // __kmpc_atomic_float10_min_cpt
  2558. #if KMP_HAVE_QUAD
  2559. MIN_MAX_CRITICAL_CPT(float16, max_cpt, QUAD_LEGACY, <, 16r,
  2560. 1) // __kmpc_atomic_float16_max_cpt
  2561. MIN_MAX_CRITICAL_CPT(float16, min_cpt, QUAD_LEGACY, >, 16r,
  2562. 1) // __kmpc_atomic_float16_min_cpt
  2563. #if (KMP_ARCH_X86)
  2564. MIN_MAX_CRITICAL_CPT(float16, max_a16_cpt, Quad_a16_t, <, 16r,
  2565. 1) // __kmpc_atomic_float16_max_a16_cpt
  2566. MIN_MAX_CRITICAL_CPT(float16, min_a16_cpt, Quad_a16_t, >, 16r,
2567. 1) // __kmpc_atomic_float16_min_a16_cpt
  2568. #endif // (KMP_ARCH_X86)
  2569. #endif // KMP_HAVE_QUAD
  2570. // ------------------------------------------------------------------------
  2571. #ifdef KMP_GOMP_COMPAT
  2572. #define OP_GOMP_CRITICAL_EQV_CPT(OP, FLAG) \
  2573. if ((FLAG) && (__kmp_atomic_mode == 2)) { \
  2574. KMP_CHECK_GTID; \
  2575. OP_CRITICAL_CPT(OP, 0); \
  2576. }
  2577. #else
  2578. #define OP_GOMP_CRITICAL_EQV_CPT(OP, FLAG)
  2579. #endif /* KMP_GOMP_COMPAT */
  2580. // ------------------------------------------------------------------------
  2581. #define ATOMIC_CMPX_EQV_CPT(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \
  2582. ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \
  2583. TYPE new_value; \
  2584. (void)new_value; \
  2585. OP_GOMP_CRITICAL_EQV_CPT(^= (TYPE) ~, GOMP_FLAG) /* send assignment */ \
  2586. OP_CMPXCHG_CPT(TYPE, BITS, OP) \
  2587. }
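// Illustrative note (not compiled): .NEQV. below maps to plain XOR, while .EQV. is
// spelled with the "^~" operator pair, so each CAS iteration computes
// old_value ^ ~rhs, which equals ~(old_value ^ rhs):
#if 0
static kmp_int32 example_eqv_step(kmp_int32 old_value, kmp_int32 rhs) {
  return old_value ^ ~rhs; // bitwise complement of XOR
}
#endif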
  2588. // ------------------------------------------------------------------------
  2589. ATOMIC_CMPXCHG_CPT(fixed1, neqv_cpt, kmp_int8, 8, ^,
  2590. KMP_ARCH_X86) // __kmpc_atomic_fixed1_neqv_cpt
  2591. ATOMIC_CMPXCHG_CPT(fixed2, neqv_cpt, kmp_int16, 16, ^,
  2592. KMP_ARCH_X86) // __kmpc_atomic_fixed2_neqv_cpt
  2593. ATOMIC_CMPXCHG_CPT(fixed4, neqv_cpt, kmp_int32, 32, ^,
  2594. KMP_ARCH_X86) // __kmpc_atomic_fixed4_neqv_cpt
  2595. ATOMIC_CMPXCHG_CPT(fixed8, neqv_cpt, kmp_int64, 64, ^,
  2596. KMP_ARCH_X86) // __kmpc_atomic_fixed8_neqv_cpt
  2597. ATOMIC_CMPX_EQV_CPT(fixed1, eqv_cpt, kmp_int8, 8, ^~,
  2598. KMP_ARCH_X86) // __kmpc_atomic_fixed1_eqv_cpt
  2599. ATOMIC_CMPX_EQV_CPT(fixed2, eqv_cpt, kmp_int16, 16, ^~,
  2600. KMP_ARCH_X86) // __kmpc_atomic_fixed2_eqv_cpt
  2601. ATOMIC_CMPX_EQV_CPT(fixed4, eqv_cpt, kmp_int32, 32, ^~,
  2602. KMP_ARCH_X86) // __kmpc_atomic_fixed4_eqv_cpt
  2603. ATOMIC_CMPX_EQV_CPT(fixed8, eqv_cpt, kmp_int64, 64, ^~,
  2604. KMP_ARCH_X86) // __kmpc_atomic_fixed8_eqv_cpt
  2605. // ------------------------------------------------------------------------
  2606. // Routines for Extended types: long double, _Quad, complex flavours (use
  2607. // critical section)
  2608. // TYPE_ID, OP_ID, TYPE - detailed above
  2609. // OP - operator
  2610. // LCK_ID - lock identifier, used to possibly distinguish lock variable
  2611. #define ATOMIC_CRITICAL_CPT(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \
  2612. ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \
  2613. TYPE new_value; \
  2614. OP_GOMP_CRITICAL_CPT(TYPE, OP, GOMP_FLAG) /* send assignment */ \
  2615. OP_UPDATE_CRITICAL_CPT(TYPE, OP, LCK_ID) /* send assignment */ \
  2616. }
  2617. // ------------------------------------------------------------------------
  2618. // Workaround for cmplx4. Regular routines with return value don't work
  2619. // on Win_32e. Let's return captured values through the additional parameter.
  2620. #define OP_CRITICAL_CPT_WRK(OP, LCK_ID) \
  2621. __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
  2622. \
  2623. if (flag) { \
  2624. (*lhs) OP rhs; \
  2625. (*out) = (*lhs); \
  2626. } else { \
  2627. (*out) = (*lhs); \
  2628. (*lhs) OP rhs; \
  2629. } \
  2630. \
  2631. __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
  2632. return;
  2633. // ------------------------------------------------------------------------
  2634. #ifdef KMP_GOMP_COMPAT
  2635. #define OP_GOMP_CRITICAL_CPT_WRK(OP, FLAG) \
  2636. if ((FLAG) && (__kmp_atomic_mode == 2)) { \
  2637. KMP_CHECK_GTID; \
  2638. OP_CRITICAL_CPT_WRK(OP## =, 0); \
  2639. }
  2640. #else
  2641. #define OP_GOMP_CRITICAL_CPT_WRK(OP, FLAG)
  2642. #endif /* KMP_GOMP_COMPAT */
  2643. // ------------------------------------------------------------------------
  2644. #define ATOMIC_BEGIN_WRK(TYPE_ID, OP_ID, TYPE) \
  2645. void __kmpc_atomic_##TYPE_ID##_##OP_ID(ident_t *id_ref, int gtid, TYPE *lhs, \
  2646. TYPE rhs, TYPE *out, int flag) { \
  2647. KMP_DEBUG_ASSERT(__kmp_init_serial); \
  2648. KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid));
  2649. // ------------------------------------------------------------------------
  2650. #define ATOMIC_CRITICAL_CPT_WRK(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \
  2651. ATOMIC_BEGIN_WRK(TYPE_ID, OP_ID, TYPE) \
  2652. OP_GOMP_CRITICAL_CPT_WRK(OP, GOMP_FLAG) \
  2653. OP_CRITICAL_CPT_WRK(OP## =, LCK_ID) \
  2654. }
  2655. // The end of workaround for cmplx4
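// For reference (illustrative only): the cmplx4 capture entry points instantiated
// further below via ATOMIC_CRITICAL_CPT_WRK return void and pass the captured value
// back through the extra 'out' parameter, e.g.:
//   void __kmpc_atomic_cmplx4_add_cpt(ident_t *id_ref, int gtid, kmp_cmplx32 *lhs,
//                                     kmp_cmplx32 rhs, kmp_cmplx32 *out, int flag);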
  2656. /* ------------------------------------------------------------------------- */
  2657. // routines for long double type
  2658. ATOMIC_CRITICAL_CPT(float10, add_cpt, long double, +, 10r,
  2659. 1) // __kmpc_atomic_float10_add_cpt
  2660. ATOMIC_CRITICAL_CPT(float10, sub_cpt, long double, -, 10r,
  2661. 1) // __kmpc_atomic_float10_sub_cpt
  2662. ATOMIC_CRITICAL_CPT(float10, mul_cpt, long double, *, 10r,
  2663. 1) // __kmpc_atomic_float10_mul_cpt
  2664. ATOMIC_CRITICAL_CPT(float10, div_cpt, long double, /, 10r,
  2665. 1) // __kmpc_atomic_float10_div_cpt
  2666. #if KMP_HAVE_QUAD
  2667. // routines for _Quad type
  2668. ATOMIC_CRITICAL_CPT(float16, add_cpt, QUAD_LEGACY, +, 16r,
  2669. 1) // __kmpc_atomic_float16_add_cpt
  2670. ATOMIC_CRITICAL_CPT(float16, sub_cpt, QUAD_LEGACY, -, 16r,
  2671. 1) // __kmpc_atomic_float16_sub_cpt
  2672. ATOMIC_CRITICAL_CPT(float16, mul_cpt, QUAD_LEGACY, *, 16r,
  2673. 1) // __kmpc_atomic_float16_mul_cpt
  2674. ATOMIC_CRITICAL_CPT(float16, div_cpt, QUAD_LEGACY, /, 16r,
  2675. 1) // __kmpc_atomic_float16_div_cpt
  2676. #if (KMP_ARCH_X86)
  2677. ATOMIC_CRITICAL_CPT(float16, add_a16_cpt, Quad_a16_t, +, 16r,
  2678. 1) // __kmpc_atomic_float16_add_a16_cpt
  2679. ATOMIC_CRITICAL_CPT(float16, sub_a16_cpt, Quad_a16_t, -, 16r,
  2680. 1) // __kmpc_atomic_float16_sub_a16_cpt
  2681. ATOMIC_CRITICAL_CPT(float16, mul_a16_cpt, Quad_a16_t, *, 16r,
  2682. 1) // __kmpc_atomic_float16_mul_a16_cpt
  2683. ATOMIC_CRITICAL_CPT(float16, div_a16_cpt, Quad_a16_t, /, 16r,
  2684. 1) // __kmpc_atomic_float16_div_a16_cpt
  2685. #endif // (KMP_ARCH_X86)
  2686. #endif // KMP_HAVE_QUAD
  2687. // routines for complex types
  2688. // cmplx4 routines to return void
  2689. ATOMIC_CRITICAL_CPT_WRK(cmplx4, add_cpt, kmp_cmplx32, +, 8c,
  2690. 1) // __kmpc_atomic_cmplx4_add_cpt
  2691. ATOMIC_CRITICAL_CPT_WRK(cmplx4, sub_cpt, kmp_cmplx32, -, 8c,
  2692. 1) // __kmpc_atomic_cmplx4_sub_cpt
  2693. ATOMIC_CRITICAL_CPT_WRK(cmplx4, mul_cpt, kmp_cmplx32, *, 8c,
  2694. 1) // __kmpc_atomic_cmplx4_mul_cpt
  2695. ATOMIC_CRITICAL_CPT_WRK(cmplx4, div_cpt, kmp_cmplx32, /, 8c,
  2696. 1) // __kmpc_atomic_cmplx4_div_cpt
  2697. ATOMIC_CRITICAL_CPT(cmplx8, add_cpt, kmp_cmplx64, +, 16c,
  2698. 1) // __kmpc_atomic_cmplx8_add_cpt
  2699. ATOMIC_CRITICAL_CPT(cmplx8, sub_cpt, kmp_cmplx64, -, 16c,
  2700. 1) // __kmpc_atomic_cmplx8_sub_cpt
  2701. ATOMIC_CRITICAL_CPT(cmplx8, mul_cpt, kmp_cmplx64, *, 16c,
  2702. 1) // __kmpc_atomic_cmplx8_mul_cpt
  2703. ATOMIC_CRITICAL_CPT(cmplx8, div_cpt, kmp_cmplx64, /, 16c,
  2704. 1) // __kmpc_atomic_cmplx8_div_cpt
  2705. ATOMIC_CRITICAL_CPT(cmplx10, add_cpt, kmp_cmplx80, +, 20c,
  2706. 1) // __kmpc_atomic_cmplx10_add_cpt
  2707. ATOMIC_CRITICAL_CPT(cmplx10, sub_cpt, kmp_cmplx80, -, 20c,
  2708. 1) // __kmpc_atomic_cmplx10_sub_cpt
  2709. ATOMIC_CRITICAL_CPT(cmplx10, mul_cpt, kmp_cmplx80, *, 20c,
  2710. 1) // __kmpc_atomic_cmplx10_mul_cpt
  2711. ATOMIC_CRITICAL_CPT(cmplx10, div_cpt, kmp_cmplx80, /, 20c,
  2712. 1) // __kmpc_atomic_cmplx10_div_cpt
  2713. #if KMP_HAVE_QUAD
  2714. ATOMIC_CRITICAL_CPT(cmplx16, add_cpt, CPLX128_LEG, +, 32c,
  2715. 1) // __kmpc_atomic_cmplx16_add_cpt
  2716. ATOMIC_CRITICAL_CPT(cmplx16, sub_cpt, CPLX128_LEG, -, 32c,
  2717. 1) // __kmpc_atomic_cmplx16_sub_cpt
  2718. ATOMIC_CRITICAL_CPT(cmplx16, mul_cpt, CPLX128_LEG, *, 32c,
  2719. 1) // __kmpc_atomic_cmplx16_mul_cpt
  2720. ATOMIC_CRITICAL_CPT(cmplx16, div_cpt, CPLX128_LEG, /, 32c,
  2721. 1) // __kmpc_atomic_cmplx16_div_cpt
  2722. #if (KMP_ARCH_X86)
  2723. ATOMIC_CRITICAL_CPT(cmplx16, add_a16_cpt, kmp_cmplx128_a16_t, +, 32c,
  2724. 1) // __kmpc_atomic_cmplx16_add_a16_cpt
  2725. ATOMIC_CRITICAL_CPT(cmplx16, sub_a16_cpt, kmp_cmplx128_a16_t, -, 32c,
  2726. 1) // __kmpc_atomic_cmplx16_sub_a16_cpt
  2727. ATOMIC_CRITICAL_CPT(cmplx16, mul_a16_cpt, kmp_cmplx128_a16_t, *, 32c,
  2728. 1) // __kmpc_atomic_cmplx16_mul_a16_cpt
  2729. ATOMIC_CRITICAL_CPT(cmplx16, div_a16_cpt, kmp_cmplx128_a16_t, /, 32c,
  2730. 1) // __kmpc_atomic_cmplx16_div_a16_cpt
  2731. #endif // (KMP_ARCH_X86)
  2732. #endif // KMP_HAVE_QUAD
2733. // OpenMP 4.0 capture forms: v = x = expr binop x;  { v = x; x = expr binop x; }
2734. // and { x = expr binop x; v = x; } for non-commutative operations (expr binop x).
  2735. // Supported only on IA-32 architecture and Intel(R) 64
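// Illustrative user-level code (not compiled) that corresponds to the *_cpt_rev
// entries defined below (e.g. __kmpc_atomic_float8_sub_cpt_rev); the function and
// variable names are placeholders:
#if 0
void example_capture_rev(double *x, double expr, double *v) {
#pragma omp atomic capture
  {
    *v = *x;        // capture the old value ...
    *x = expr - *x; // ... then x = expr binop x (operands reversed)
  }
}
#endif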
  2736. // -------------------------------------------------------------------------
  2737. // Operation on *lhs, rhs bound by critical section
  2738. // OP - operator (it's supposed to contain an assignment)
  2739. // LCK_ID - lock identifier
  2740. // Note: don't check gtid as it should always be valid
  2741. // 1, 2-byte - expect valid parameter, other - check before this macro
  2742. #define OP_CRITICAL_CPT_REV(TYPE, OP, LCK_ID) \
  2743. __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
  2744. \
  2745. if (flag) { \
  2746. /*temp_val = (*lhs);*/ \
  2747. (*lhs) = (TYPE)((rhs)OP(*lhs)); \
  2748. new_value = (*lhs); \
  2749. } else { \
  2750. new_value = (*lhs); \
  2751. (*lhs) = (TYPE)((rhs)OP(*lhs)); \
  2752. } \
  2753. __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
  2754. return new_value;
  2755. // ------------------------------------------------------------------------
  2756. #ifdef KMP_GOMP_COMPAT
  2757. #define OP_GOMP_CRITICAL_CPT_REV(TYPE, OP, FLAG) \
  2758. if ((FLAG) && (__kmp_atomic_mode == 2)) { \
  2759. KMP_CHECK_GTID; \
  2760. OP_CRITICAL_CPT_REV(TYPE, OP, 0); \
  2761. }
  2762. #else
  2763. #define OP_GOMP_CRITICAL_CPT_REV(TYPE, OP, FLAG)
  2764. #endif /* KMP_GOMP_COMPAT */
  2765. // ------------------------------------------------------------------------
  2766. // Operation on *lhs, rhs using "compare_and_store" routine
  2767. // TYPE - operands' type
  2768. // BITS - size in bits, used to distinguish low level calls
  2769. // OP - operator
  2770. // Note: temp_val introduced in order to force the compiler to read
  2771. // *lhs only once (w/o it the compiler reads *lhs twice)
  2772. #define OP_CMPXCHG_CPT_REV(TYPE, BITS, OP) \
  2773. { \
  2774. TYPE KMP_ATOMIC_VOLATILE temp_val; \
  2775. TYPE old_value, new_value; \
  2776. temp_val = *lhs; \
  2777. old_value = temp_val; \
  2778. new_value = (TYPE)(rhs OP old_value); \
  2779. while (!KMP_COMPARE_AND_STORE_ACQ##BITS( \
  2780. (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) & old_value, \
  2781. *VOLATILE_CAST(kmp_int##BITS *) & new_value)) { \
  2782. temp_val = *lhs; \
  2783. old_value = temp_val; \
  2784. new_value = (TYPE)(rhs OP old_value); \
  2785. } \
  2786. if (flag) { \
  2787. return new_value; \
  2788. } else \
  2789. return old_value; \
  2790. }
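// Illustrative sketch (not compiled): the same retry loop as OP_CMPXCHG_CPT but with
// the operands reversed (rhs OP old_value), shown here for subtraction with a
// GCC/Clang builtin standing in for KMP_COMPARE_AND_STORE_ACQ##BITS:
#if 0
static kmp_int32 example_sub_cpt_rev(kmp_int32 *lhs, kmp_int32 rhs, int flag) {
  kmp_int32 old_value = *lhs;
  kmp_int32 new_value = rhs - old_value;      // x = expr - x
  while (!__atomic_compare_exchange_n(lhs, &old_value, new_value, 0,
                                      __ATOMIC_ACQUIRE, __ATOMIC_RELAXED)) {
    new_value = rhs - old_value;              // recompute with the reloaded value
  }
  return flag ? new_value : old_value;
}
#endif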
  2791. // -------------------------------------------------------------------------
  2792. #define ATOMIC_CMPXCHG_CPT_REV(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \
  2793. ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \
  2794. TYPE new_value; \
  2795. (void)new_value; \
  2796. OP_GOMP_CRITICAL_CPT_REV(TYPE, OP, GOMP_FLAG) \
  2797. OP_CMPXCHG_CPT_REV(TYPE, BITS, OP) \
  2798. }
  2799. ATOMIC_CMPXCHG_CPT_REV(fixed1, div_cpt_rev, kmp_int8, 8, /,
  2800. KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_cpt_rev
  2801. ATOMIC_CMPXCHG_CPT_REV(fixed1u, div_cpt_rev, kmp_uint8, 8, /,
  2802. KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_cpt_rev
  2803. ATOMIC_CMPXCHG_CPT_REV(fixed1, shl_cpt_rev, kmp_int8, 8, <<,
  2804. KMP_ARCH_X86) // __kmpc_atomic_fixed1_shl_cpt_rev
  2805. ATOMIC_CMPXCHG_CPT_REV(fixed1, shr_cpt_rev, kmp_int8, 8, >>,
  2806. KMP_ARCH_X86) // __kmpc_atomic_fixed1_shr_cpt_rev
  2807. ATOMIC_CMPXCHG_CPT_REV(fixed1u, shr_cpt_rev, kmp_uint8, 8, >>,
  2808. KMP_ARCH_X86) // __kmpc_atomic_fixed1u_shr_cpt_rev
  2809. ATOMIC_CMPXCHG_CPT_REV(fixed1, sub_cpt_rev, kmp_int8, 8, -,
  2810. KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_cpt_rev
  2811. ATOMIC_CMPXCHG_CPT_REV(fixed2, div_cpt_rev, kmp_int16, 16, /,
  2812. KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_cpt_rev
  2813. ATOMIC_CMPXCHG_CPT_REV(fixed2u, div_cpt_rev, kmp_uint16, 16, /,
  2814. KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_cpt_rev
  2815. ATOMIC_CMPXCHG_CPT_REV(fixed2, shl_cpt_rev, kmp_int16, 16, <<,
  2816. KMP_ARCH_X86) // __kmpc_atomic_fixed2_shl_cpt_rev
  2817. ATOMIC_CMPXCHG_CPT_REV(fixed2, shr_cpt_rev, kmp_int16, 16, >>,
  2818. KMP_ARCH_X86) // __kmpc_atomic_fixed2_shr_cpt_rev
  2819. ATOMIC_CMPXCHG_CPT_REV(fixed2u, shr_cpt_rev, kmp_uint16, 16, >>,
  2820. KMP_ARCH_X86) // __kmpc_atomic_fixed2u_shr_cpt_rev
  2821. ATOMIC_CMPXCHG_CPT_REV(fixed2, sub_cpt_rev, kmp_int16, 16, -,
  2822. KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_cpt_rev
  2823. ATOMIC_CMPXCHG_CPT_REV(fixed4, div_cpt_rev, kmp_int32, 32, /,
  2824. KMP_ARCH_X86) // __kmpc_atomic_fixed4_div_cpt_rev
  2825. ATOMIC_CMPXCHG_CPT_REV(fixed4u, div_cpt_rev, kmp_uint32, 32, /,
  2826. KMP_ARCH_X86) // __kmpc_atomic_fixed4u_div_cpt_rev
  2827. ATOMIC_CMPXCHG_CPT_REV(fixed4, shl_cpt_rev, kmp_int32, 32, <<,
  2828. KMP_ARCH_X86) // __kmpc_atomic_fixed4_shl_cpt_rev
  2829. ATOMIC_CMPXCHG_CPT_REV(fixed4, shr_cpt_rev, kmp_int32, 32, >>,
  2830. KMP_ARCH_X86) // __kmpc_atomic_fixed4_shr_cpt_rev
  2831. ATOMIC_CMPXCHG_CPT_REV(fixed4u, shr_cpt_rev, kmp_uint32, 32, >>,
  2832. KMP_ARCH_X86) // __kmpc_atomic_fixed4u_shr_cpt_rev
  2833. ATOMIC_CMPXCHG_CPT_REV(fixed4, sub_cpt_rev, kmp_int32, 32, -,
  2834. KMP_ARCH_X86) // __kmpc_atomic_fixed4_sub_cpt_rev
  2835. ATOMIC_CMPXCHG_CPT_REV(fixed8, div_cpt_rev, kmp_int64, 64, /,
  2836. KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_cpt_rev
  2837. ATOMIC_CMPXCHG_CPT_REV(fixed8u, div_cpt_rev, kmp_uint64, 64, /,
  2838. KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_cpt_rev
  2839. ATOMIC_CMPXCHG_CPT_REV(fixed8, shl_cpt_rev, kmp_int64, 64, <<,
  2840. KMP_ARCH_X86) // __kmpc_atomic_fixed8_shl_cpt_rev
  2841. ATOMIC_CMPXCHG_CPT_REV(fixed8, shr_cpt_rev, kmp_int64, 64, >>,
  2842. KMP_ARCH_X86) // __kmpc_atomic_fixed8_shr_cpt_rev
  2843. ATOMIC_CMPXCHG_CPT_REV(fixed8u, shr_cpt_rev, kmp_uint64, 64, >>,
  2844. KMP_ARCH_X86) // __kmpc_atomic_fixed8u_shr_cpt_rev
  2845. ATOMIC_CMPXCHG_CPT_REV(fixed8, sub_cpt_rev, kmp_int64, 64, -,
  2846. KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_cpt_rev
  2847. ATOMIC_CMPXCHG_CPT_REV(float4, div_cpt_rev, kmp_real32, 32, /,
  2848. KMP_ARCH_X86) // __kmpc_atomic_float4_div_cpt_rev
  2849. ATOMIC_CMPXCHG_CPT_REV(float4, sub_cpt_rev, kmp_real32, 32, -,
  2850. KMP_ARCH_X86) // __kmpc_atomic_float4_sub_cpt_rev
  2851. ATOMIC_CMPXCHG_CPT_REV(float8, div_cpt_rev, kmp_real64, 64, /,
  2852. KMP_ARCH_X86) // __kmpc_atomic_float8_div_cpt_rev
  2853. ATOMIC_CMPXCHG_CPT_REV(float8, sub_cpt_rev, kmp_real64, 64, -,
  2854. KMP_ARCH_X86) // __kmpc_atomic_float8_sub_cpt_rev
  2855. // TYPE_ID,OP_ID, TYPE, OP, GOMP_FLAG
  2856. // ------------------------------------------------------------------------
  2857. // Routines for Extended types: long double, _Quad, complex flavours (use
  2858. // critical section)
  2859. // TYPE_ID, OP_ID, TYPE - detailed above
  2860. // OP - operator
  2861. // LCK_ID - lock identifier, used to possibly distinguish lock variable
  2862. #define ATOMIC_CRITICAL_CPT_REV(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \
  2863. ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \
  2864. TYPE new_value; \
  2865. /*printf("__kmp_atomic_mode = %d\n", __kmp_atomic_mode);*/ \
  2866. OP_GOMP_CRITICAL_CPT_REV(TYPE, OP, GOMP_FLAG) \
  2867. OP_CRITICAL_CPT_REV(TYPE, OP, LCK_ID) \
  2868. }
  2869. /* ------------------------------------------------------------------------- */
  2870. // routines for long double type
  2871. ATOMIC_CRITICAL_CPT_REV(float10, sub_cpt_rev, long double, -, 10r,
  2872. 1) // __kmpc_atomic_float10_sub_cpt_rev
  2873. ATOMIC_CRITICAL_CPT_REV(float10, div_cpt_rev, long double, /, 10r,
  2874. 1) // __kmpc_atomic_float10_div_cpt_rev
  2875. #if KMP_HAVE_QUAD
  2876. // routines for _Quad type
  2877. ATOMIC_CRITICAL_CPT_REV(float16, sub_cpt_rev, QUAD_LEGACY, -, 16r,
  2878. 1) // __kmpc_atomic_float16_sub_cpt_rev
  2879. ATOMIC_CRITICAL_CPT_REV(float16, div_cpt_rev, QUAD_LEGACY, /, 16r,
  2880. 1) // __kmpc_atomic_float16_div_cpt_rev
  2881. #if (KMP_ARCH_X86)
  2882. ATOMIC_CRITICAL_CPT_REV(float16, sub_a16_cpt_rev, Quad_a16_t, -, 16r,
  2883. 1) // __kmpc_atomic_float16_sub_a16_cpt_rev
  2884. ATOMIC_CRITICAL_CPT_REV(float16, div_a16_cpt_rev, Quad_a16_t, /, 16r,
  2885. 1) // __kmpc_atomic_float16_div_a16_cpt_rev
  2886. #endif // (KMP_ARCH_X86)
  2887. #endif // KMP_HAVE_QUAD
  2888. // routines for complex types
  2889. // ------------------------------------------------------------------------
  2890. // Workaround for cmplx4. Regular routines with return value don't work
  2891. // on Win_32e. Let's return captured values through the additional parameter.
  2892. #define OP_CRITICAL_CPT_REV_WRK(OP, LCK_ID) \
  2893. __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
  2894. \
  2895. if (flag) { \
  2896. (*lhs) = (rhs)OP(*lhs); \
  2897. (*out) = (*lhs); \
  2898. } else { \
  2899. (*out) = (*lhs); \
  2900. (*lhs) = (rhs)OP(*lhs); \
  2901. } \
  2902. \
  2903. __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
  2904. return;
  2905. // ------------------------------------------------------------------------
  2906. #ifdef KMP_GOMP_COMPAT
  2907. #define OP_GOMP_CRITICAL_CPT_REV_WRK(OP, FLAG) \
  2908. if ((FLAG) && (__kmp_atomic_mode == 2)) { \
  2909. KMP_CHECK_GTID; \
  2910. OP_CRITICAL_CPT_REV_WRK(OP, 0); \
  2911. }
  2912. #else
  2913. #define OP_GOMP_CRITICAL_CPT_REV_WRK(OP, FLAG)
  2914. #endif /* KMP_GOMP_COMPAT */
  2915. // ------------------------------------------------------------------------
  2916. #define ATOMIC_CRITICAL_CPT_REV_WRK(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, \
  2917. GOMP_FLAG) \
  2918. ATOMIC_BEGIN_WRK(TYPE_ID, OP_ID, TYPE) \
  2919. OP_GOMP_CRITICAL_CPT_REV_WRK(OP, GOMP_FLAG) \
  2920. OP_CRITICAL_CPT_REV_WRK(OP, LCK_ID) \
  2921. }
  2922. // The end of workaround for cmplx4
  2923. // !!! TODO: check if we need to return void for cmplx4 routines
  2924. // cmplx4 routines to return void
  2925. ATOMIC_CRITICAL_CPT_REV_WRK(cmplx4, sub_cpt_rev, kmp_cmplx32, -, 8c,
  2926. 1) // __kmpc_atomic_cmplx4_sub_cpt_rev
  2927. ATOMIC_CRITICAL_CPT_REV_WRK(cmplx4, div_cpt_rev, kmp_cmplx32, /, 8c,
  2928. 1) // __kmpc_atomic_cmplx4_div_cpt_rev
  2929. ATOMIC_CRITICAL_CPT_REV(cmplx8, sub_cpt_rev, kmp_cmplx64, -, 16c,
  2930. 1) // __kmpc_atomic_cmplx8_sub_cpt_rev
  2931. ATOMIC_CRITICAL_CPT_REV(cmplx8, div_cpt_rev, kmp_cmplx64, /, 16c,
  2932. 1) // __kmpc_atomic_cmplx8_div_cpt_rev
  2933. ATOMIC_CRITICAL_CPT_REV(cmplx10, sub_cpt_rev, kmp_cmplx80, -, 20c,
  2934. 1) // __kmpc_atomic_cmplx10_sub_cpt_rev
  2935. ATOMIC_CRITICAL_CPT_REV(cmplx10, div_cpt_rev, kmp_cmplx80, /, 20c,
  2936. 1) // __kmpc_atomic_cmplx10_div_cpt_rev
  2937. #if KMP_HAVE_QUAD
  2938. ATOMIC_CRITICAL_CPT_REV(cmplx16, sub_cpt_rev, CPLX128_LEG, -, 32c,
  2939. 1) // __kmpc_atomic_cmplx16_sub_cpt_rev
  2940. ATOMIC_CRITICAL_CPT_REV(cmplx16, div_cpt_rev, CPLX128_LEG, /, 32c,
  2941. 1) // __kmpc_atomic_cmplx16_div_cpt_rev
  2942. #if (KMP_ARCH_X86)
  2943. ATOMIC_CRITICAL_CPT_REV(cmplx16, sub_a16_cpt_rev, kmp_cmplx128_a16_t, -, 32c,
  2944. 1) // __kmpc_atomic_cmplx16_sub_a16_cpt_rev
  2945. ATOMIC_CRITICAL_CPT_REV(cmplx16, div_a16_cpt_rev, kmp_cmplx128_a16_t, /, 32c,
  2946. 1) // __kmpc_atomic_cmplx16_div_a16_cpt_rev
  2947. #endif // (KMP_ARCH_X86)
  2948. #endif // KMP_HAVE_QUAD
  2949. // Capture reverse for mixed type: RHS=float16
  2950. #if KMP_HAVE_QUAD
2951. // Beginning of a definition (provides name, parameters, debug trace)
  2952. // TYPE_ID - operands type and size (fixed*, fixed*u for signed, unsigned
  2953. // fixed)
  2954. // OP_ID - operation identifier (add, sub, mul, ...)
  2955. // TYPE - operands' type
  2956. // -------------------------------------------------------------------------
  2957. #define ATOMIC_CMPXCHG_CPT_REV_MIX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, \
  2958. RTYPE, LCK_ID, MASK, GOMP_FLAG) \
  2959. ATOMIC_BEGIN_CPT_MIX(TYPE_ID, OP_ID, TYPE, RTYPE_ID, RTYPE) \
  2960. TYPE new_value; \
  2961. (void)new_value; \
  2962. OP_GOMP_CRITICAL_CPT_REV(TYPE, OP, GOMP_FLAG) \
  2963. OP_CMPXCHG_CPT_REV(TYPE, BITS, OP) \
  2964. }
  2965. // -------------------------------------------------------------------------
  2966. #define ATOMIC_CRITICAL_CPT_REV_MIX(TYPE_ID, TYPE, OP_ID, OP, RTYPE_ID, RTYPE, \
  2967. LCK_ID, GOMP_FLAG) \
  2968. ATOMIC_BEGIN_CPT_MIX(TYPE_ID, OP_ID, TYPE, RTYPE_ID, RTYPE) \
  2969. TYPE new_value; \
  2970. (void)new_value; \
  2971. OP_GOMP_CRITICAL_CPT_REV(TYPE, OP, GOMP_FLAG) /* send assignment */ \
  2972. OP_CRITICAL_CPT_REV(TYPE, OP, LCK_ID) /* send assignment */ \
  2973. }
  2974. ATOMIC_CMPXCHG_CPT_REV_MIX(fixed1, char, sub_cpt_rev, 8, -, fp, _Quad, 1i, 0,
  2975. KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_cpt_rev_fp
  2976. ATOMIC_CMPXCHG_CPT_REV_MIX(fixed1u, uchar, sub_cpt_rev, 8, -, fp, _Quad, 1i, 0,
  2977. KMP_ARCH_X86) // __kmpc_atomic_fixed1u_sub_cpt_rev_fp
  2978. ATOMIC_CMPXCHG_CPT_REV_MIX(fixed1, char, div_cpt_rev, 8, /, fp, _Quad, 1i, 0,
  2979. KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_cpt_rev_fp
  2980. ATOMIC_CMPXCHG_CPT_REV_MIX(fixed1u, uchar, div_cpt_rev, 8, /, fp, _Quad, 1i, 0,
  2981. KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_cpt_rev_fp
  2982. ATOMIC_CMPXCHG_CPT_REV_MIX(fixed2, short, sub_cpt_rev, 16, -, fp, _Quad, 2i, 1,
  2983. KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_cpt_rev_fp
  2984. ATOMIC_CMPXCHG_CPT_REV_MIX(fixed2u, ushort, sub_cpt_rev, 16, -, fp, _Quad, 2i,
  2985. 1,
  2986. KMP_ARCH_X86) // __kmpc_atomic_fixed2u_sub_cpt_rev_fp
  2987. ATOMIC_CMPXCHG_CPT_REV_MIX(fixed2, short, div_cpt_rev, 16, /, fp, _Quad, 2i, 1,
  2988. KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_cpt_rev_fp
  2989. ATOMIC_CMPXCHG_CPT_REV_MIX(fixed2u, ushort, div_cpt_rev, 16, /, fp, _Quad, 2i,
  2990. 1,
  2991. KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_cpt_rev_fp
  2992. ATOMIC_CMPXCHG_CPT_REV_MIX(fixed4, kmp_int32, sub_cpt_rev, 32, -, fp, _Quad, 4i,
  2993. 3, 0) // __kmpc_atomic_fixed4_sub_cpt_rev_fp
  2994. ATOMIC_CMPXCHG_CPT_REV_MIX(fixed4u, kmp_uint32, sub_cpt_rev, 32, -, fp, _Quad,
  2995. 4i, 3, 0) // __kmpc_atomic_fixed4u_sub_cpt_rev_fp
  2996. ATOMIC_CMPXCHG_CPT_REV_MIX(fixed4, kmp_int32, div_cpt_rev, 32, /, fp, _Quad, 4i,
  2997. 3, 0) // __kmpc_atomic_fixed4_div_cpt_rev_fp
  2998. ATOMIC_CMPXCHG_CPT_REV_MIX(fixed4u, kmp_uint32, div_cpt_rev, 32, /, fp, _Quad,
  2999. 4i, 3, 0) // __kmpc_atomic_fixed4u_div_cpt_rev_fp
  3000. ATOMIC_CMPXCHG_CPT_REV_MIX(fixed8, kmp_int64, sub_cpt_rev, 64, -, fp, _Quad, 8i,
  3001. 7,
  3002. KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_cpt_rev_fp
  3003. ATOMIC_CMPXCHG_CPT_REV_MIX(fixed8u, kmp_uint64, sub_cpt_rev, 64, -, fp, _Quad,
  3004. 8i, 7,
  3005. KMP_ARCH_X86) // __kmpc_atomic_fixed8u_sub_cpt_rev_fp
  3006. ATOMIC_CMPXCHG_CPT_REV_MIX(fixed8, kmp_int64, div_cpt_rev, 64, /, fp, _Quad, 8i,
  3007. 7,
  3008. KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_cpt_rev_fp
  3009. ATOMIC_CMPXCHG_CPT_REV_MIX(fixed8u, kmp_uint64, div_cpt_rev, 64, /, fp, _Quad,
  3010. 8i, 7,
  3011. KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_cpt_rev_fp
  3012. ATOMIC_CMPXCHG_CPT_REV_MIX(float4, kmp_real32, sub_cpt_rev, 32, -, fp, _Quad,
  3013. 4r, 3,
  3014. KMP_ARCH_X86) // __kmpc_atomic_float4_sub_cpt_rev_fp
  3015. ATOMIC_CMPXCHG_CPT_REV_MIX(float4, kmp_real32, div_cpt_rev, 32, /, fp, _Quad,
  3016. 4r, 3,
  3017. KMP_ARCH_X86) // __kmpc_atomic_float4_div_cpt_rev_fp
  3018. ATOMIC_CMPXCHG_CPT_REV_MIX(float8, kmp_real64, sub_cpt_rev, 64, -, fp, _Quad,
  3019. 8r, 7,
  3020. KMP_ARCH_X86) // __kmpc_atomic_float8_sub_cpt_rev_fp
  3021. ATOMIC_CMPXCHG_CPT_REV_MIX(float8, kmp_real64, div_cpt_rev, 64, /, fp, _Quad,
  3022. 8r, 7,
  3023. KMP_ARCH_X86) // __kmpc_atomic_float8_div_cpt_rev_fp
  3024. ATOMIC_CRITICAL_CPT_REV_MIX(float10, long double, sub_cpt_rev, -, fp, _Quad,
  3025. 10r, 1) // __kmpc_atomic_float10_sub_cpt_rev_fp
  3026. ATOMIC_CRITICAL_CPT_REV_MIX(float10, long double, div_cpt_rev, /, fp, _Quad,
  3027. 10r, 1) // __kmpc_atomic_float10_div_cpt_rev_fp
  3028. #endif // KMP_HAVE_QUAD
  3029. // OpenMP 4.0 Capture-write (swap): {v = x; x = expr;}
  3030. #define ATOMIC_BEGIN_SWP(TYPE_ID, TYPE) \
  3031. TYPE __kmpc_atomic_##TYPE_ID##_swp(ident_t *id_ref, int gtid, TYPE *lhs, \
  3032. TYPE rhs) { \
  3033. KMP_DEBUG_ASSERT(__kmp_init_serial); \
  3034. KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_swp: T#%d\n", gtid));
  3035. #define CRITICAL_SWP(LCK_ID) \
  3036. __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
  3037. \
  3038. old_value = (*lhs); \
  3039. (*lhs) = rhs; \
  3040. \
  3041. __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
  3042. return old_value;
  3043. // ------------------------------------------------------------------------
  3044. #ifdef KMP_GOMP_COMPAT
  3045. #define GOMP_CRITICAL_SWP(FLAG) \
  3046. if ((FLAG) && (__kmp_atomic_mode == 2)) { \
  3047. KMP_CHECK_GTID; \
  3048. CRITICAL_SWP(0); \
  3049. }
  3050. #else
  3051. #define GOMP_CRITICAL_SWP(FLAG)
  3052. #endif /* KMP_GOMP_COMPAT */
  3053. #define ATOMIC_XCHG_SWP(TYPE_ID, TYPE, BITS, GOMP_FLAG) \
  3054. ATOMIC_BEGIN_SWP(TYPE_ID, TYPE) \
  3055. TYPE old_value; \
  3056. GOMP_CRITICAL_SWP(GOMP_FLAG) \
  3057. old_value = KMP_XCHG_FIXED##BITS(lhs, rhs); \
  3058. return old_value; \
  3059. }
  3060. // ------------------------------------------------------------------------
  3061. #define ATOMIC_XCHG_FLOAT_SWP(TYPE_ID, TYPE, BITS, GOMP_FLAG) \
  3062. ATOMIC_BEGIN_SWP(TYPE_ID, TYPE) \
  3063. TYPE old_value; \
  3064. GOMP_CRITICAL_SWP(GOMP_FLAG) \
  3065. old_value = KMP_XCHG_REAL##BITS(lhs, rhs); \
  3066. return old_value; \
  3067. }
  3068. // ------------------------------------------------------------------------
  3069. #define CMPXCHG_SWP(TYPE, BITS) \
  3070. { \
  3071. TYPE KMP_ATOMIC_VOLATILE temp_val; \
  3072. TYPE old_value, new_value; \
  3073. temp_val = *lhs; \
  3074. old_value = temp_val; \
  3075. new_value = rhs; \
  3076. while (!KMP_COMPARE_AND_STORE_ACQ##BITS( \
  3077. (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) & old_value, \
  3078. *VOLATILE_CAST(kmp_int##BITS *) & new_value)) { \
  3079. temp_val = *lhs; \
  3080. old_value = temp_val; \
  3081. new_value = rhs; \
  3082. } \
  3083. return old_value; \
  3084. }
  3085. // -------------------------------------------------------------------------
  3086. #define ATOMIC_CMPXCHG_SWP(TYPE_ID, TYPE, BITS, GOMP_FLAG) \
  3087. ATOMIC_BEGIN_SWP(TYPE_ID, TYPE) \
  3088. TYPE old_value; \
  3089. (void)old_value; \
  3090. GOMP_CRITICAL_SWP(GOMP_FLAG) \
  3091. CMPXCHG_SWP(TYPE, BITS) \
  3092. }
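// Illustrative sketch (not compiled) of the capture-write ("swap") contract that the
// ATOMIC_XCHG_SWP / ATOMIC_CMPXCHG_SWP entries below implement: store rhs into *lhs
// and return whatever was there before; the builtin stands in for
// KMP_XCHG_FIXED##BITS / CMPXCHG_SWP.
#if 0
static kmp_int64 example_swp(kmp_int64 *lhs, kmp_int64 rhs) {
  return __atomic_exchange_n(lhs, rhs, __ATOMIC_ACQ_REL);
}
#endif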
  3093. ATOMIC_XCHG_SWP(fixed1, kmp_int8, 8, KMP_ARCH_X86) // __kmpc_atomic_fixed1_swp
  3094. ATOMIC_XCHG_SWP(fixed2, kmp_int16, 16, KMP_ARCH_X86) // __kmpc_atomic_fixed2_swp
  3095. ATOMIC_XCHG_SWP(fixed4, kmp_int32, 32, KMP_ARCH_X86) // __kmpc_atomic_fixed4_swp
  3096. ATOMIC_XCHG_FLOAT_SWP(float4, kmp_real32, 32,
  3097. KMP_ARCH_X86) // __kmpc_atomic_float4_swp
  3098. #if (KMP_ARCH_X86)
  3099. ATOMIC_CMPXCHG_SWP(fixed8, kmp_int64, 64,
  3100. KMP_ARCH_X86) // __kmpc_atomic_fixed8_swp
  3101. ATOMIC_CMPXCHG_SWP(float8, kmp_real64, 64,
  3102. KMP_ARCH_X86) // __kmpc_atomic_float8_swp
  3103. #else
  3104. ATOMIC_XCHG_SWP(fixed8, kmp_int64, 64, KMP_ARCH_X86) // __kmpc_atomic_fixed8_swp
  3105. ATOMIC_XCHG_FLOAT_SWP(float8, kmp_real64, 64,
  3106. KMP_ARCH_X86) // __kmpc_atomic_float8_swp
  3107. #endif // (KMP_ARCH_X86)
  3108. // ------------------------------------------------------------------------
  3109. // Routines for Extended types: long double, _Quad, complex flavours (use
  3110. // critical section)
  3111. #define ATOMIC_CRITICAL_SWP(TYPE_ID, TYPE, LCK_ID, GOMP_FLAG) \
  3112. ATOMIC_BEGIN_SWP(TYPE_ID, TYPE) \
  3113. TYPE old_value; \
  3114. GOMP_CRITICAL_SWP(GOMP_FLAG) \
  3115. CRITICAL_SWP(LCK_ID) \
  3116. }
  3117. // ------------------------------------------------------------------------
  3118. // !!! TODO: check if we need to return void for cmplx4 routines
  3119. // Workaround for cmplx4. Regular routines with return value don't work
  3120. // on Win_32e. Let's return captured values through the additional parameter.
  3121. #define ATOMIC_BEGIN_SWP_WRK(TYPE_ID, TYPE) \
  3122. void __kmpc_atomic_##TYPE_ID##_swp(ident_t *id_ref, int gtid, TYPE *lhs, \
  3123. TYPE rhs, TYPE *out) { \
  3124. KMP_DEBUG_ASSERT(__kmp_init_serial); \
  3125. KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_swp: T#%d\n", gtid));
  3126. #define CRITICAL_SWP_WRK(LCK_ID) \
  3127. __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
  3128. \
  3129. tmp = (*lhs); \
  3130. (*lhs) = (rhs); \
  3131. (*out) = tmp; \
  3132. __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
  3133. return;
  3134. // ------------------------------------------------------------------------
  3135. #ifdef KMP_GOMP_COMPAT
  3136. #define GOMP_CRITICAL_SWP_WRK(FLAG) \
  3137. if ((FLAG) && (__kmp_atomic_mode == 2)) { \
  3138. KMP_CHECK_GTID; \
  3139. CRITICAL_SWP_WRK(0); \
  3140. }
  3141. #else
  3142. #define GOMP_CRITICAL_SWP_WRK(FLAG)
  3143. #endif /* KMP_GOMP_COMPAT */
  3144. // ------------------------------------------------------------------------
  3145. #define ATOMIC_CRITICAL_SWP_WRK(TYPE_ID, TYPE, LCK_ID, GOMP_FLAG) \
  3146. ATOMIC_BEGIN_SWP_WRK(TYPE_ID, TYPE) \
  3147. TYPE tmp; \
  3148. GOMP_CRITICAL_SWP_WRK(GOMP_FLAG) \
  3149. CRITICAL_SWP_WRK(LCK_ID) \
  3150. }
  3151. // The end of workaround for cmplx4
  3152. ATOMIC_CRITICAL_SWP(float10, long double, 10r, 1) // __kmpc_atomic_float10_swp
  3153. #if KMP_HAVE_QUAD
  3154. ATOMIC_CRITICAL_SWP(float16, QUAD_LEGACY, 16r, 1) // __kmpc_atomic_float16_swp
  3155. #endif // KMP_HAVE_QUAD
  3156. // cmplx4 routine to return void
  3157. ATOMIC_CRITICAL_SWP_WRK(cmplx4, kmp_cmplx32, 8c, 1) // __kmpc_atomic_cmplx4_swp
  3158. // ATOMIC_CRITICAL_SWP( cmplx4, kmp_cmplx32, 8c, 1 ) //
  3159. // __kmpc_atomic_cmplx4_swp
  3160. ATOMIC_CRITICAL_SWP(cmplx8, kmp_cmplx64, 16c, 1) // __kmpc_atomic_cmplx8_swp
  3161. ATOMIC_CRITICAL_SWP(cmplx10, kmp_cmplx80, 20c, 1) // __kmpc_atomic_cmplx10_swp
  3162. #if KMP_HAVE_QUAD
  3163. ATOMIC_CRITICAL_SWP(cmplx16, CPLX128_LEG, 32c, 1) // __kmpc_atomic_cmplx16_swp
  3164. #if (KMP_ARCH_X86)
  3165. ATOMIC_CRITICAL_SWP(float16_a16, Quad_a16_t, 16r,
  3166. 1) // __kmpc_atomic_float16_a16_swp
  3167. ATOMIC_CRITICAL_SWP(cmplx16_a16, kmp_cmplx128_a16_t, 32c,
  3168. 1) // __kmpc_atomic_cmplx16_a16_swp
  3169. #endif // (KMP_ARCH_X86)
  3170. #endif // KMP_HAVE_QUAD
  3171. // End of OpenMP 4.0 Capture
  3172. #endif // KMP_ARCH_X86 || KMP_ARCH_X86_64
#undef OP_CRITICAL

/* ------------------------------------------------------------------------ */
/* Generic atomic routines */

void __kmpc_atomic_1(ident_t *id_ref, int gtid, void *lhs, void *rhs,
                     void (*f)(void *, void *, void *)) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  if (
#if KMP_ARCH_X86 && defined(KMP_GOMP_COMPAT)
      FALSE /* must use lock */
#else
      TRUE
#endif // KMP_ARCH_X86 && defined(KMP_GOMP_COMPAT)
  ) {
    kmp_int8 old_value, new_value;

    old_value = *(kmp_int8 *)lhs;
    (*f)(&new_value, &old_value, rhs);

    /* TODO: Should this be acquire or release? */
    while (!KMP_COMPARE_AND_STORE_ACQ8((kmp_int8 *)lhs, *(kmp_int8 *)&old_value,
                                       *(kmp_int8 *)&new_value)) {
      KMP_CPU_PAUSE();

      old_value = *(kmp_int8 *)lhs;
      (*f)(&new_value, &old_value, rhs);
    }

    return;
  } else {
// All 1-byte data is of integer data type.
#ifdef KMP_GOMP_COMPAT
    if (__kmp_atomic_mode == 2) {
      __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
    } else
#endif /* KMP_GOMP_COMPAT */
      __kmp_acquire_atomic_lock(&__kmp_atomic_lock_1i, gtid);

    (*f)(lhs, lhs, rhs);

#ifdef KMP_GOMP_COMPAT
    if (__kmp_atomic_mode == 2) {
      __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
    } else
#endif /* KMP_GOMP_COMPAT */
      __kmp_release_atomic_lock(&__kmp_atomic_lock_1i, gtid);
  }
}
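// Illustrative only: callers of the generic routines supply a combining
// callback with the shape f(out, lhs_value_ptr, rhs_ptr); the CAS loop above
// recomputes f and retries until the location is unchanged underneath it.
// A minimal sketch (the callback and variable names are hypothetical):
//
//   static void byte_or(void *out, void *a, void *b) {
//     *(unsigned char *)out = *(unsigned char *)a | *(unsigned char *)b;
//   }
//
//   unsigned char flags = 0, mask = 0x4;
//   __kmpc_atomic_1(loc, gtid, &flags, &mask, byte_or); // flags |= mask, atomically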
void __kmpc_atomic_2(ident_t *id_ref, int gtid, void *lhs, void *rhs,
                     void (*f)(void *, void *, void *)) {
  if (
#if KMP_ARCH_X86 && defined(KMP_GOMP_COMPAT)
      FALSE /* must use lock */
#elif KMP_ARCH_X86 || KMP_ARCH_X86_64
      TRUE /* no alignment problems */
#else
      !((kmp_uintptr_t)lhs & 0x1) /* make sure address is 2-byte aligned */
#endif // KMP_ARCH_X86 && defined(KMP_GOMP_COMPAT)
  ) {
    kmp_int16 old_value, new_value;

    old_value = *(kmp_int16 *)lhs;
    (*f)(&new_value, &old_value, rhs);

    /* TODO: Should this be acquire or release? */
    while (!KMP_COMPARE_AND_STORE_ACQ16(
        (kmp_int16 *)lhs, *(kmp_int16 *)&old_value, *(kmp_int16 *)&new_value)) {
      KMP_CPU_PAUSE();

      old_value = *(kmp_int16 *)lhs;
      (*f)(&new_value, &old_value, rhs);
    }

    return;
  } else {
// All 2-byte data is of integer data type.
#ifdef KMP_GOMP_COMPAT
    if (__kmp_atomic_mode == 2) {
      __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
    } else
#endif /* KMP_GOMP_COMPAT */
      __kmp_acquire_atomic_lock(&__kmp_atomic_lock_2i, gtid);

    (*f)(lhs, lhs, rhs);

#ifdef KMP_GOMP_COMPAT
    if (__kmp_atomic_mode == 2) {
      __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
    } else
#endif /* KMP_GOMP_COMPAT */
      __kmp_release_atomic_lock(&__kmp_atomic_lock_2i, gtid);
  }
}

void __kmpc_atomic_4(ident_t *id_ref, int gtid, void *lhs, void *rhs,
                     void (*f)(void *, void *, void *)) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  if (
// FIXME: On IA-32 architecture, gcc uses cmpxchg only for 4-byte ints.
// Gomp compatibility is broken if this routine is called for floats.
#if KMP_ARCH_X86 || KMP_ARCH_X86_64
      TRUE /* no alignment problems */
#else
      !((kmp_uintptr_t)lhs & 0x3) /* make sure address is 4-byte aligned */
#endif // KMP_ARCH_X86 || KMP_ARCH_X86_64
  ) {
    kmp_int32 old_value, new_value;

    old_value = *(kmp_int32 *)lhs;
    (*f)(&new_value, &old_value, rhs);

    /* TODO: Should this be acquire or release? */
    while (!KMP_COMPARE_AND_STORE_ACQ32(
        (kmp_int32 *)lhs, *(kmp_int32 *)&old_value, *(kmp_int32 *)&new_value)) {
      KMP_CPU_PAUSE();

      old_value = *(kmp_int32 *)lhs;
      (*f)(&new_value, &old_value, rhs);
    }

    return;
  } else {
    // Use __kmp_atomic_lock_4i for all 4-byte data,
    // even if it isn't of integer data type.
#ifdef KMP_GOMP_COMPAT
    if (__kmp_atomic_mode == 2) {
      __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
    } else
#endif /* KMP_GOMP_COMPAT */
      __kmp_acquire_atomic_lock(&__kmp_atomic_lock_4i, gtid);

    (*f)(lhs, lhs, rhs);

#ifdef KMP_GOMP_COMPAT
    if (__kmp_atomic_mode == 2) {
      __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
    } else
#endif /* KMP_GOMP_COMPAT */
      __kmp_release_atomic_lock(&__kmp_atomic_lock_4i, gtid);
  }
}

void __kmpc_atomic_8(ident_t *id_ref, int gtid, void *lhs, void *rhs,
                     void (*f)(void *, void *, void *)) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  if (
#if KMP_ARCH_X86 && defined(KMP_GOMP_COMPAT)
      FALSE /* must use lock */
#elif KMP_ARCH_X86 || KMP_ARCH_X86_64
      TRUE /* no alignment problems */
#else
      !((kmp_uintptr_t)lhs & 0x7) /* make sure address is 8-byte aligned */
#endif // KMP_ARCH_X86 && defined(KMP_GOMP_COMPAT)
  ) {
    kmp_int64 old_value, new_value;

    old_value = *(kmp_int64 *)lhs;
    (*f)(&new_value, &old_value, rhs);

    /* TODO: Should this be acquire or release? */
    while (!KMP_COMPARE_AND_STORE_ACQ64(
        (kmp_int64 *)lhs, *(kmp_int64 *)&old_value, *(kmp_int64 *)&new_value)) {
      KMP_CPU_PAUSE();

      old_value = *(kmp_int64 *)lhs;
      (*f)(&new_value, &old_value, rhs);
    }

    return;
  } else {
    // Use __kmp_atomic_lock_8i for all 8-byte data,
    // even if it isn't of integer data type.
#ifdef KMP_GOMP_COMPAT
    if (__kmp_atomic_mode == 2) {
      __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
    } else
#endif /* KMP_GOMP_COMPAT */
      __kmp_acquire_atomic_lock(&__kmp_atomic_lock_8i, gtid);

    (*f)(lhs, lhs, rhs);

#ifdef KMP_GOMP_COMPAT
    if (__kmp_atomic_mode == 2) {
      __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
    } else
#endif /* KMP_GOMP_COMPAT */
      __kmp_release_atomic_lock(&__kmp_atomic_lock_8i, gtid);
  }
}
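// Illustrative only: __kmpc_atomic_8 is also used for non-integer 8-byte data;
// the CAS loop compares raw 64-bit patterns while the callback works on the
// real type. A minimal sketch (the callback and variable names are
// hypothetical):
//
//   static void dbl_add(void *out, void *a, void *b) {
//     *(double *)out = *(double *)a + *(double *)b;
//   }
//
//   double sum = 0.0, inc = 1.5;
//   __kmpc_atomic_8(loc, gtid, &sum, &inc, dbl_add); // sum += inc, atomically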
#if KMP_ARCH_X86 || KMP_ARCH_X86_64

void __kmpc_atomic_10(ident_t *id_ref, int gtid, void *lhs, void *rhs,
                      void (*f)(void *, void *, void *)) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);

#ifdef KMP_GOMP_COMPAT
  if (__kmp_atomic_mode == 2) {
    __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
  } else
#endif /* KMP_GOMP_COMPAT */
    __kmp_acquire_atomic_lock(&__kmp_atomic_lock_10r, gtid);

  (*f)(lhs, lhs, rhs);

#ifdef KMP_GOMP_COMPAT
  if (__kmp_atomic_mode == 2) {
    __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
  } else
#endif /* KMP_GOMP_COMPAT */
    __kmp_release_atomic_lock(&__kmp_atomic_lock_10r, gtid);
}

#endif // KMP_ARCH_X86 || KMP_ARCH_X86_64

void __kmpc_atomic_16(ident_t *id_ref, int gtid, void *lhs, void *rhs,
                      void (*f)(void *, void *, void *)) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);

#ifdef KMP_GOMP_COMPAT
  if (__kmp_atomic_mode == 2) {
    __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
  } else
#endif /* KMP_GOMP_COMPAT */
    __kmp_acquire_atomic_lock(&__kmp_atomic_lock_16c, gtid);

  (*f)(lhs, lhs, rhs);

#ifdef KMP_GOMP_COMPAT
  if (__kmp_atomic_mode == 2) {
    __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
  } else
#endif /* KMP_GOMP_COMPAT */
    __kmp_release_atomic_lock(&__kmp_atomic_lock_16c, gtid);
}

#if KMP_ARCH_X86 || KMP_ARCH_X86_64

void __kmpc_atomic_20(ident_t *id_ref, int gtid, void *lhs, void *rhs,
                      void (*f)(void *, void *, void *)) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);

#ifdef KMP_GOMP_COMPAT
  if (__kmp_atomic_mode == 2) {
    __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
  } else
#endif /* KMP_GOMP_COMPAT */
    __kmp_acquire_atomic_lock(&__kmp_atomic_lock_20c, gtid);

  (*f)(lhs, lhs, rhs);

#ifdef KMP_GOMP_COMPAT
  if (__kmp_atomic_mode == 2) {
    __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
  } else
#endif /* KMP_GOMP_COMPAT */
    __kmp_release_atomic_lock(&__kmp_atomic_lock_20c, gtid);
}

#endif // KMP_ARCH_X86 || KMP_ARCH_X86_64

void __kmpc_atomic_32(ident_t *id_ref, int gtid, void *lhs, void *rhs,
                      void (*f)(void *, void *, void *)) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);

#ifdef KMP_GOMP_COMPAT
  if (__kmp_atomic_mode == 2) {
    __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
  } else
#endif /* KMP_GOMP_COMPAT */
    __kmp_acquire_atomic_lock(&__kmp_atomic_lock_32c, gtid);

  (*f)(lhs, lhs, rhs);

#ifdef KMP_GOMP_COMPAT
  if (__kmp_atomic_mode == 2) {
    __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
  } else
#endif /* KMP_GOMP_COMPAT */
    __kmp_release_atomic_lock(&__kmp_atomic_lock_32c, gtid);
}
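// Illustrative only: for 10/16/20/32-byte operands there is no native
// compare-and-swap, so the routines above always serialize through the
// matching per-size lock. A minimal sketch (callback and variable names are
// hypothetical):
//
//   static void c8_add(void *out, void *a, void *b) {
//     *(kmp_cmplx64 *)out = *(kmp_cmplx64 *)a + *(kmp_cmplx64 *)b;
//   }
//
//   kmp_cmplx64 z, w;
//   __kmpc_atomic_16(loc, gtid, &z, &w, c8_add); // z += w under __kmp_atomic_lock_16c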
// AC: same two routines as GOMP_atomic_start/end, but will be called by our
// compiler; duplicated so that pure Intel code does not reference third-party
// names.
// TODO: consider adding GTID parameter after consultation with Ernesto/Xinmin.
void __kmpc_atomic_start(void) {
  int gtid = __kmp_entry_gtid();
  KA_TRACE(20, ("__kmpc_atomic_start: T#%d\n", gtid));
  __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
}

void __kmpc_atomic_end(void) {
  int gtid = __kmp_get_gtid();
  KA_TRACE(20, ("__kmpc_atomic_end: T#%d\n", gtid));
  __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
}
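// Illustrative only: a compiler may bracket an atomic construct it cannot map
// to a specialized entry point with these calls, turning it into a global
// critical section. A minimal sketch (update_struct is hypothetical):
//
//   __kmpc_atomic_start();
//   shared_struct = update_struct(shared_struct); // runs one thread at a time
//   __kmpc_atomic_end();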
#if KMP_ARCH_X86 || KMP_ARCH_X86_64

// OpenMP 5.1 compare and swap

/*!
@param loc Source code location
@param gtid Global thread id
@param x Memory location to operate on
@param e Expected value
@param d Desired value
@return Result of comparison

Implements Compare And Swap atomic operation.

Sample code:
#pragma omp atomic compare update capture
{ r = x == e; if(r) { x = d; } }
*/
bool __kmpc_atomic_bool_1_cas(ident_t *loc, int gtid, char *x, char e, char d) {
  return KMP_COMPARE_AND_STORE_ACQ8(x, e, d);
}
bool __kmpc_atomic_bool_2_cas(ident_t *loc, int gtid, short *x, short e,
                              short d) {
  return KMP_COMPARE_AND_STORE_ACQ16(x, e, d);
}
bool __kmpc_atomic_bool_4_cas(ident_t *loc, int gtid, kmp_int32 *x, kmp_int32 e,
                              kmp_int32 d) {
  return KMP_COMPARE_AND_STORE_ACQ32(x, e, d);
}
bool __kmpc_atomic_bool_8_cas(ident_t *loc, int gtid, kmp_int64 *x, kmp_int64 e,
                              kmp_int64 d) {
  return KMP_COMPARE_AND_STORE_ACQ64(x, e, d);
}
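// Illustrative lowering (a sketch; loc and gtid come from the calling
// context): for a 4-byte integer x, the sample construct above
//
//   #pragma omp atomic compare update capture
//   { r = x == e; if (r) { x = d; } }
//
// may be emitted as:
//
//   r = __kmpc_atomic_bool_4_cas(loc, gtid, &x, e, d);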
/*!
@param loc Source code location
@param gtid Global thread id
@param x Memory location to operate on
@param e Expected value
@param d Desired value
@return Old value of x

Implements Compare And Swap atomic operation.

Sample code:
#pragma omp atomic compare update capture
{ v = x; if (x == e) { x = d; } }
*/
char __kmpc_atomic_val_1_cas(ident_t *loc, int gtid, char *x, char e, char d) {
  return KMP_COMPARE_AND_STORE_RET8(x, e, d);
}
short __kmpc_atomic_val_2_cas(ident_t *loc, int gtid, short *x, short e,
                              short d) {
  return KMP_COMPARE_AND_STORE_RET16(x, e, d);
}
kmp_int32 __kmpc_atomic_val_4_cas(ident_t *loc, int gtid, kmp_int32 *x,
                                  kmp_int32 e, kmp_int32 d) {
  return KMP_COMPARE_AND_STORE_RET32(x, e, d);
}
kmp_int64 __kmpc_atomic_val_8_cas(ident_t *loc, int gtid, kmp_int64 *x,
                                  kmp_int64 e, kmp_int64 d) {
  return KMP_COMPARE_AND_STORE_RET64(x, e, d);
}
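// Illustrative lowering (a sketch; loc and gtid come from the calling
// context): for a 4-byte integer x, the sample construct above
//
//   #pragma omp atomic compare update capture
//   { v = x; if (x == e) { x = d; } }
//
// may be emitted as:
//
//   v = __kmpc_atomic_val_4_cas(loc, gtid, &x, e, d);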
/*!
@param loc Source code location
@param gtid Global thread id
@param x Memory location to operate on
@param e Expected value
@param d Desired value
@param pv Captured value location
@return Result of comparison

Implements Compare And Swap + Capture atomic operation.
v gets the old value of x if the comparison failed; it is left untouched
otherwise.

Sample code:
#pragma omp atomic compare update capture
{ r = x == e; if(r) { x = d; } else { v = x; } }
*/
bool __kmpc_atomic_bool_1_cas_cpt(ident_t *loc, int gtid, char *x, char e,
                                  char d, char *pv) {
  char old = KMP_COMPARE_AND_STORE_RET8(x, e, d);
  if (old == e)
    return true;
  KMP_ASSERT(pv != NULL);
  *pv = old;
  return false;
}
bool __kmpc_atomic_bool_2_cas_cpt(ident_t *loc, int gtid, short *x, short e,
                                  short d, short *pv) {
  short old = KMP_COMPARE_AND_STORE_RET16(x, e, d);
  if (old == e)
    return true;
  KMP_ASSERT(pv != NULL);
  *pv = old;
  return false;
}
bool __kmpc_atomic_bool_4_cas_cpt(ident_t *loc, int gtid, kmp_int32 *x,
                                  kmp_int32 e, kmp_int32 d, kmp_int32 *pv) {
  kmp_int32 old = KMP_COMPARE_AND_STORE_RET32(x, e, d);
  if (old == e)
    return true;
  KMP_ASSERT(pv != NULL);
  *pv = old;
  return false;
}
bool __kmpc_atomic_bool_8_cas_cpt(ident_t *loc, int gtid, kmp_int64 *x,
                                  kmp_int64 e, kmp_int64 d, kmp_int64 *pv) {
  kmp_int64 old = KMP_COMPARE_AND_STORE_RET64(x, e, d);
  if (old == e)
    return true;
  KMP_ASSERT(pv != NULL);
  *pv = old;
  return false;
}
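// Illustrative lowering (a sketch; loc and gtid come from the calling
// context): for a 4-byte integer x, the sample construct above
//
//   #pragma omp atomic compare update capture
//   { r = x == e; if (r) { x = d; } else { v = x; } }
//
// may be emitted as:
//
//   r = __kmpc_atomic_bool_4_cas_cpt(loc, gtid, &x, e, d, &v);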
/*!
@param loc Source code location
@param gtid Global thread id
@param x Memory location to operate on
@param e Expected value
@param d Desired value
@param pv Captured value location
@return Old value of x

Implements Compare And Swap + Capture atomic operation.
v gets the new value of x.

Sample code:
#pragma omp atomic compare update capture
{ if (x == e) { x = d; }; v = x; }
*/
char __kmpc_atomic_val_1_cas_cpt(ident_t *loc, int gtid, char *x, char e,
                                 char d, char *pv) {
  char old = KMP_COMPARE_AND_STORE_RET8(x, e, d);
  KMP_ASSERT(pv != NULL);
  *pv = old == e ? d : old;
  return old;
}
short __kmpc_atomic_val_2_cas_cpt(ident_t *loc, int gtid, short *x, short e,
                                  short d, short *pv) {
  short old = KMP_COMPARE_AND_STORE_RET16(x, e, d);
  KMP_ASSERT(pv != NULL);
  *pv = old == e ? d : old;
  return old;
}
kmp_int32 __kmpc_atomic_val_4_cas_cpt(ident_t *loc, int gtid, kmp_int32 *x,
                                      kmp_int32 e, kmp_int32 d, kmp_int32 *pv) {
  kmp_int32 old = KMP_COMPARE_AND_STORE_RET32(x, e, d);
  KMP_ASSERT(pv != NULL);
  *pv = old == e ? d : old;
  return old;
}
kmp_int64 __kmpc_atomic_val_8_cas_cpt(ident_t *loc, int gtid, kmp_int64 *x,
                                      kmp_int64 e, kmp_int64 d, kmp_int64 *pv) {
  kmp_int64 old = KMP_COMPARE_AND_STORE_RET64(x, e, d);
  KMP_ASSERT(pv != NULL);
  *pv = old == e ? d : old;
  return old;
}
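// Illustrative lowering (a sketch; loc and gtid come from the calling
// context): for an 8-byte integer x, the sample construct above
//
//   #pragma omp atomic compare update capture
//   { if (x == e) { x = d; }; v = x; }
//
// may be emitted as (the return value carries the old value of x and can be
// discarded in this form):
//
//   __kmpc_atomic_val_8_cas_cpt(loc, gtid, &x, e, d, &v);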
// End OpenMP 5.1 compare + capture

#endif // KMP_ARCH_X86 || KMP_ARCH_X86_64

/*!
@}
*/

// end of file