// z_Linux_asm.S: - microtasking routines specifically
// written for Intel platforms running Linux* OS
//
//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// -----------------------------------------------------------------------
// macros
// -----------------------------------------------------------------------
#include "kmp_config.h"
#if KMP_ARCH_X86 || KMP_ARCH_X86_64
# if KMP_MIC
// On KMP_MIC, 'delay r16/r32/r64' should be used instead of 'pause'.
// The delay operation removes the current thread from the round-robin HT
// mechanism, and therefore speeds up the issue rate of the other threads
// on the same core.
//
// A value of 0 works fine for <= 2 threads per core, but causes the EPCC
// barrier time to increase greatly for 3 or more threads per core.
//
// A value of 100 works pretty well for up to 4 threads per core, but isn't
// quite as fast as 0 for 2 threads per core.
//
// We need to check what happens for oversubscription / > 4 threads per core.
// It is possible that we need to pass the delay value in as a parameter
// that the caller determines based on the total # threads / # cores.
//
//.macro pause_op
// mov $100, %rax
// delay %rax
//.endm
# else
# define pause_op .byte 0xf3,0x90
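// Note: 0xf3,0x90 is the encoding of the "pause" instruction ("rep; nop"),
// emitted as raw bytes, presumably so older assemblers that lack the
// mnemonic still accept it.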
# endif // KMP_MIC
# if KMP_OS_DARWIN
# define KMP_PREFIX_UNDERSCORE(x) _##x // extra underscore for OS X* symbols
# define KMP_LABEL(x) L_##x // form the name of label
.macro KMP_CFI_DEF_OFFSET
.endmacro
.macro KMP_CFI_OFFSET
.endmacro
.macro KMP_CFI_REGISTER
.endmacro
.macro KMP_CFI_DEF
.endmacro
.macro ALIGN
.align $0
.endmacro
.macro DEBUG_INFO
/* Not sure what .size does in icc, not sure if we need to do something
similar for OS X*.
*/
.endmacro
.macro PROC
ALIGN 4
.globl KMP_PREFIX_UNDERSCORE($0)
KMP_PREFIX_UNDERSCORE($0):
.endmacro
# else // KMP_OS_DARWIN
# define KMP_PREFIX_UNDERSCORE(x) x // no extra underscore for Linux* OS symbols
// Format labels so that they don't override function names in gdb's backtraces
// MIC assembler doesn't accept .L syntax, the L works fine there (as well as
// on OS X*)
# if KMP_MIC
# define KMP_LABEL(x) L_##x // local label
# else
# define KMP_LABEL(x) .L_##x // local label hidden from backtraces
# endif // KMP_MIC
.macro ALIGN size
.align 1<<(\size)
.endm
.macro DEBUG_INFO proc
.cfi_endproc
// Not sure why we need .type and .size for the functions
.align 16
.type \proc,@function
.size \proc,.-\proc
.endm
.macro PROC proc
ALIGN 4
.globl KMP_PREFIX_UNDERSCORE(\proc)
KMP_PREFIX_UNDERSCORE(\proc):
.cfi_startproc
.endm
.macro KMP_CFI_DEF_OFFSET sz
.cfi_def_cfa_offset \sz
.endm
.macro KMP_CFI_OFFSET reg, sz
.cfi_offset \reg,\sz
.endm
.macro KMP_CFI_REGISTER reg
.cfi_def_cfa_register \reg
.endm
.macro KMP_CFI_DEF reg, sz
.cfi_def_cfa \reg,\sz
.endm
# endif // KMP_OS_DARWIN
#endif // KMP_ARCH_X86 || KMP_ARCH_X86_64
#if (KMP_OS_LINUX || KMP_OS_DARWIN) && KMP_ARCH_AARCH64
# if KMP_OS_DARWIN
# define KMP_PREFIX_UNDERSCORE(x) _##x // extra underscore for OS X* symbols
# define KMP_LABEL(x) L_##x // form the name of label
.macro ALIGN
.align $0
.endmacro
.macro DEBUG_INFO
/* Not sure what .size does in icc, not sure if we need to do something
similar for OS X*.
*/
.endmacro
.macro PROC
ALIGN 4
.globl KMP_PREFIX_UNDERSCORE($0)
KMP_PREFIX_UNDERSCORE($0):
.endmacro
# else // KMP_OS_DARWIN
# define KMP_PREFIX_UNDERSCORE(x) x // no extra underscore for Linux* OS symbols
// Format labels so that they don't override function names in gdb's backtraces
# define KMP_LABEL(x) .L_##x // local label hidden from backtraces
.macro ALIGN size
.align 1<<(\size)
.endm
.macro DEBUG_INFO proc
.cfi_endproc
// Not sure why we need .type and .size for the functions
ALIGN 2
.type \proc,@function
.size \proc,.-\proc
.endm
.macro PROC proc
ALIGN 2
.globl KMP_PREFIX_UNDERSCORE(\proc)
KMP_PREFIX_UNDERSCORE(\proc):
.cfi_startproc
.endm
# endif // KMP_OS_DARWIN
#endif // (KMP_OS_LINUX || KMP_OS_DARWIN) && KMP_ARCH_AARCH64
// -----------------------------------------------------------------------
// data
// -----------------------------------------------------------------------
#ifdef KMP_GOMP_COMPAT
// Support for unnamed common blocks.
//
// Because the symbol ".gomp_critical_user_" contains a ".", we have to
// put this stuff in assembly.
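// The __kmp_unnamed_critical_addr symbols defined below expose the address
// of this 32-byte block to C code (presumably so the runtime can recognize
// the lock used for unnamed critical sections).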
# if KMP_ARCH_X86
# if KMP_OS_DARWIN
.data
.comm .gomp_critical_user_,32
.data
.globl ___kmp_unnamed_critical_addr
___kmp_unnamed_critical_addr:
.long .gomp_critical_user_
# else /* Linux* OS */
.data
.comm .gomp_critical_user_,32,8
.data
ALIGN 4
.global __kmp_unnamed_critical_addr
__kmp_unnamed_critical_addr:
.4byte .gomp_critical_user_
.type __kmp_unnamed_critical_addr,@object
.size __kmp_unnamed_critical_addr,4
# endif /* KMP_OS_DARWIN */
# endif /* KMP_ARCH_X86 */
# if KMP_ARCH_X86_64
# if KMP_OS_DARWIN
.data
.comm .gomp_critical_user_,32
.data
.globl ___kmp_unnamed_critical_addr
___kmp_unnamed_critical_addr:
.quad .gomp_critical_user_
# else /* Linux* OS */
.data
.comm .gomp_critical_user_,32,8
.data
ALIGN 8
.global __kmp_unnamed_critical_addr
__kmp_unnamed_critical_addr:
.8byte .gomp_critical_user_
.type __kmp_unnamed_critical_addr,@object
.size __kmp_unnamed_critical_addr,8
# endif /* KMP_OS_DARWIN */
# endif /* KMP_ARCH_X86_64 */
#endif /* KMP_GOMP_COMPAT */
#if KMP_ARCH_X86 && !KMP_ARCH_PPC64
// -----------------------------------------------------------------------
// microtasking routines specifically written for IA-32 architecture
// running Linux* OS
// -----------------------------------------------------------------------
.ident "Intel Corporation"
.data
ALIGN 4
// void
// __kmp_x86_pause( void );
.text
PROC __kmp_x86_pause
pause_op
ret
DEBUG_INFO __kmp_x86_pause
# if !KMP_ASM_INTRINS
//------------------------------------------------------------------------
// kmp_int32
// __kmp_test_then_add32( volatile kmp_int32 *p, kmp_int32 d );
PROC __kmp_test_then_add32
movl 4(%esp), %ecx
movl 8(%esp), %eax
lock
xaddl %eax,(%ecx)
ret
DEBUG_INFO __kmp_test_then_add32
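// Illustrative C equivalent (a sketch, not part of the build): lock xadd
// leaves the old value in %eax, i.e.
// kmp_int32 __kmp_test_then_add32(volatile kmp_int32 *p, kmp_int32 d) {
// return __sync_fetch_and_add(p, d);
// }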
//------------------------------------------------------------------------
// FUNCTION __kmp_xchg_fixed8
//
// kmp_int32
// __kmp_xchg_fixed8( volatile kmp_int8 *p, kmp_int8 d );
//
// parameters:
// p: 4(%esp)
// d: 8(%esp)
//
// return: %al
PROC __kmp_xchg_fixed8
movl 4(%esp), %ecx // "p"
movb 8(%esp), %al // "d"
lock
xchgb %al,(%ecx)
ret
DEBUG_INFO __kmp_xchg_fixed8
//------------------------------------------------------------------------
// FUNCTION __kmp_xchg_fixed16
//
// kmp_int16
// __kmp_xchg_fixed16( volatile kmp_int16 *p, kmp_int16 d );
//
// parameters:
// p: 4(%esp)
// d: 8(%esp)
// return: %ax
PROC __kmp_xchg_fixed16
movl 4(%esp), %ecx // "p"
movw 8(%esp), %ax // "d"
lock
xchgw %ax,(%ecx)
ret
DEBUG_INFO __kmp_xchg_fixed16
//------------------------------------------------------------------------
// FUNCTION __kmp_xchg_fixed32
//
// kmp_int32
// __kmp_xchg_fixed32( volatile kmp_int32 *p, kmp_int32 d );
//
// parameters:
// p: 4(%esp)
// d: 8(%esp)
//
// return: %eax
PROC __kmp_xchg_fixed32
movl 4(%esp), %ecx // "p"
movl 8(%esp), %eax // "d"
lock
xchgl %eax,(%ecx)
ret
DEBUG_INFO __kmp_xchg_fixed32
// kmp_int8
// __kmp_compare_and_store8( volatile kmp_int8 *p, kmp_int8 cv, kmp_int8 sv );
PROC __kmp_compare_and_store8
movl 4(%esp), %ecx
movb 8(%esp), %al
movb 12(%esp), %dl
lock
cmpxchgb %dl,(%ecx)
sete %al // if %al == (%ecx) set %al = 1 else set %al = 0
and $1, %eax // zero-extend %al into %eax for the return value
ret
DEBUG_INFO __kmp_compare_and_store8
// kmp_int16
// __kmp_compare_and_store16(volatile kmp_int16 *p, kmp_int16 cv, kmp_int16 sv);
PROC __kmp_compare_and_store16
movl 4(%esp), %ecx
movw 8(%esp), %ax
movw 12(%esp), %dx
lock
cmpxchgw %dx,(%ecx)
sete %al // if %ax == (%ecx) set %al = 1 else set %al = 0
and $1, %eax // zero-extend %al into %eax for the return value
ret
DEBUG_INFO __kmp_compare_and_store16
// kmp_int32
// __kmp_compare_and_store32(volatile kmp_int32 *p, kmp_int32 cv, kmp_int32 sv);
PROC __kmp_compare_and_store32
movl 4(%esp), %ecx
movl 8(%esp), %eax
movl 12(%esp), %edx
lock
cmpxchgl %edx,(%ecx)
sete %al // if %eax == (%ecx) set %al = 1 else set %al = 0
and $1, %eax // zero-extend %al into %eax for the return value
ret
DEBUG_INFO __kmp_compare_and_store32
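// The __kmp_compare_and_store{8,16,32} routines above (and the 64-bit
// variant below) return 1 if the store happened and 0 otherwise, matching
// __sync_bool_compare_and_swap semantics in GCC builtin terms.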
// kmp_int32
// __kmp_compare_and_store64(volatile kmp_int64 *p, kmp_int64 cv, kmp_int64 sv);
PROC __kmp_compare_and_store64
pushl %ebp
movl %esp, %ebp
pushl %ebx
pushl %edi
movl 8(%ebp), %edi
movl 12(%ebp), %eax // "cv" low order word
movl 16(%ebp), %edx // "cv" high order word
movl 20(%ebp), %ebx // "sv" low order word
movl 24(%ebp), %ecx // "sv" high order word
lock
cmpxchg8b (%edi)
sete %al // if %edx:eax == (%edi) set %al = 1 else set %al = 0
and $1, %eax // zero-extend %al into %eax for the return value
popl %edi
popl %ebx
movl %ebp, %esp
popl %ebp
ret
DEBUG_INFO __kmp_compare_and_store64
// kmp_int8
// __kmp_compare_and_store_ret8(volatile kmp_int8 *p, kmp_int8 cv, kmp_int8 sv);
PROC __kmp_compare_and_store_ret8
movl 4(%esp), %ecx
movb 8(%esp), %al
movb 12(%esp), %dl
lock
cmpxchgb %dl,(%ecx)
ret
DEBUG_INFO __kmp_compare_and_store_ret8
// kmp_int16
// __kmp_compare_and_store_ret16(volatile kmp_int16 *p, kmp_int16 cv,
// kmp_int16 sv);
PROC __kmp_compare_and_store_ret16
movl 4(%esp), %ecx
movw 8(%esp), %ax
movw 12(%esp), %dx
lock
cmpxchgw %dx,(%ecx)
ret
DEBUG_INFO __kmp_compare_and_store_ret16
// kmp_int32
// __kmp_compare_and_store_ret32(volatile kmp_int32 *p, kmp_int32 cv,
// kmp_int32 sv);
PROC __kmp_compare_and_store_ret32
movl 4(%esp), %ecx
movl 8(%esp), %eax
movl 12(%esp), %edx
lock
cmpxchgl %edx,(%ecx)
ret
DEBUG_INFO __kmp_compare_and_store_ret32
// kmp_int64
// __kmp_compare_and_store_ret64(volatile kmp_int64 *p, kmp_int64 cv,
// kmp_int64 sv);
PROC __kmp_compare_and_store_ret64
pushl %ebp
movl %esp, %ebp
pushl %ebx
pushl %edi
movl 8(%ebp), %edi
movl 12(%ebp), %eax // "cv" low order word
movl 16(%ebp), %edx // "cv" high order word
movl 20(%ebp), %ebx // "sv" low order word
movl 24(%ebp), %ecx // "sv" high order word
lock
cmpxchg8b (%edi)
popl %edi
popl %ebx
movl %ebp, %esp
popl %ebp
ret
DEBUG_INFO __kmp_compare_and_store_ret64
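// In contrast, the *_ret variants return the value that was previously at
// *p: after cmpxchg the accumulator (%eax, or %edx:%eax for cmpxchg8b)
// always holds the old value, matching __sync_val_compare_and_swap semantics.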
//------------------------------------------------------------------------
// FUNCTION __kmp_xchg_real32
//
// kmp_real32
// __kmp_xchg_real32( volatile kmp_real32 *addr, kmp_real32 data );
//
// parameters:
// addr: 4(%esp)
// data: 8(%esp)
//
// return: st(0) (old value of *addr)
PROC __kmp_xchg_real32
pushl %ebp
movl %esp, %ebp
subl $4, %esp
pushl %esi
movl 8(%ebp), %esi // "addr" (first arg is at 8(%ebp) after the prologue)
flds (%esi)
// load <addr>
fsts -4(%ebp)
// store old value
movl 12(%ebp), %eax // "data"
lock
xchgl %eax, (%esi)
flds -4(%ebp)
// return old value
popl %esi
movl %ebp, %esp
popl %ebp
ret
DEBUG_INFO __kmp_xchg_real32
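// Note: the exchange is done as an integer xchgl on the float's bit pattern;
// the old value is returned in st(0) via the x87 stack, per the IA-32
// calling convention for float return values.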
# endif /* !KMP_ASM_INTRINS */
//------------------------------------------------------------------------
// int
// __kmp_invoke_microtask( void (*pkfn) (int gtid, int tid, ...),
// int gtid, int tid,
// int argc, void *p_argv[]
// #if OMPT_SUPPORT
// ,
// void **exit_frame_ptr
// #endif
// ) {
// #if OMPT_SUPPORT
// *exit_frame_ptr = OMPT_GET_FRAME_ADDRESS(0);
// #endif
//
// (*pkfn)( & gtid, & tid, argv[0], ... );
// return 1;
// }
// -- Begin __kmp_invoke_microtask
// mark_begin;
PROC __kmp_invoke_microtask
pushl %ebp
KMP_CFI_DEF_OFFSET 8
KMP_CFI_OFFSET ebp,-8
movl %esp,%ebp // establish the base pointer for this routine.
KMP_CFI_REGISTER ebp
subl $8,%esp // allocate space for two local variables.
// These variables are:
// argv: -4(%ebp)
// temp: -8(%ebp)
//
pushl %ebx // save %ebx to use during this routine
//
#if OMPT_SUPPORT
movl 28(%ebp),%ebx // get exit_frame address
movl %ebp,(%ebx) // save exit_frame
#endif
movl 20(%ebp),%ebx // Stack alignment - # args
addl $2,%ebx // #args +2 Always pass at least 2 args (gtid and tid)
shll $2,%ebx // Number of bytes used on stack: (#args+2)*4
movl %esp,%eax //
subl %ebx,%eax // %esp-((#args+2)*4) -> %eax -- without mods, stack ptr would be this
movl %eax,%ebx // Save to %ebx
andl $0xFFFFFF80,%eax // mask off low 7 bits (align to 128 bytes)
subl %eax,%ebx // Amount to subtract from %esp
subl %ebx,%esp // Prepare the stack ptr --
// now it will be aligned on 128-byte boundary at the call
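// Worked example (illustrative): argc = 3 -> (3+2)*4 = 20 outgoing bytes;
// the mask rounds the prospective stack pointer down to a multiple of 128,
// and %esp is lowered by the difference so the call site sees that alignment.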
movl 24(%ebp),%eax // copy from p_argv[]
movl %eax,-4(%ebp) // into the local variable *argv.
movl 20(%ebp),%ebx // argc is 20(%ebp)
shll $2,%ebx
KMP_LABEL(invoke_2):
cmpl $0,%ebx
jg KMP_LABEL(invoke_4)
jmp KMP_LABEL(invoke_3)
ALIGN 2
KMP_LABEL(invoke_4):
movl -4(%ebp),%eax
subl $4,%ebx // decrement argc.
addl %ebx,%eax // index into argv.
movl (%eax),%edx
pushl %edx
jmp KMP_LABEL(invoke_2)
ALIGN 2
KMP_LABEL(invoke_3):
leal 16(%ebp),%eax // push & tid
pushl %eax
leal 12(%ebp),%eax // push & gtid
pushl %eax
movl 8(%ebp),%ebx
call *%ebx // call (*pkfn)();
movl $1,%eax // return 1;
movl -12(%ebp),%ebx // restore %ebx
leave
KMP_CFI_DEF esp,4
ret
DEBUG_INFO __kmp_invoke_microtask
// -- End __kmp_invoke_microtask
// kmp_uint64
// __kmp_hardware_timestamp(void)
PROC __kmp_hardware_timestamp
rdtsc
ret
DEBUG_INFO __kmp_hardware_timestamp
// -- End __kmp_hardware_timestamp
#endif /* KMP_ARCH_X86 */
#if KMP_ARCH_X86_64
// -----------------------------------------------------------------------
// microtasking routines specifically written for IA-32 architecture and
// Intel(R) 64 running Linux* OS
// -----------------------------------------------------------------------
// -- Machine type P
// mark_description "Intel Corporation";
.ident "Intel Corporation"
// -- .file "z_Linux_asm.S"
.data
ALIGN 4
// To prevent our code from ending up in the .data section, ".text" is added
// to every routine definition for x86_64.
//------------------------------------------------------------------------
# if !KMP_ASM_INTRINS
//------------------------------------------------------------------------
// FUNCTION __kmp_test_then_add32
//
// kmp_int32
// __kmp_test_then_add32( volatile kmp_int32 *p, kmp_int32 d );
//
// parameters:
// p: %rdi
// d: %esi
//
// return: %eax
.text
PROC __kmp_test_then_add32
movl %esi, %eax // "d"
lock
xaddl %eax,(%rdi)
ret
DEBUG_INFO __kmp_test_then_add32
//------------------------------------------------------------------------
// FUNCTION __kmp_test_then_add64
//
// kmp_int64
// __kmp_test_then_add64( volatile kmp_int64 *p, kmp_int64 d );
//
// parameters:
// p: %rdi
// d: %rsi
// return: %rax
.text
PROC __kmp_test_then_add64
movq %rsi, %rax // "d"
lock
xaddq %rax,(%rdi)
ret
DEBUG_INFO __kmp_test_then_add64
//------------------------------------------------------------------------
// FUNCTION __kmp_xchg_fixed8
//
// kmp_int32
// __kmp_xchg_fixed8( volatile kmp_int8 *p, kmp_int8 d );
//
// parameters:
// p: %rdi
// d: %sil
//
// return: %al
.text
PROC __kmp_xchg_fixed8
movb %sil, %al // "d"
lock
xchgb %al,(%rdi)
ret
DEBUG_INFO __kmp_xchg_fixed8
//------------------------------------------------------------------------
// FUNCTION __kmp_xchg_fixed16
//
// kmp_int16
// __kmp_xchg_fixed16( volatile kmp_int16 *p, kmp_int16 d );
//
// parameters:
// p: %rdi
// d: %si
// return: %ax
.text
PROC __kmp_xchg_fixed16
movw %si, %ax // "d"
lock
xchgw %ax,(%rdi)
ret
DEBUG_INFO __kmp_xchg_fixed16
//------------------------------------------------------------------------
// FUNCTION __kmp_xchg_fixed32
//
// kmp_int32
// __kmp_xchg_fixed32( volatile kmp_int32 *p, kmp_int32 d );
//
// parameters:
// p: %rdi
// d: %esi
//
// return: %eax
.text
PROC __kmp_xchg_fixed32
movl %esi, %eax // "d"
lock
xchgl %eax,(%rdi)
ret
DEBUG_INFO __kmp_xchg_fixed32
//------------------------------------------------------------------------
// FUNCTION __kmp_xchg_fixed64
//
// kmp_int64
// __kmp_xchg_fixed64( volatile kmp_int64 *p, kmp_int64 d );
//
// parameters:
// p: %rdi
// d: %rsi
// return: %rax
.text
PROC __kmp_xchg_fixed64
movq %rsi, %rax // "d"
lock
xchgq %rax,(%rdi)
ret
DEBUG_INFO __kmp_xchg_fixed64
//------------------------------------------------------------------------
// FUNCTION __kmp_compare_and_store8
//
// kmp_int8
// __kmp_compare_and_store8( volatile kmp_int8 *p, kmp_int8 cv, kmp_int8 sv );
//
// parameters:
// p: %rdi
// cv: %esi
// sv: %edx
//
// return: %eax
.text
PROC __kmp_compare_and_store8
movb %sil, %al // "cv"
lock
cmpxchgb %dl,(%rdi)
sete %al // if %al == (%rdi) set %al = 1 else set %al = 0
andq $1, %rax // zero-extend %al into %rax for the return value
ret
DEBUG_INFO __kmp_compare_and_store8
//------------------------------------------------------------------------
// FUNCTION __kmp_compare_and_store16
//
// kmp_int16
// __kmp_compare_and_store16( volatile kmp_int16 *p, kmp_int16 cv, kmp_int16 sv );
//
// parameters:
// p: %rdi
// cv: %si
// sv: %dx
//
// return: %eax
.text
PROC __kmp_compare_and_store16
movw %si, %ax // "cv"
lock
cmpxchgw %dx,(%rdi)
sete %al // if %ax == (%rdi) set %al = 1 else set %al = 0
andq $1, %rax // zero-extend %al into %rax for the return value
ret
DEBUG_INFO __kmp_compare_and_store16
//------------------------------------------------------------------------
// FUNCTION __kmp_compare_and_store32
//
// kmp_int32
// __kmp_compare_and_store32( volatile kmp_int32 *p, kmp_int32 cv, kmp_int32 sv );
//
// parameters:
// p: %rdi
// cv: %esi
// sv: %edx
//
// return: %eax
.text
PROC __kmp_compare_and_store32
movl %esi, %eax // "cv"
lock
cmpxchgl %edx,(%rdi)
sete %al // if %eax == (%rdi) set %al = 1 else set %al = 0
andq $1, %rax // zero-extend %al into %rax for the return value
ret
DEBUG_INFO __kmp_compare_and_store32
//------------------------------------------------------------------------
// FUNCTION __kmp_compare_and_store64
//
// kmp_int32
// __kmp_compare_and_store64( volatile kmp_int64 *p, kmp_int64 cv, kmp_int64 sv );
//
// parameters:
// p: %rdi
// cv: %rsi
// sv: %rdx
// return: %eax
.text
PROC __kmp_compare_and_store64
movq %rsi, %rax // "cv"
lock
cmpxchgq %rdx,(%rdi)
sete %al // if %rax == (%rdi) set %al = 1 else set %al = 0
andq $1, %rax // zero-extend %al into %rax for the return value
ret
DEBUG_INFO __kmp_compare_and_store64
//------------------------------------------------------------------------
// FUNCTION __kmp_compare_and_store_ret8
//
// kmp_int8
// __kmp_compare_and_store_ret8( volatile kmp_int8 *p, kmp_int8 cv, kmp_int8 sv );
//
// parameters:
// p: %rdi
// cv: %esi
// sv: %edx
//
// return: %eax
.text
PROC __kmp_compare_and_store_ret8
movb %sil, %al // "cv"
lock
cmpxchgb %dl,(%rdi)
ret
DEBUG_INFO __kmp_compare_and_store_ret8
//------------------------------------------------------------------------
// FUNCTION __kmp_compare_and_store_ret16
//
// kmp_int16
// __kmp_compare_and_store_ret16( volatile kmp_int16 *p, kmp_int16 cv, kmp_int16 sv );
//
// parameters:
// p: %rdi
// cv: %si
// sv: %dx
//
// return: %eax
.text
PROC __kmp_compare_and_store_ret16
movw %si, %ax // "cv"
lock
cmpxchgw %dx,(%rdi)
ret
DEBUG_INFO __kmp_compare_and_store_ret16
//------------------------------------------------------------------------
// FUNCTION __kmp_compare_and_store_ret32
//
// kmp_int32
// __kmp_compare_and_store_ret32( volatile kmp_int32 *p, kmp_int32 cv, kmp_int32 sv );
//
// parameters:
// p: %rdi
// cv: %esi
// sv: %edx
//
// return: %eax
.text
PROC __kmp_compare_and_store_ret32
movl %esi, %eax // "cv"
lock
cmpxchgl %edx,(%rdi)
ret
DEBUG_INFO __kmp_compare_and_store_ret32
//------------------------------------------------------------------------
// FUNCTION __kmp_compare_and_store_ret64
//
// kmp_int64
// __kmp_compare_and_store_ret64( volatile kmp_int64 *p, kmp_int64 cv, kmp_int64 sv );
//
// parameters:
// p: %rdi
// cv: %rsi
// sv: %rdx
// return: %rax
.text
PROC __kmp_compare_and_store_ret64
movq %rsi, %rax // "cv"
lock
cmpxchgq %rdx,(%rdi)
ret
DEBUG_INFO __kmp_compare_and_store_ret64
# endif /* !KMP_ASM_INTRINS */
# if !KMP_MIC
# if !KMP_ASM_INTRINS
//------------------------------------------------------------------------
// FUNCTION __kmp_xchg_real32
//
// kmp_real32
// __kmp_xchg_real32( volatile kmp_real32 *addr, kmp_real32 data );
//
// parameters:
// addr: %rdi
// data: %xmm0 (lower 4 bytes)
//
// return: %xmm0 (lower 4 bytes)
.text
PROC __kmp_xchg_real32
movd %xmm0, %eax // load "data" to eax
lock
xchgl %eax, (%rdi)
movd %eax, %xmm0 // load old value into return register
ret
DEBUG_INFO __kmp_xchg_real32
//------------------------------------------------------------------------
// FUNCTION __kmp_xchg_real64
//
// kmp_real64
// __kmp_xchg_real64( volatile kmp_real64 *addr, kmp_real64 data );
//
// parameters:
// addr: %rdi
// data: %xmm0 (lower 8 bytes)
// return: %xmm0 (lower 8 bytes)
.text
PROC __kmp_xchg_real64
movd %xmm0, %rax // load "data" to rax
lock
xchgq %rax, (%rdi)
movd %rax, %xmm0 // load old value into return register
ret
DEBUG_INFO __kmp_xchg_real64
# endif /* !KMP_ASM_INTRINS */
# endif /* !KMP_MIC */
//------------------------------------------------------------------------
// int
// __kmp_invoke_microtask( void (*pkfn) (int gtid, int tid, ...),
// int gtid, int tid,
// int argc, void *p_argv[]
// #if OMPT_SUPPORT
// ,
// void **exit_frame_ptr
// #endif
// ) {
// #if OMPT_SUPPORT
// *exit_frame_ptr = OMPT_GET_FRAME_ADDRESS(0);
// #endif
//
// (*pkfn)( & gtid, & tid, argv[0], ... );
// return 1;
// }
//
// note: at the call to pkfn, %rsp must be 128-byte aligned for the compiler
//
// parameters:
// %rdi: pkfn
// %esi: gtid
// %edx: tid
// %ecx: argc
// %r8: p_argv
// %r9: &exit_frame
//
// locals:
// __gtid: gtid parm pushed on stack so can pass &gtid to pkfn
// __tid: tid parm pushed on stack so can pass &tid to pkfn
//
// reg temps:
// %rax: used all over the place
// %rdx: used in stack pointer alignment calculation
// %r11: used to traverse p_argv array
// %rsi: used as temporary for stack parameters
// used as temporary for number of pkfn parms to push
// %rbx: used to hold pkfn address, and zero constant, callee-save
//
// return: %eax (always 1/TRUE)
__gtid = -16
__tid = -24
// -- Begin __kmp_invoke_microtask
// mark_begin;
.text
PROC __kmp_invoke_microtask
pushq %rbp // save base pointer
KMP_CFI_DEF_OFFSET 16
KMP_CFI_OFFSET rbp,-16
movq %rsp,%rbp // establish the base pointer for this routine.
KMP_CFI_REGISTER rbp
#if OMPT_SUPPORT
movq %rbp, (%r9) // save exit_frame
#endif
pushq %rbx // %rbx is callee-saved register
pushq %rsi // Put gtid on stack so can pass &gtid to pkfn
pushq %rdx // Put tid on stack so can pass &tid to pkfn
movq %rcx, %rax // Stack alignment calculation begins; argc -> %rax
movq $0, %rbx // constant for cmovs later
subq $4, %rax // subtract four args passed in registers to pkfn
#if KMP_MIC
js KMP_LABEL(kmp_0) // jump to movq
jmp KMP_LABEL(kmp_0_exit) // jump ahead
KMP_LABEL(kmp_0):
movq %rbx, %rax // zero negative value in %rax <- max(0, argc-4)
KMP_LABEL(kmp_0_exit):
#else
cmovsq %rbx, %rax // zero negative value in %rax <- max(0, argc-4)
#endif // KMP_MIC
movq %rax, %rsi // save max(0, argc-4) -> %rsi for later
shlq $3, %rax // Number of bytes used on stack: max(0, argc-4)*8
movq %rsp, %rdx //
subq %rax, %rdx // %rsp-(max(0,argc-4)*8) -> %rdx --
// without align, stack ptr would be this
movq %rdx, %rax // Save to %rax
andq $0xFFFFFFFFFFFFFF80, %rax // mask off low 7 bits (128-byte alignment)
subq %rax, %rdx // Amount to subtract from %rsp
subq %rdx, %rsp // Prepare the stack ptr --
// now %rsp will align to 128-byte boundary at call site
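// Worked example (illustrative): argc = 7 -> max(0, 7-4)*8 = 24 bytes of
// stack-passed parms; %rsp is lowered so that once those parms are pushed,
// the call site observes the required 128-byte alignment.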
// setup pkfn parameter reg and stack
movq %rcx, %rax // argc -> %rax
cmpq $0, %rsi
je KMP_LABEL(kmp_invoke_pass_parms) // jump ahead if no parms to push
shlq $3, %rcx // argc*8 -> %rcx
movq %r8, %rdx // p_argv -> %rdx
addq %rcx, %rdx // &p_argv[argc] -> %rdx
movq %rsi, %rcx // max (0, argc-4) -> %rcx
KMP_LABEL(kmp_invoke_push_parms):
// push nth - 7th parms to pkfn on stack
subq $8, %rdx // decrement p_argv pointer to previous parm
movq (%rdx), %rsi // p_argv[%rcx-1] -> %rsi
pushq %rsi // push p_argv[%rcx-1] onto stack (reverse order)
subl $1, %ecx
// C69570: "X86_64_RELOC_BRANCH not supported" error at linking on mac_32e
// if the name of the label that is an operand of this jecxz starts with a dot (".");
// Apple's linker does not support 1-byte length relocation;
// Resolution: replace all .labelX entries with L_labelX.
jecxz KMP_LABEL(kmp_invoke_pass_parms) // stop when four p_argv[] parms left
jmp KMP_LABEL(kmp_invoke_push_parms)
ALIGN 3
KMP_LABEL(kmp_invoke_pass_parms): // put 1st - 6th parms to pkfn in registers.
// order here is important to avoid trashing
// registers used for both input and output parms!
movq %rdi, %rbx // pkfn -> %rbx
leaq __gtid(%rbp), %rdi // &gtid -> %rdi (store 1st parm to pkfn)
leaq __tid(%rbp), %rsi // &tid -> %rsi (store 2nd parm to pkfn)
movq %r8, %r11 // p_argv -> %r11
#if KMP_MIC
cmpq $4, %rax // argc >= 4?
jns KMP_LABEL(kmp_4) // jump to movq
jmp KMP_LABEL(kmp_4_exit) // jump ahead
KMP_LABEL(kmp_4):
movq 24(%r11), %r9 // p_argv[3] -> %r9 (store 6th parm to pkfn)
KMP_LABEL(kmp_4_exit):
cmpq $3, %rax // argc >= 3?
jns KMP_LABEL(kmp_3) // jump to movq
jmp KMP_LABEL(kmp_3_exit) // jump ahead
KMP_LABEL(kmp_3):
movq 16(%r11), %r8 // p_argv[2] -> %r8 (store 5th parm to pkfn)
KMP_LABEL(kmp_3_exit):
cmpq $2, %rax // argc >= 2?
jns KMP_LABEL(kmp_2) // jump to movq
jmp KMP_LABEL(kmp_2_exit) // jump ahead
KMP_LABEL(kmp_2):
movq 8(%r11), %rcx // p_argv[1] -> %rcx (store 4th parm to pkfn)
KMP_LABEL(kmp_2_exit):
cmpq $1, %rax // argc >= 1?
jns KMP_LABEL(kmp_1) // jump to movq
jmp KMP_LABEL(kmp_1_exit) // jump ahead
KMP_LABEL(kmp_1):
movq (%r11), %rdx // p_argv[0] -> %rdx (store 3rd parm to pkfn)
KMP_LABEL(kmp_1_exit):
#else
cmpq $4, %rax // argc >= 4?
cmovnsq 24(%r11), %r9 // p_argv[3] -> %r9 (store 6th parm to pkfn)
cmpq $3, %rax // argc >= 3?
cmovnsq 16(%r11), %r8 // p_argv[2] -> %r8 (store 5th parm to pkfn)
cmpq $2, %rax // argc >= 2?
cmovnsq 8(%r11), %rcx // p_argv[1] -> %rcx (store 4th parm to pkfn)
cmpq $1, %rax // argc >= 1?
cmovnsq (%r11), %rdx // p_argv[0] -> %rdx (store 3rd parm to pkfn)
#endif // KMP_MIC
call *%rbx // call (*pkfn)();
movq $1, %rax // move 1 into return register;
movq -8(%rbp), %rbx // restore %rbx using %rbp since %rsp was modified
movq %rbp, %rsp // restore stack pointer
popq %rbp // restore frame pointer
KMP_CFI_DEF rsp,8
ret
DEBUG_INFO __kmp_invoke_microtask
// -- End __kmp_invoke_microtask
// kmp_uint64
// __kmp_hardware_timestamp(void)
.text
PROC __kmp_hardware_timestamp
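// rdtsc returns the 64-bit time-stamp counter split across %edx:%eax; the
// shift/or below merges the two halves into %rax for the return value.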
rdtsc
shlq $32, %rdx
orq %rdx, %rax
ret
DEBUG_INFO __kmp_hardware_timestamp
// -- End __kmp_hardware_timestamp
//------------------------------------------------------------------------
// FUNCTION __kmp_bsr32
//
// int
// __kmp_bsr32( int );
.text
PROC __kmp_bsr32
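// bsr places the bit index of the highest set bit of %edi into %eax
// (result undefined if the input is zero).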
bsr %edi,%eax
ret
DEBUG_INFO __kmp_bsr32
// -----------------------------------------------------------------------
#endif /* KMP_ARCH_X86_64 */
#if (KMP_OS_LINUX || KMP_OS_DARWIN) && KMP_ARCH_AARCH64
//------------------------------------------------------------------------
// int
// __kmp_invoke_microtask( void (*pkfn) (int gtid, int tid, ...),
// int gtid, int tid,
// int argc, void *p_argv[]
// #if OMPT_SUPPORT
// ,
// void **exit_frame_ptr
// #endif
// ) {
// #if OMPT_SUPPORT
// *exit_frame_ptr = OMPT_GET_FRAME_ADDRESS(0);
// #endif
//
// (*pkfn)( & gtid, & tid, argv[0], ... );
//
// // FIXME: This is done at call-site and can be removed here.
// #if OMPT_SUPPORT
// *exit_frame_ptr = 0;
// #endif
//
// return 1;
// }
//
// parameters:
// x0: pkfn
// w1: gtid
// w2: tid
// w3: argc
// x4: p_argv
// x5: &exit_frame
//
// locals:
// __gtid: gtid parm pushed on stack so can pass &gtid to pkfn
// __tid: tid parm pushed on stack so can pass &tid to pkfn
//
// reg temps:
// x8: used to hold pkfn address
// w9: used as temporary for number of pkfn parms
// x10: used to traverse p_argv array
// x11: used as temporary for stack placement calculation
// x12: used as temporary for stack parameters
// x19: used to preserve exit_frame_ptr, callee-save
//
// return: w0 (always 1/TRUE)
//
__gtid = 4
__tid = 8
// -- Begin __kmp_invoke_microtask
// mark_begin;
.text
PROC __kmp_invoke_microtask
stp x29, x30, [sp, #-16]!
# if OMPT_SUPPORT
stp x19, x20, [sp, #-16]!
# endif
mov x29, sp
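// Reserve 16*(1 + argc/2) bytes below the frame: enough for the by-reference
// gtid/tid slots plus any stack-passed parms, while keeping sp 16-byte
// aligned.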
orr w9, wzr, #1
add w9, w9, w3, lsr #1
sub sp, sp, w9, uxtw #4
mov x11, sp
mov x8, x0
str w1, [x29, #-__gtid]
str w2, [x29, #-__tid]
mov w9, w3
mov x10, x4
# if OMPT_SUPPORT
mov x19, x5
str x29, [x19]
# endif
sub x0, x29, #__gtid
sub x1, x29, #__tid
cbz w9, KMP_LABEL(kmp_1)
ldr x2, [x10]
sub w9, w9, #1
cbz w9, KMP_LABEL(kmp_1)
ldr x3, [x10, #8]!
sub w9, w9, #1
cbz w9, KMP_LABEL(kmp_1)
ldr x4, [x10, #8]!
sub w9, w9, #1
cbz w9, KMP_LABEL(kmp_1)
ldr x5, [x10, #8]!
sub w9, w9, #1
cbz w9, KMP_LABEL(kmp_1)
ldr x6, [x10, #8]!
sub w9, w9, #1
cbz w9, KMP_LABEL(kmp_1)
ldr x7, [x10, #8]!
KMP_LABEL(kmp_0):
sub w9, w9, #1
cbz w9, KMP_LABEL(kmp_1)
ldr x12, [x10, #8]!
str x12, [x11], #8
b KMP_LABEL(kmp_0)
KMP_LABEL(kmp_1):
blr x8
orr w0, wzr, #1
mov sp, x29
# if OMPT_SUPPORT
str xzr, [x19]
ldp x19, x20, [sp], #16
# endif
ldp x29, x30, [sp], #16
ret
DEBUG_INFO __kmp_invoke_microtask
// -- End __kmp_invoke_microtask
#endif /* (KMP_OS_LINUX || KMP_OS_DARWIN) && KMP_ARCH_AARCH64 */
#if KMP_ARCH_PPC64
//------------------------------------------------------------------------
// int
// __kmp_invoke_microtask( void (*pkfn) (int gtid, int tid, ...),
// int gtid, int tid,
// int argc, void *p_argv[]
// #if OMPT_SUPPORT
// ,
// void **exit_frame_ptr
// #endif
// ) {
// #if OMPT_SUPPORT
// *exit_frame_ptr = OMPT_GET_FRAME_ADDRESS(0);
// #endif
//
// (*pkfn)( & gtid, & tid, argv[0], ... );
//
// // FIXME: This is done at call-site and can be removed here.
// #if OMPT_SUPPORT
// *exit_frame_ptr = 0;
// #endif
//
// return 1;
// }
//
// parameters:
// r3: pkfn
// r4: gtid
// r5: tid
// r6: argc
// r7: p_argv
// r8: &exit_frame
//
// return: r3 (always 1/TRUE)
//
.text
# if KMP_ARCH_PPC64_ELFv2
.abiversion 2
# endif
.globl __kmp_invoke_microtask
# if KMP_ARCH_PPC64_ELFv2
.p2align 4
# else
.p2align 2
# endif
.type __kmp_invoke_microtask,@function
# if KMP_ARCH_PPC64_ELFv2
__kmp_invoke_microtask:
.Lfunc_begin0:
.Lfunc_gep0:
addis 2, 12, .TOC.-.Lfunc_gep0@ha
addi 2, 2, .TOC.-.Lfunc_gep0@l
.Lfunc_lep0:
.localentry __kmp_invoke_microtask, .Lfunc_lep0-.Lfunc_gep0
# else
.section .opd,"aw",@progbits
__kmp_invoke_microtask:
.p2align 3
.quad .Lfunc_begin0
.quad .TOC.@tocbase
.quad 0
.text
.Lfunc_begin0:
# endif
// -- Begin __kmp_invoke_microtask
// mark_begin;
// We need to allocate a stack frame large enough to hold all of the parameters
// on the stack for the microtask plus what this function needs. That's 48
// bytes under the ELFv1 ABI (32 bytes under ELFv2), plus 8*(2 + argc) for the
// parameters to the microtask, plus 8 bytes to store the values of r4 and r5,
// and 8 bytes to store r31. With OMP-T support, we need an additional 8 bytes
// to save r30 to hold a copy of r8.
.cfi_startproc
mflr 0
std 31, -8(1)
std 0, 16(1)
// This is unusual because normally we'd set r31 equal to r1 after the stack
// frame is established. In this case, however, we need to dynamically compute
// the stack frame size, and so we keep a direct copy of r1 to access our
// register save areas and restore the r1 value before returning.
mr 31, 1
.cfi_def_cfa_register r31
.cfi_offset r31, -8
.cfi_offset lr, 16
// Compute the size necessary for the local stack frame.
# if KMP_ARCH_PPC64_ELFv2
li 12, 72
# else
li 12, 88
# endif
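// e.g. the ELFv1 constant breaks down per the comment above as: 48 (linkage)
// + 16 (the two fixed parm slots of the 8*(2+argc) term) + 8 (r4/r5)
// + 8 (r31) + 8 (r30 for OMPT) = 88; ELFv2 starts from 32 bytes of linkage,
// giving 72. The argc*8 portion is added dynamically just below.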
sldi 0, 6, 3
add 12, 0, 12
neg 12, 12
// We need to make sure that the stack frame stays aligned (to 16 bytes).
li 0, -16
and 12, 0, 12
// Establish the local stack frame.
stdux 1, 1, 12
# if OMPT_SUPPORT
.cfi_offset r30, -16
std 30, -16(31)
std 1, 0(8)
mr 30, 8
# endif
// Store gtid and tid to the stack because they're passed by reference to the
// microtask.
stw 4, -20(31)
stw 5, -24(31)
mr 12, 6
mr 4, 7
cmpwi 0, 12, 1
blt 0, .Lcall
ld 5, 0(4)
cmpwi 0, 12, 2
blt 0, .Lcall
ld 6, 8(4)
cmpwi 0, 12, 3
blt 0, .Lcall
ld 7, 16(4)
cmpwi 0, 12, 4
blt 0, .Lcall
ld 8, 24(4)
cmpwi 0, 12, 5
blt 0, .Lcall
ld 9, 32(4)
cmpwi 0, 12, 6
blt 0, .Lcall
ld 10, 40(4)
cmpwi 0, 12, 7
blt 0, .Lcall
// There are more than 6 microtask parameters, so we need to store the
// remainder to the stack.
addi 12, 12, -6
mtctr 12
// These are set to 8 bytes before the first desired store address (we're using
// pre-increment loads and stores in the loop below). The parameter save area
// for the microtask begins 48 + 8*8 == 112 bytes above r1 for ELFv1 and
// 32 + 8*8 == 96 bytes above r1 for ELFv2.
addi 4, 4, 40
# if KMP_ARCH_PPC64_ELFv2
addi 12, 1, 88
# else
addi 12, 1, 104
# endif
.Lnext:
ldu 0, 8(4)
stdu 0, 8(12)
bdnz .Lnext
.Lcall:
# if KMP_ARCH_PPC64_ELFv2
std 2, 24(1)
mr 12, 3
# else
std 2, 40(1)
// For ELFv1, we need to load the actual function address from the function
// descriptor.
ld 12, 0(3)
ld 2, 8(3)
ld 11, 16(3)
# endif
addi 3, 31, -20
addi 4, 31, -24
mtctr 12
bctrl
# if KMP_ARCH_PPC64_ELFv2
ld 2, 24(1)
# else
ld 2, 40(1)
# endif
# if OMPT_SUPPORT
li 3, 0
std 3, 0(30)
# endif
li 3, 1
# if OMPT_SUPPORT
ld 30, -16(31)
# endif
mr 1, 31
ld 0, 16(1)
ld 31, -8(1)
mtlr 0
blr
.long 0
.quad 0
.Lfunc_end0:
.size __kmp_invoke_microtask, .Lfunc_end0-.Lfunc_begin0
.cfi_endproc
// -- End __kmp_invoke_microtask
#endif /* KMP_ARCH_PPC64 */
#if KMP_ARCH_RISCV64
//------------------------------------------------------------------------
//
// typedef void (*microtask_t)(int *gtid, int *tid, ...);
//
// int __kmp_invoke_microtask(microtask_t pkfn, int gtid, int tid, int argc,
// void *p_argv[]
// #if OMPT_SUPPORT
// ,
// void **exit_frame_ptr
// #endif
// ) {
// #if OMPT_SUPPORT
// *exit_frame_ptr = OMPT_GET_FRAME_ADDRESS(0);
// #endif
//
// (*pkfn)(&gtid, &tid, argv[0], ...);
//
// return 1;
// }
//
// Parameters:
// a0: pkfn
// a1: gtid
// a2: tid
// a3: argc
// a4: p_argv
// a5: exit_frame_ptr
//
// Locals:
// __gtid: gtid param pushed on stack so can pass &gtid to pkfn
// __tid: tid param pushed on stack so can pass &tid to pkfn
//
// Temp. registers:
//
// t0: used to calculate the dynamic stack size / used to hold pkfn address
// t1: used as temporary for stack placement calculation
// t2: used as temporary for stack arguments
// t3: used as temporary for number of remaining pkfn parms
// t4: used to traverse p_argv array
//
// return: a0 (always 1/TRUE)
//
__gtid = -20
__tid = -24
// -- Begin __kmp_invoke_microtask
// mark_begin;
.text
.globl __kmp_invoke_microtask
.p2align 1
.type __kmp_invoke_microtask,@function
__kmp_invoke_microtask:
.cfi_startproc
// First, save ra and fp
addi sp, sp, -16
sd ra, 8(sp)
sd fp, 0(sp)
addi fp, sp, 16
.cfi_def_cfa fp, 0
.cfi_offset ra, -8
.cfi_offset fp, -16
// Compute the dynamic stack size:
//
// - We need 8 bytes for storing 'gtid' and 'tid', so we can pass them by
// reference
// - We need 8 bytes for each argument that cannot be passed to the 'pkfn'
// function by register. Given that we have 8 of such registers (a[0-7])
// and two + 'argc' arguments (consider &gtid and &tid), we need to
// reserve max(0, argc - 6)*8 extra bytes
//
// The total number of bytes is then max(0, argc - 6)*8 + 8
// Compute max(0, argc - 6) using the following bithack:
// max(0, x) = x - (x & (x >> 31)), where x := argc - 6
// Source: http://graphics.stanford.edu/~seander/bithacks.html#IntegerMinOrMax
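// Worked example (illustrative): argc = 9 -> x = 3, x >> 31 = 0, so
// max(0, 3) = 3 and (3 + 1)*8 = 32 bytes are reserved; argc = 2 -> x = -4,
// x >> 31 = -1, so x - (x & -1) = 0 and only 8 bytes are reserved.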
addi t0, a3, -6
srai t1, t0, 31
and t1, t0, t1
sub t0, t0, t1
addi t0, t0, 1
slli t0, t0, 3
sub sp, sp, t0
// Align the stack to 16 bytes
andi sp, sp, -16
mv t0, a0
mv t3, a3
mv t4, a4
#if OMPT_SUPPORT
// Save frame pointer into exit_frame
sd fp, 0(a5)
#endif
// Prepare arguments for the pkfn function (first 8 using a0-a7 registers)
sw a1, __gtid(fp)
sw a2, __tid(fp)
addi a0, fp, __gtid
addi a1, fp, __tid
beqz t3, .L_kmp_3
ld a2, 0(t4)
addi t3, t3, -1
beqz t3, .L_kmp_3
ld a3, 8(t4)
addi t3, t3, -1
beqz t3, .L_kmp_3
ld a4, 16(t4)
addi t3, t3, -1
beqz t3, .L_kmp_3
ld a5, 24(t4)
addi t3, t3, -1
beqz t3, .L_kmp_3
ld a6, 32(t4)
addi t3, t3, -1
beqz t3, .L_kmp_3
ld a7, 40(t4)
// Prepare any additional argument passed through the stack
addi t4, t4, 48
mv t1, sp
j .L_kmp_2
.L_kmp_1:
ld t2, 0(t4)
sd t2, 0(t1)
addi t4, t4, 8
addi t1, t1, 8
.L_kmp_2:
addi t3, t3, -1
bnez t3, .L_kmp_1
.L_kmp_3:
// Call pkfn function
jalr t0
// Restore stack and return
addi a0, zero, 1
addi sp, fp, -16
ld fp, 0(sp)
ld ra, 8(sp)
addi sp, sp, 16
ret
.Lfunc_end0:
.size __kmp_invoke_microtask, .Lfunc_end0-__kmp_invoke_microtask
.cfi_endproc
// -- End __kmp_invoke_microtask
#endif /* KMP_ARCH_RISCV64 */
#if KMP_ARCH_ARM || KMP_ARCH_MIPS
.data
.comm .gomp_critical_user_,32,8
.data
.align 4
.global __kmp_unnamed_critical_addr
__kmp_unnamed_critical_addr:
.4byte .gomp_critical_user_
.size __kmp_unnamed_critical_addr,4
#endif /* KMP_ARCH_ARM || KMP_ARCH_MIPS */
#if KMP_ARCH_PPC64 || KMP_ARCH_AARCH64 || KMP_ARCH_MIPS64 || KMP_ARCH_RISCV64
#ifndef KMP_PREFIX_UNDERSCORE
# define KMP_PREFIX_UNDERSCORE(x) x
#endif
.data
.comm .gomp_critical_user_,32,8
.data
.align 8
.global KMP_PREFIX_UNDERSCORE(__kmp_unnamed_critical_addr)
KMP_PREFIX_UNDERSCORE(__kmp_unnamed_critical_addr):
.8byte .gomp_critical_user_
#ifdef __ELF__
.size KMP_PREFIX_UNDERSCORE(__kmp_unnamed_critical_addr),8
#endif
#endif /* KMP_ARCH_PPC64 || KMP_ARCH_AARCH64 || KMP_ARCH_MIPS64 ||
KMP_ARCH_RISCV64 */
#if KMP_OS_LINUX
# if KMP_ARCH_ARM
.section .note.GNU-stack,"",%progbits
# else
.section .note.GNU-stack,"",@progbits
# endif
#endif