12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345134613471348134913501351135213531354135513561357135813591360136113621363136413651366136713681369137013711372137313741375137613771378137913801381138213831384138513861387138813891390139113921393139413951396139713981399140014011402140314041405140614071408140914101411141214131414141514161417141814191420142114221423142414251426142714281429143014311432143314341435143614371438143914401441144214431444144514461447144814491450145114521453145414551456145714581459146014611462146314641465146614671468146914701471147214731474147514761477147814791480148114821483148414851486148714881489149014911492149314941495149614971498149915001501150215031504150515061507150815091510151115121513151415151516151715181519152015211522152315241525152615271528152915301531153215331534153515361537153815391540154115421543154415451546154715481549155015511552155315541555155615571558155915601561156215631564156515661567156815691570157115721573157415751576157715781579158015811582158315841585158615871588158915901591159215931594159515961597159815991600160116021603160416051606160716081609161016111612161316141615161616171618161916201621162216231624162516261627162816291630163116321633163416351636163716381639164016411642164316441645164616471648164916501651165216531654165516561657165816591660166116621663166416651666166716681669167016711672167316741675167616771678167916801681168216831684168516861687168816891690169116921693169416951696169716981699170017011702170317041705170617071708170917101711171217131714171517161717171817191720172117221723172417251726172717281729173017311732173317341735173617371738173917401741174217431744174517461747174817491750175117521753175417551756175717581759176017611762 |
- // z_Linux_asm.S: - microtasking routines specifically
- // written for Intel platforms running Linux* OS
- //
- ////===----------------------------------------------------------------------===//
- ////
- //// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
- //// See https://llvm.org/LICENSE.txt for license information.
- //// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
- ////
- ////===----------------------------------------------------------------------===//
- //
- // -----------------------------------------------------------------------
- // macros
- // -----------------------------------------------------------------------
- #include "kmp_config.h"
- #if KMP_ARCH_X86 || KMP_ARCH_X86_64
- # if KMP_MIC
- // On MIC the 'delay r16/r32/r64' should be used instead of the 'pause'.
- // The delay operation has the effect of removing the current thread from
- // the round-robin HT mechanism, and therefore speeds up the issue rate of
- // the other threads on the same core.
- //
- // A value of 0 works fine for <= 2 threads per core, but causes the EPCC
- // barrier time to increase greatly for 3 or more threads per core.
- //
- // A value of 100 works pretty well for up to 4 threads per core, but isn't
- // quite as fast as 0 for 2 threads per core.
- //
- // We need to check what happens for oversubscription / > 4 threads per core.
- // It is possible that we need to pass the delay value in as a parameter
- // that the caller determines based on the total # threads / # cores.
- // NOTE: the macro below is commented out, so this branch defines no pause_op.
- //.macro pause_op
- // mov $100, %rax
- // delay %rax
- //.endm
- # else
- # define pause_op .byte 0xf3,0x90 // raw opcode bytes F3 90, i.e. the pause instruction
- # endif // KMP_MIC
- # if KMP_OS_DARWIN
- # define KMP_PREFIX_UNDERSCORE(x) _##x // extra underscore for OS X* symbols
- # define KMP_LABEL(x) L_##x // form the name of label
- .macro KMP_CFI_DEF_OFFSET // the four KMP_CFI_* helpers are empty macros in this branch
- .endmacro
- .macro KMP_CFI_OFFSET
- .endmacro
- .macro KMP_CFI_REGISTER
- .endmacro
- .macro KMP_CFI_DEF
- .endmacro
- .macro ALIGN
- .align $0 // $0: first macro argument, handed to .align unchanged
- .endmacro
- .macro DEBUG_INFO
- /* Not sure what .size does in icc, not sure if we need to do something
- similar for OS X*.
- */
- .endmacro
- .macro PROC
- ALIGN 4
- .globl KMP_PREFIX_UNDERSCORE($0) // export the underscore-prefixed routine name
- KMP_PREFIX_UNDERSCORE($0): // entry label of the routine
- .endmacro
- # else // KMP_OS_DARWIN
- # define KMP_PREFIX_UNDERSCORE(x) x //no extra underscore for Linux* OS symbols
- // Format labels so that they don't override function names in gdb's backtraces
- // MIC assembler doesn't accept .L syntax, the L works fine there (as well as
- // on OS X*)
- # if KMP_MIC
- # define KMP_LABEL(x) L_##x // local label
- # else
- # define KMP_LABEL(x) .L_##x // local label hidden from backtraces
- # endif // KMP_MIC
- .macro ALIGN size
- .align 1<<(\size) // size is an exponent: ALIGN 4 expands to .align 16
- .endm
- .macro DEBUG_INFO proc
- .cfi_endproc // closes the .cfi_startproc emitted by PROC
- // Not sure why we need .type and .size for the functions
- .align 16
- .type \proc,@function
- .size \proc,.-\proc // size = current location minus the start of proc
- .endm
- .macro PROC proc
- ALIGN 4
- .globl KMP_PREFIX_UNDERSCORE(\proc) // export the routine name
- KMP_PREFIX_UNDERSCORE(\proc): // entry label of the routine
- .cfi_startproc // matched by .cfi_endproc in DEBUG_INFO
- .endm
- .macro KMP_CFI_DEF_OFFSET sz
- .cfi_def_cfa_offset \sz // thin wrapper over the .cfi directive of the same name
- .endm
- .macro KMP_CFI_OFFSET reg, sz
- .cfi_offset \reg,\sz // thin wrapper over .cfi_offset
- .endm
- .macro KMP_CFI_REGISTER reg
- .cfi_def_cfa_register \reg // thin wrapper over .cfi_def_cfa_register
- .endm
- .macro KMP_CFI_DEF reg, sz
- .cfi_def_cfa \reg,\sz // thin wrapper over .cfi_def_cfa
- .endm
- # endif // KMP_OS_DARWIN
- #endif // KMP_ARCH_X86 || KMP_ARCH_X86_64
- #if (KMP_OS_LINUX || KMP_OS_DARWIN) && KMP_ARCH_AARCH64
- # if KMP_OS_DARWIN
- # define KMP_PREFIX_UNDERSCORE(x) _##x // extra underscore for OS X* symbols
- # define KMP_LABEL(x) L_##x // form the name of label
- .macro ALIGN
- .align $0 // $0: first macro argument, handed to .align unchanged
- .endmacro
- .macro DEBUG_INFO
- /* Not sure what .size does in icc, not sure if we need to do something
- similar for OS X*.
- */
- .endmacro
- .macro PROC
- ALIGN 4
- .globl KMP_PREFIX_UNDERSCORE($0) // export the underscore-prefixed routine name
- KMP_PREFIX_UNDERSCORE($0): // entry label of the routine
- .endmacro
- # else // KMP_OS_DARWIN
- # define KMP_PREFIX_UNDERSCORE(x) x // no extra underscore for Linux* OS symbols
- // Format labels so that they don't override function names in gdb's backtraces
- # define KMP_LABEL(x) .L_##x // local label hidden from backtraces
- .macro ALIGN size
- .align 1<<(\size) // size is an exponent: ALIGN 2 expands to .align 4
- .endm
- .macro DEBUG_INFO proc
- .cfi_endproc // closes the .cfi_startproc emitted by PROC
- // Not sure why we need .type and .size for the functions
- ALIGN 2
- .type \proc,@function
- .size \proc,.-\proc // size = current location minus the start of proc
- .endm
- .macro PROC proc
- ALIGN 2
- .globl KMP_PREFIX_UNDERSCORE(\proc) // export the routine name
- KMP_PREFIX_UNDERSCORE(\proc): // entry label of the routine
- .cfi_startproc // matched by .cfi_endproc in DEBUG_INFO
- .endm
- # endif // KMP_OS_DARWIN
- #endif // (KMP_OS_LINUX || KMP_OS_DARWIN) && KMP_ARCH_AARCH64
- // -----------------------------------------------------------------------
- // data
- // -----------------------------------------------------------------------
- #ifdef KMP_GOMP_COMPAT
- // Support for unnamed common blocks.
- // Each variant reserves a 32-byte common symbol and a global pointer to it.
- // Because the symbol ".gomp_critical_user_" contains a ".", we have to
- // put this stuff in assembly.
- # if KMP_ARCH_X86
- # if KMP_OS_DARWIN
- .data
- .comm .gomp_critical_user_,32 // 32-byte common block
- .data
- .globl ___kmp_unnamed_critical_addr
- ___kmp_unnamed_critical_addr:
- .long .gomp_critical_user_ // 32-bit address of the common block
- # else /* Linux* OS */
- .data
- .comm .gomp_critical_user_,32,8 // 32-byte common block; third operand 8 is the requested alignment
- .data
- ALIGN 4
- .global __kmp_unnamed_critical_addr
- __kmp_unnamed_critical_addr:
- .4byte .gomp_critical_user_ // 32-bit address of the common block
- .type __kmp_unnamed_critical_addr,@object
- .size __kmp_unnamed_critical_addr,4
- # endif /* KMP_OS_DARWIN */
- # endif /* KMP_ARCH_X86 */
- # if KMP_ARCH_X86_64
- # if KMP_OS_DARWIN
- .data
- .comm .gomp_critical_user_,32 // 32-byte common block
- .data
- .globl ___kmp_unnamed_critical_addr
- ___kmp_unnamed_critical_addr:
- .quad .gomp_critical_user_ // 64-bit address of the common block
- # else /* Linux* OS */
- .data
- .comm .gomp_critical_user_,32,8 // 32-byte common block; third operand 8 is the requested alignment
- .data
- ALIGN 8 // NOTE(review): ALIGN takes an exponent here, so this is .align 256 -- confirm intent
- .global __kmp_unnamed_critical_addr
- __kmp_unnamed_critical_addr:
- .8byte .gomp_critical_user_ // 64-bit address of the common block
- .type __kmp_unnamed_critical_addr,@object
- .size __kmp_unnamed_critical_addr,8
- # endif /* KMP_OS_DARWIN */
- # endif /* KMP_ARCH_X86_64 */
- #endif /* KMP_GOMP_COMPAT */
- #if KMP_ARCH_X86 && !KMP_ARCH_PPC64
- // -----------------------------------------------------------------------
- // microtasking routines specifically written for IA-32 architecture
- // running Linux* OS
- // -----------------------------------------------------------------------
- .ident "Intel Corporation" // identification string for the object file
- .data // select the data section
- ALIGN 4 // ALIGN macro, argument 4 (expansion differs per OS branch of the macro)
- // void
- // __kmp_x86_pause( void ); -- executes one pause_op and returns
- .text // routine goes in the code section (the preamble selected .data)
- PROC __kmp_x86_pause
- pause_op // macro defined near the top of this file
- ret
- DEBUG_INFO __kmp_x86_pause
- # if !KMP_ASM_INTRINS
- //------------------------------------------------------------------------
- // FUNCTION __kmp_test_then_add32
- //
- // kmp_int32
- // __kmp_test_then_add32( volatile kmp_int32 *p, kmp_int32 d );
- //
- // Atomic fetch-and-add: *p += d, returning the value *p held beforehand.
- //
- // parameters:
- // p: 4(%esp)
- // d: 8(%esp)
- //
- // return: %eax
- PROC __kmp_test_then_add32
- movl 8(%esp), %eax // addend "d"
- movl 4(%esp), %ecx // target address "p"
- lock
- xaddl %eax,(%ecx) // *p += d; %eax receives the previous *p
- ret
- DEBUG_INFO __kmp_test_then_add32
- //------------------------------------------------------------------------
- // FUNCTION __kmp_xchg_fixed8
- //
- // kmp_int32
- // __kmp_xchg_fixed8( volatile kmp_int8 *p, kmp_int8 d );
- //
- // Atomically swaps the byte at *p with d and hands back the old byte.
- // NOTE(review): the prototype above says kmp_int32 but only %al is written;
- // confirm against the C declaration.
- //
- // parameters:
- // p: 4(%esp)
- // d: 8(%esp)
- //
- // return: %al
- PROC __kmp_xchg_fixed8
- movb 8(%esp), %al // new value "d"
- movl 4(%esp), %ecx // target address "p"
- lock
- xchgb %al,(%ecx) // %al <- old *p, *p <- d
- ret
- DEBUG_INFO __kmp_xchg_fixed8
- //------------------------------------------------------------------------
- // FUNCTION __kmp_xchg_fixed16
- //
- // kmp_int16
- // __kmp_xchg_fixed16( volatile kmp_int16 *p, kmp_int16 d );
- //
- // Atomically swaps the 16-bit word at *p with d and hands back the old word.
- //
- // parameters:
- // p: 4(%esp)
- // d: 8(%esp)
- //
- // return: %ax
- PROC __kmp_xchg_fixed16
- movw 8(%esp), %ax // new value "d"
- movl 4(%esp), %ecx // target address "p"
- lock
- xchgw %ax,(%ecx) // %ax <- old *p, *p <- d
- ret
- DEBUG_INFO __kmp_xchg_fixed16
- //------------------------------------------------------------------------
- // FUNCTION __kmp_xchg_fixed32
- //
- // kmp_int32
- // __kmp_xchg_fixed32( volatile kmp_int32 *p, kmp_int32 d );
- //
- // Atomically swaps the 32-bit word at *p with d and hands back the old word.
- //
- // parameters:
- // p: 4(%esp)
- // d: 8(%esp)
- //
- // return: %eax
- PROC __kmp_xchg_fixed32
- movl 8(%esp), %eax // new value "d"
- movl 4(%esp), %ecx // target address "p"
- lock
- xchgl %eax,(%ecx) // %eax <- old *p, *p <- d
- ret
- DEBUG_INFO __kmp_xchg_fixed32
- // kmp_int8
- // __kmp_compare_and_store8( volatile kmp_int8 *p, kmp_int8 cv, kmp_int8 sv );
- //
- // Atomic compare-and-swap on one byte: when *p equals cv, sv is written to
- // *p. The result is 1 if the store happened and 0 otherwise.
- //
- // parameters:
- // p: 4(%esp)
- // cv: 8(%esp)
- // sv: 12(%esp)
- //
- // return: %eax (0 or 1)
- PROC __kmp_compare_and_store8
- movb 12(%esp), %dl // replacement value "sv"
- movb 8(%esp), %al // expected value "cv" (cmpxchg compares against %al)
- movl 4(%esp), %ecx // target address "p"
- lock
- cmpxchgb %dl,(%ecx) // ZF = 1 when the swap took place
- sete %al // %al = ZF
- movzbl %al, %eax // widen the flag to a clean 0/1 in %eax
- ret
- DEBUG_INFO __kmp_compare_and_store8
- // kmp_int16
- // __kmp_compare_and_store16(volatile kmp_int16 *p, kmp_int16 cv, kmp_int16 sv);
- //
- // Atomic compare-and-swap on a 16-bit word: when *p equals cv, sv is written
- // to *p. The result is 1 if the store happened and 0 otherwise.
- //
- // parameters:
- // p: 4(%esp)
- // cv: 8(%esp)
- // sv: 12(%esp)
- //
- // return: %eax (0 or 1)
- PROC __kmp_compare_and_store16
- movw 12(%esp), %dx // replacement value "sv"
- movw 8(%esp), %ax // expected value "cv" (cmpxchg compares against %ax)
- movl 4(%esp), %ecx // target address "p"
- lock
- cmpxchgw %dx,(%ecx) // ZF = 1 when the swap took place
- sete %al // %al = ZF
- movzbl %al, %eax // widen the flag to a clean 0/1 in %eax
- ret
- DEBUG_INFO __kmp_compare_and_store16
- // kmp_int32
- // __kmp_compare_and_store32(volatile kmp_int32 *p, kmp_int32 cv, kmp_int32 sv);
- //
- // Atomic compare-and-swap on a 32-bit word: when *p equals cv, sv is written
- // to *p. The result is 1 if the store happened and 0 otherwise.
- //
- // parameters:
- // p: 4(%esp)
- // cv: 8(%esp)
- // sv: 12(%esp)
- //
- // return: %eax (0 or 1)
- PROC __kmp_compare_and_store32
- movl 12(%esp), %edx // replacement value "sv"
- movl 8(%esp), %eax // expected value "cv" (cmpxchg compares against %eax)
- movl 4(%esp), %ecx // target address "p"
- lock
- cmpxchgl %edx,(%ecx) // ZF = 1 when the swap took place
- sete %al // %al = ZF
- movzbl %al, %eax // widen the flag to a clean 0/1 in %eax
- ret
- DEBUG_INFO __kmp_compare_and_store32
- // kmp_int32
- // __kmp_compare_and_store64(volatile kmp_int64 *p, kmp_int64 cv, kmp_int64 s );
- //
- // Atomic 64-bit compare-and-swap built on cmpxchg8b: when *p equals cv, the
- // third argument ("sv") is written to *p. The result is 1 if the store
- // happened and 0 otherwise.
- //
- // parameters (relative to %ebp once the frame is set up):
- // p: 8(%ebp)
- // cv: 12(%ebp) low half, 16(%ebp) high half
- // sv: 20(%ebp) low half, 24(%ebp) high half
- //
- // return: %eax (0 or 1)
- PROC __kmp_compare_and_store64
- pushl %ebp
- movl %esp, %ebp
- pushl %ebx // callee-saved; cmpxchg8b needs it for "sv" low
- pushl %edi // callee-saved; holds "p"
- movl 24(%ebp), %ecx // "sv" high order word
- movl 20(%ebp), %ebx // "sv" low order word
- movl 16(%ebp), %edx // "cv" high order word
- movl 12(%ebp), %eax // "cv" low order word
- movl 8(%ebp), %edi // "p"
- lock
- cmpxchg8b (%edi) // compares %edx:%eax with *p, stores %ecx:%ebx on match
- sete %al // %al = ZF (1 when the swap took place)
- movzbl %al, %eax // widen the flag to a clean 0/1 in %eax
- popl %edi
- popl %ebx
- leave // %esp <- %ebp, then pop %ebp
- ret
- DEBUG_INFO __kmp_compare_and_store64
- // kmp_int8
- // __kmp_compare_and_store_ret8(volatile kmp_int8 *p, kmp_int8 cv, kmp_int8 sv);
- //
- // Atomic compare-and-swap on one byte that reports the value found in *p:
- // when *p equals cv, sv is stored; in either case %al ends up holding the
- // byte that was in *p before the instruction.
- //
- // parameters:
- // p: 4(%esp)
- // cv: 8(%esp)
- // sv: 12(%esp)
- //
- // return: %al
- PROC __kmp_compare_and_store_ret8
- movb 12(%esp), %dl // replacement value "sv"
- movb 8(%esp), %al // expected value "cv"
- movl 4(%esp), %ecx // target address "p"
- lock
- cmpxchgb %dl,(%ecx) // on mismatch %al is reloaded from *p
- ret
- DEBUG_INFO __kmp_compare_and_store_ret8
- // kmp_int16
- // __kmp_compare_and_store_ret16(volatile kmp_int16 *p, kmp_int16 cv,
- // kmp_int16 sv);
- //
- // Atomic compare-and-swap on a 16-bit word that reports the value found in
- // *p: when *p equals cv, sv is stored; in either case %ax ends up holding
- // the word that was in *p before the instruction.
- //
- // parameters:
- // p: 4(%esp)
- // cv: 8(%esp)
- // sv: 12(%esp)
- //
- // return: %ax
- PROC __kmp_compare_and_store_ret16
- movw 12(%esp), %dx // replacement value "sv"
- movw 8(%esp), %ax // expected value "cv"
- movl 4(%esp), %ecx // target address "p"
- lock
- cmpxchgw %dx,(%ecx) // on mismatch %ax is reloaded from *p
- ret
- DEBUG_INFO __kmp_compare_and_store_ret16
- // kmp_int32
- // __kmp_compare_and_store_ret32(volatile kmp_int32 *p, kmp_int32 cv,
- // kmp_int32 sv);
- //
- // Atomic compare-and-swap on a 32-bit word that reports the value found in
- // *p: when *p equals cv, sv is stored; in either case %eax ends up holding
- // the word that was in *p before the instruction.
- //
- // parameters:
- // p: 4(%esp)
- // cv: 8(%esp)
- // sv: 12(%esp)
- //
- // return: %eax
- PROC __kmp_compare_and_store_ret32
- movl 12(%esp), %edx // replacement value "sv"
- movl 8(%esp), %eax // expected value "cv"
- movl 4(%esp), %ecx // target address "p"
- lock
- cmpxchgl %edx,(%ecx) // on mismatch %eax is reloaded from *p
- ret
- DEBUG_INFO __kmp_compare_and_store_ret32
- // kmp_int64
- // __kmp_compare_and_store_ret64(volatile kmp_int64 *p, kmp_int64 cv,
- // kmp_int64 sv);
- //
- // Atomic 64-bit compare-and-swap built on cmpxchg8b that reports the value
- // found in *p: when *p equals cv, sv is stored; in either case %edx:%eax
- // ends up holding the quadword that was in *p before the instruction.
- //
- // parameters (relative to %ebp once the frame is set up):
- // p: 8(%ebp)
- // cv: 12(%ebp) low half, 16(%ebp) high half
- // sv: 20(%ebp) low half, 24(%ebp) high half
- //
- // return: %edx:%eax
- PROC __kmp_compare_and_store_ret64
- pushl %ebp
- movl %esp, %ebp
- pushl %ebx // callee-saved; cmpxchg8b needs it for "sv" low
- pushl %edi // callee-saved; holds "p"
- movl 24(%ebp), %ecx // "sv" high order word
- movl 20(%ebp), %ebx // "sv" low order word
- movl 16(%ebp), %edx // "cv" high order word
- movl 12(%ebp), %eax // "cv" low order word
- movl 8(%ebp), %edi // "p"
- lock
- cmpxchg8b (%edi) // on mismatch %edx:%eax is reloaded from *p
- popl %edi
- popl %ebx
- leave // %esp <- %ebp, then pop %ebp
- ret
- DEBUG_INFO __kmp_compare_and_store_ret64
- //------------------------------------------------------------------------
- // FUNCTION __kmp_xchg_real32
- //
- // kmp_real32
- // __kmp_xchg_real32( volatile kmp_real32 *addr, kmp_real32 data );
- //
- // Atomically stores "data" into *addr and returns the value *addr held
- // immediately before the exchange.
- //
- // parameters (offsets at entry, before the frame is set up):
- // addr: 4(%esp)
- // data: 8(%esp)
- //
- // return: %st(0) (loaded by the flds below)
- //
- // After "pushl %ebp; movl %esp,%ebp" the return address sits at 4(%ebp),
- // so addr is read from 8(%ebp) and data from 12(%ebp).
- PROC __kmp_xchg_real32
- pushl %ebp
- movl %esp, %ebp
- subl $4, %esp // scratch slot at -4(%ebp)
- pushl %esi // callee-saved; holds "addr"
- movl 8(%ebp), %esi // "addr"
- movl 12(%ebp), %eax // "data" as a raw 32-bit pattern
- lock
- xchgl %eax, (%esi) // %eax <- old *addr, *addr <- data
- movl %eax, -4(%ebp) // spill the old value so the x87 unit can load it
- flds -4(%ebp) // return old value; the only x87 push in this routine
- popl %esi
- movl %ebp, %esp
- popl %ebp
- ret
- DEBUG_INFO __kmp_xchg_real32
- # endif /* !KMP_ASM_INTRINS */
- //------------------------------------------------------------------------
- // int
- // __kmp_invoke_microtask( void (*pkfn) (int gtid, int tid, ...),
- // int gtid, int tid,
- // int argc, void *p_argv[]
- // #if OMPT_SUPPORT
- // ,
- // void **exit_frame_ptr
- // #endif
- // ) {
- // #if OMPT_SUPPORT
- // *exit_frame_ptr = OMPT_GET_FRAME_ADDRESS(0);
- // #endif
- //
- // (*pkfn)( & gtid, & tid, argv[0], ... );
- // return 1;
- // }
- // -- Begin __kmp_invoke_microtask: pushes p_argv[argc-1..0], &tid, &gtid, then calls pkfn
- // mark_begin;
- PROC __kmp_invoke_microtask
- pushl %ebp
- KMP_CFI_DEF_OFFSET 8
- KMP_CFI_OFFSET ebp,-8
- movl %esp,%ebp // establish the base pointer for this routine.
- KMP_CFI_REGISTER ebp
- subl $8,%esp // allocate space for two local variables.
- // These variables are:
- // argv: -4(%ebp)
- // temp: -8(%ebp)
- // (the %ebx pushed next lands at -12(%ebp))
- pushl %ebx // save %ebx to use during this routine
- //
- #if OMPT_SUPPORT
- movl 28(%ebp),%ebx // get exit_frame address
- movl %ebp,(%ebx) // save exit_frame
- #endif
- movl 20(%ebp),%ebx // Stack alignment - # args
- addl $2,%ebx // #args +2 Always pass at least 2 args (gtid and tid)
- shll $2,%ebx // Number of bytes used on stack: (#args+2)*4
- movl %esp,%eax // current stack pointer
- subl %ebx,%eax // %esp-((#args+2)*4) -> %eax -- without mods, stack ptr would be this
- movl %eax,%ebx // Save to %ebx
- andl $0xFFFFFF80,%eax // mask off 7 bits
- subl %eax,%ebx // Amount to subtract from %esp
- subl %ebx,%esp // Prepare the stack ptr --
- // now it will be aligned on 128-byte boundary at the call
- movl 24(%ebp),%eax // copy from p_argv[]
- movl %eax,-4(%ebp) // into the local variable *argv.
- movl 20(%ebp),%ebx // argc is 20(%ebp)
- shll $2,%ebx // argc*4: byte offset just past the last argument
- KMP_LABEL(invoke_2): // loop head: %ebx = bytes of arguments still to push
- cmpl $0,%ebx
- jg KMP_LABEL(invoke_4) // more arguments: push the next one
- jmp KMP_LABEL(invoke_3) // none left: push &tid and &gtid, then call
- ALIGN 2
- KMP_LABEL(invoke_4):
- movl -4(%ebp),%eax // reload the p_argv base
- subl $4,%ebx // step back one argument (4 bytes).
- addl %ebx,%eax // index into argv.
- movl (%eax),%edx // fetch that argument
- pushl %edx // arguments are pushed last-to-first
- jmp KMP_LABEL(invoke_2)
- ALIGN 2
- KMP_LABEL(invoke_3):
- leal 16(%ebp),%eax // push & tid
- pushl %eax
- leal 12(%ebp),%eax // push & gtid
- pushl %eax
- movl 8(%ebp),%ebx // pkfn
- call *%ebx // call (*pkfn)();
- movl $1,%eax // return 1;
- movl -12(%ebp),%ebx // restore %ebx
- leave // %esp <- %ebp, then pop %ebp; discards the pushed arguments
- KMP_CFI_DEF esp,4
- ret
- DEBUG_INFO __kmp_invoke_microtask
- // -- End __kmp_invoke_microtask
- // kmp_uint64
- // __kmp_hardware_timestamp(void) -- returns the time-stamp counter read by rdtsc
- PROC __kmp_hardware_timestamp
- rdtsc // counter value is delivered in %edx:%eax
- ret
- DEBUG_INFO __kmp_hardware_timestamp
- // -- End __kmp_hardware_timestamp
- #endif /* KMP_ARCH_X86 */
- #if KMP_ARCH_X86_64
- // -----------------------------------------------------------------------
- // microtasking routines specifically written for IA-32 architecture and
- // Intel(R) 64 running Linux* OS
- // -----------------------------------------------------------------------
- // -- Machine type P
- // mark_description "Intel Corporation";
- .ident "Intel Corporation" // identification string for the object file
- // -- .file "z_Linux_asm.S"
- .data // select the data section
- ALIGN 4 // ALIGN macro, argument 4 (expansion differs per OS branch of the macro)
- // To keep our code out of the .data section, a .text directive is placed
- // before every routine definition for x86_64.
- //------------------------------------------------------------------------
- # if !KMP_ASM_INTRINS
- //------------------------------------------------------------------------
- // FUNCTION __kmp_test_then_add32
- // Atomic fetch-and-add: *p += d, returning the value *p held beforehand.
- // kmp_int32
- // __kmp_test_then_add32( volatile kmp_int32 *p, kmp_int32 d );
- //
- // parameters:
- // p: %rdi
- // d: %esi
- //
- // return: %eax
- .text
- PROC __kmp_test_then_add32
- movl %esi, %eax // "d"
- lock
- xaddl %eax,(%rdi) // *p += d; %eax receives the previous *p
- ret
- DEBUG_INFO __kmp_test_then_add32
- //------------------------------------------------------------------------
- // FUNCTION __kmp_test_then_add64
- // Atomic fetch-and-add: *p += d, returning the value *p held beforehand.
- // kmp_int64
- // __kmp_test_then_add64( volatile kmp_int64 *p, kmp_int64 d );
- //
- // parameters:
- // p: %rdi
- // d: %rsi
- // return: %rax
- .text
- PROC __kmp_test_then_add64
- movq %rsi, %rax // "d"
- lock
- xaddq %rax,(%rdi) // *p += d; %rax receives the previous *p
- ret
- DEBUG_INFO __kmp_test_then_add64
- //------------------------------------------------------------------------
- // FUNCTION __kmp_xchg_fixed8
- // Atomically swaps the byte at *p with d and returns the old byte.
- // kmp_int32
- // __kmp_xchg_fixed8( volatile kmp_int8 *p, kmp_int8 d );
- // NOTE(review): prototype says kmp_int32 but only %al is written -- confirm.
- // parameters:
- // p: %rdi
- // d: %sil
- //
- // return: %al
- .text
- PROC __kmp_xchg_fixed8
- movb %sil, %al // "d"
- lock
- xchgb %al,(%rdi) // %al <- old *p, *p <- d
- ret
- DEBUG_INFO __kmp_xchg_fixed8
- //------------------------------------------------------------------------
- // FUNCTION __kmp_xchg_fixed16
- // Atomically swaps the 16-bit word at *p with d and returns the old word.
- // kmp_int16
- // __kmp_xchg_fixed16( volatile kmp_int16 *p, kmp_int16 d );
- //
- // parameters:
- // p: %rdi
- // d: %si
- // return: %ax
- .text
- PROC __kmp_xchg_fixed16
- movw %si, %ax // "d"
- lock
- xchgw %ax,(%rdi) // %ax <- old *p, *p <- d
- ret
- DEBUG_INFO __kmp_xchg_fixed16
- //------------------------------------------------------------------------
- // FUNCTION __kmp_xchg_fixed32
- // Atomically swaps the 32-bit word at *p with d and returns the old word.
- // kmp_int32
- // __kmp_xchg_fixed32( volatile kmp_int32 *p, kmp_int32 d );
- //
- // parameters:
- // p: %rdi
- // d: %esi
- //
- // return: %eax
- .text
- PROC __kmp_xchg_fixed32
- movl %esi, %eax // "d"
- lock
- xchgl %eax,(%rdi) // %eax <- old *p, *p <- d
- ret
- DEBUG_INFO __kmp_xchg_fixed32
- //------------------------------------------------------------------------
- // FUNCTION __kmp_xchg_fixed64
- // Atomically swaps the 64-bit word at *p with d and returns the old word.
- // kmp_int64
- // __kmp_xchg_fixed64( volatile kmp_int64 *p, kmp_int64 d );
- //
- // parameters:
- // p: %rdi
- // d: %rsi
- // return: %rax
- .text
- PROC __kmp_xchg_fixed64
- movq %rsi, %rax // "d"
- lock
- xchgq %rax,(%rdi) // %rax <- old *p, *p <- d
- ret
- DEBUG_INFO __kmp_xchg_fixed64
- //------------------------------------------------------------------------
- // FUNCTION __kmp_compare_and_store8
- //
- // kmp_int8
- // __kmp_compare_and_store8( volatile kmp_int8 *p, kmp_int8 cv, kmp_int8 sv );
- //
- // Atomic compare-and-swap on one byte: when *p equals cv, sv is written to
- // *p. The result is 1 if the store happened and 0 otherwise.
- //
- // parameters:
- // p: %rdi
- // cv: %sil
- // sv: %dl
- //
- // return: %eax (0 or 1)
- .text
- PROC __kmp_compare_and_store8
- movb %sil, %al // cmpxchg compares against the accumulator
- lock
- cmpxchgb %dl,(%rdi) // ZF = 1 when the swap took place
- sete %al // %al = ZF
- movzbl %al, %eax // widen the flag to a clean 0/1 in the full register
- ret
- DEBUG_INFO __kmp_compare_and_store8
- //------------------------------------------------------------------------
- // FUNCTION __kmp_compare_and_store16
- //
- // kmp_int16
- // __kmp_compare_and_store16( volatile kmp_int16 *p, kmp_int16 cv, kmp_int16 sv );
- //
- // Atomic compare-and-swap on a 16-bit word: when *p equals cv, sv is written
- // to *p. The result is 1 if the store happened and 0 otherwise.
- //
- // parameters:
- // p: %rdi
- // cv: %si
- // sv: %dx
- //
- // return: %eax (0 or 1)
- .text
- PROC __kmp_compare_and_store16
- movw %si, %ax // cmpxchg compares against the accumulator
- lock
- cmpxchgw %dx,(%rdi) // ZF = 1 when the swap took place
- sete %al // %al = ZF
- movzbl %al, %eax // widen the flag to a clean 0/1 in the full register
- ret
- DEBUG_INFO __kmp_compare_and_store16
- //------------------------------------------------------------------------
- // FUNCTION __kmp_compare_and_store32
- //
- // kmp_int32
- // __kmp_compare_and_store32( volatile kmp_int32 *p, kmp_int32 cv, kmp_int32 sv );
- //
- // Atomic compare-and-swap on a 32-bit word: when *p equals cv, sv is written
- // to *p. The result is 1 if the store happened and 0 otherwise.
- //
- // parameters:
- // p: %rdi
- // cv: %esi
- // sv: %edx
- //
- // return: %eax (0 or 1)
- .text
- PROC __kmp_compare_and_store32
- movl %esi, %eax // cmpxchg compares against the accumulator
- lock
- cmpxchgl %edx,(%rdi) // ZF = 1 when the swap took place
- sete %al // %al = ZF
- movzbl %al, %eax // widen the flag to a clean 0/1 in the full register
- ret
- DEBUG_INFO __kmp_compare_and_store32
- //------------------------------------------------------------------------
- // FUNCTION __kmp_compare_and_store64
- //
- // kmp_int32
- // __kmp_compare_and_store64( volatile kmp_int64 *p, kmp_int64 cv, kmp_int64 sv );
- //
- // Atomic compare-and-swap on a 64-bit word: when *p equals cv, sv is written
- // to *p. The result is 1 if the store happened and 0 otherwise.
- //
- // parameters:
- // p: %rdi
- // cv: %rsi
- // sv: %rdx
- //
- // return: %eax (0 or 1)
- .text
- PROC __kmp_compare_and_store64
- movq %rsi, %rax // cmpxchg compares against the accumulator
- lock
- cmpxchgq %rdx,(%rdi) // ZF = 1 when the swap took place
- sete %al // %al = ZF
- movzbl %al, %eax // widen the flag to a clean 0/1 in the full register
- ret
- DEBUG_INFO __kmp_compare_and_store64
- //------------------------------------------------------------------------
- // FUNCTION __kmp_compare_and_store_ret8
- // Atomic byte compare-and-swap; returns the byte found in *p before the op.
- // kmp_int8
- // __kmp_compare_and_store_ret8( volatile kmp_int8 *p, kmp_int8 cv, kmp_int8 sv );
- //
- // parameters:
- // p: %rdi
- // cv: %esi (only the low byte %sil is used)
- // sv: %edx (only the low byte %dl is used)
- //
- // return: %eax (only %al is written)
- .text
- PROC __kmp_compare_and_store_ret8
- movb %sil, %al // "cv"
- lock
- cmpxchgb %dl,(%rdi) // stores sv on match; on mismatch %al is reloaded from *p
- ret
- DEBUG_INFO __kmp_compare_and_store_ret8
- //------------------------------------------------------------------------
- // FUNCTION __kmp_compare_and_store_ret16
- // Atomic 16-bit compare-and-swap; returns the word found in *p before the op.
- // kmp_int16
- // __kmp_compare_and_store_ret16( volatile kmp_int16 *p, kmp_int16 cv, kmp_int16 sv );
- //
- // parameters:
- // p: %rdi
- // cv: %si
- // sv: %dx
- //
- // return: %eax (only %ax is written)
- .text
- PROC __kmp_compare_and_store_ret16
- movw %si, %ax // "cv"
- lock
- cmpxchgw %dx,(%rdi) // stores sv on match; on mismatch %ax is reloaded from *p
- ret
- DEBUG_INFO __kmp_compare_and_store_ret16
- //------------------------------------------------------------------------
- // FUNCTION __kmp_compare_and_store_ret32
- // Atomic 32-bit compare-and-swap; returns the word found in *p before the op.
- // kmp_int32
- // __kmp_compare_and_store_ret32( volatile kmp_int32 *p, kmp_int32 cv, kmp_int32 sv );
- //
- // parameters:
- // p: %rdi
- // cv: %esi
- // sv: %edx
- //
- // return: %eax
- .text
- PROC __kmp_compare_and_store_ret32
- movl %esi, %eax // "cv"
- lock
- cmpxchgl %edx,(%rdi) // stores sv on match; on mismatch %eax is reloaded from *p
- ret
- DEBUG_INFO __kmp_compare_and_store_ret32
- //------------------------------------------------------------------------
- // FUNCTION __kmp_compare_and_store_ret64
- // Atomic 64-bit compare-and-swap; returns the word found in *p before the op.
- // kmp_int64
- // __kmp_compare_and_store_ret64( volatile kmp_int64 *p, kmp_int64 cv, kmp_int64 sv );
- //
- // parameters:
- // p: %rdi
- // cv: %rsi
- // sv: %rdx
- // return: %rax
- .text
- PROC __kmp_compare_and_store_ret64
- movq %rsi, %rax // "cv"
- lock
- cmpxchgq %rdx,(%rdi) // stores sv on match; on mismatch %rax is reloaded from *p
- ret
- DEBUG_INFO __kmp_compare_and_store_ret64
- # endif /* !KMP_ASM_INTRINS */
- # if !KMP_MIC
- # if !KMP_ASM_INTRINS
- //------------------------------------------------------------------------
- // FUNCTION __kmp_xchg_real32
- // Atomically swaps the 32-bit value at *addr with data; returns the old value.
- // kmp_real32
- // __kmp_xchg_real32( volatile kmp_real32 *addr, kmp_real32 data );
- //
- // parameters:
- // addr: %rdi
- // data: %xmm0 (lower 4 bytes)
- //
- // return: %xmm0 (lower 4 bytes)
- .text
- PROC __kmp_xchg_real32
- movd %xmm0, %eax // load "data" to eax
- lock
- xchgl %eax, (%rdi) // %eax <- old *addr, *addr <- data
- movd %eax, %xmm0 // load old value into return register
- ret
- DEBUG_INFO __kmp_xchg_real32
- //------------------------------------------------------------------------
- // FUNCTION __kmp_xchg_real64
- // Atomically swaps the 64-bit value at *addr with data; returns the old value.
- // kmp_real64
- // __kmp_xchg_real64( volatile kmp_real64 *addr, kmp_real64 data );
- //
- // parameters:
- // addr: %rdi
- // data: %xmm0 (lower 8 bytes)
- // return: %xmm0 (lower 8 bytes)
- .text
- PROC __kmp_xchg_real64
- movd %xmm0, %rax // load "data" to rax
- lock
- xchgq %rax, (%rdi) // %rax <- old *addr, *addr <- data
- movd %rax, %xmm0 // load old value into return register
- ret
- DEBUG_INFO __kmp_xchg_real64
- # endif /* !KMP_ASM_INTRINS */
- # endif /* !KMP_MIC */
- //------------------------------------------------------------------------
- // int
- // __kmp_invoke_microtask( void (*pkfn) (int gtid, int tid, ...),
- // int gtid, int tid,
- // int argc, void *p_argv[]
- // #if OMPT_SUPPORT
- // ,
- // void **exit_frame_ptr
- // #endif
- // ) {
- // #if OMPT_SUPPORT
- // *exit_frame_ptr = OMPT_GET_FRAME_ADDRESS(0);
- // #endif
- //
- // (*pkfn)( & gtid, & tid, argv[0], ... );
- // return 1;
- // }
- //
- // note: at call to pkfn must have %rsp 128-byte aligned for compiler
- //
- // parameters:
- // %rdi: pkfn
- // %esi: gtid
- // %edx: tid
- // %ecx: argc
- // %r8: p_argv
- // %r9: &exit_frame
- //
- // locals:
- // __gtid: gtid parm pushed on stack so can pass &gtid to pkfn
- // __tid: tid parm pushed on stack so can pass &tid to pkfn
- //
- // reg temps:
- // %rax: used all over the place
- // %rdx: used in stack pointer alignment calculation
- // %r11: used to traverse p_argv array
- // %rsi: used as temporary for stack parameters
- // used as temporary for number of pkfn parms to push
- // %rbx: used to hold pkfn address, and zero constant, callee-save
- //
- // return: %eax (always 1/TRUE)
- __gtid = -16
- __tid = -24
- // -- Begin __kmp_invoke_microtask
- // mark_begin;
- .text
- PROC __kmp_invoke_microtask
- pushq %rbp // save base pointer
- KMP_CFI_DEF_OFFSET 16
- KMP_CFI_OFFSET rbp,-16
- movq %rsp,%rbp // establish the base pointer for this routine.
- KMP_CFI_REGISTER rbp
- #if OMPT_SUPPORT
- movq %rbp, (%r9) // save exit_frame
- #endif
- pushq %rbx // %rbx is callee-saved register
- pushq %rsi // Put gtid on stack so can pass &tgid to pkfn
- pushq %rdx // Put tid on stack so can pass &tid to pkfn
- movq %rcx, %rax // Stack alignment calculation begins; argc -> %rax
- movq $0, %rbx // constant for cmovs later
- subq $4, %rax // subtract four args passed in registers to pkfn
- #if KMP_MIC
- js KMP_LABEL(kmp_0) // jump to movq
- jmp KMP_LABEL(kmp_0_exit) // jump ahead
- KMP_LABEL(kmp_0):
- movq %rbx, %rax // zero negative value in %rax <- max(0, argc-4)
- KMP_LABEL(kmp_0_exit):
- #else
- cmovsq %rbx, %rax // zero negative value in %rax <- max(0, argc-4)
- #endif // KMP_MIC
- movq %rax, %rsi // save max(0, argc-4) -> %rsi for later
- shlq $3, %rax // Number of bytes used on stack: max(0, argc-4)*8
- movq %rsp, %rdx //
- subq %rax, %rdx // %rsp-(max(0,argc-4)*8) -> %rdx --
- // without align, stack ptr would be this
- movq %rdx, %rax // Save to %rax
- andq $0xFFFFFFFFFFFFFF80, %rax // mask off lower 7 bits (128 bytes align)
- subq %rax, %rdx // Amount to subtract from %rsp
- subq %rdx, %rsp // Prepare the stack ptr --
- // now %rsp will align to 128-byte boundary at call site
- // setup pkfn parameter reg and stack
- movq %rcx, %rax // argc -> %rax
- cmpq $0, %rsi
- je KMP_LABEL(kmp_invoke_pass_parms) // jump ahead if no parms to push
- shlq $3, %rcx // argc*8 -> %rcx
- movq %r8, %rdx // p_argv -> %rdx
- addq %rcx, %rdx // &p_argv[argc] -> %rdx
- movq %rsi, %rcx // max (0, argc-4) -> %rcx
- KMP_LABEL(kmp_invoke_push_parms):
- // push nth - 7th parms to pkfn on stack
- subq $8, %rdx // decrement p_argv pointer to previous parm
- movq (%rdx), %rsi // p_argv[%rcx-1] -> %rsi
- pushq %rsi // push p_argv[%rcx-1] onto stack (reverse order)
- subl $1, %ecx
- // C69570: "X86_64_RELOC_BRANCH not supported" error at linking on mac_32e
- // if the name of the label that is an operand of this jecxz starts with a dot (".");
- // Apple's linker does not support 1-byte length relocation;
- // Resolution: replace all .labelX entries with L_labelX.
- jecxz KMP_LABEL(kmp_invoke_pass_parms) // stop when four p_argv[] parms left
- jmp KMP_LABEL(kmp_invoke_push_parms)
- ALIGN 3
- KMP_LABEL(kmp_invoke_pass_parms): // put 1st - 6th parms to pkfn in registers.
- // order here is important to avoid trashing
- // registers used for both input and output parms!
- movq %rdi, %rbx // pkfn -> %rbx
- leaq __gtid(%rbp), %rdi // >id -> %rdi (store 1st parm to pkfn)
- leaq __tid(%rbp), %rsi // &tid -> %rsi (store 2nd parm to pkfn)
- movq %r8, %r11 // p_argv -> %r11
- #if KMP_MIC
- cmpq $4, %rax // argc >= 4?
- jns KMP_LABEL(kmp_4) // jump to movq
- jmp KMP_LABEL(kmp_4_exit) // jump ahead
- KMP_LABEL(kmp_4):
- movq 24(%r11), %r9 // p_argv[3] -> %r9 (store 6th parm to pkfn)
- KMP_LABEL(kmp_4_exit):
- cmpq $3, %rax // argc >= 3?
- jns KMP_LABEL(kmp_3) // jump to movq
- jmp KMP_LABEL(kmp_3_exit) // jump ahead
- KMP_LABEL(kmp_3):
- movq 16(%r11), %r8 // p_argv[2] -> %r8 (store 5th parm to pkfn)
- KMP_LABEL(kmp_3_exit):
- cmpq $2, %rax // argc >= 2?
- jns KMP_LABEL(kmp_2) // jump to movq
- jmp KMP_LABEL(kmp_2_exit) // jump ahead
- KMP_LABEL(kmp_2):
- movq 8(%r11), %rcx // p_argv[1] -> %rcx (store 4th parm to pkfn)
- KMP_LABEL(kmp_2_exit):
- cmpq $1, %rax // argc >= 1?
- jns KMP_LABEL(kmp_1) // jump to movq
- jmp KMP_LABEL(kmp_1_exit) // jump ahead
- KMP_LABEL(kmp_1):
- movq (%r11), %rdx // p_argv[0] -> %rdx (store 3rd parm to pkfn)
- KMP_LABEL(kmp_1_exit):
- #else
- cmpq $4, %rax // argc >= 4?
- cmovnsq 24(%r11), %r9 // p_argv[3] -> %r9 (store 6th parm to pkfn)
- cmpq $3, %rax // argc >= 3?
- cmovnsq 16(%r11), %r8 // p_argv[2] -> %r8 (store 5th parm to pkfn)
- cmpq $2, %rax // argc >= 2?
- cmovnsq 8(%r11), %rcx // p_argv[1] -> %rcx (store 4th parm to pkfn)
- cmpq $1, %rax // argc >= 1?
- cmovnsq (%r11), %rdx // p_argv[0] -> %rdx (store 3rd parm to pkfn)
- #endif // KMP_MIC
- call *%rbx // call (*pkfn)();
- movq $1, %rax // move 1 into return register;
- movq -8(%rbp), %rbx // restore %rbx using %rbp since %rsp was modified
- movq %rbp, %rsp // restore stack pointer
- popq %rbp // restore frame pointer
- KMP_CFI_DEF rsp,8
- ret
- DEBUG_INFO __kmp_invoke_microtask
- // -- End __kmp_invoke_microtask
- //------------------------------------------------------------------------
- // FUNCTION __kmp_hardware_timestamp
- //
- // kmp_uint64
- // __kmp_hardware_timestamp(void);
- //
- // Executes rdtsc and returns the two 32-bit halves it produces as a single
- // 64-bit value.
- //
- // parameters: none
- // return: %rax = (%rdx << 32) | %rax, using the values left by rdtsc
- // clobbers: %rdx, flags
- .text
- PROC __kmp_hardware_timestamp
- rdtsc // high half of the counter -> %edx, low half -> %eax
- salq $32, %rdx // slide the high half up into bits 63..32
- orq %rdx, %rax // combine with the low half to form the result
- ret
- DEBUG_INFO __kmp_hardware_timestamp
- // -- End __kmp_hardware_timestamp
- //------------------------------------------------------------------------
- // FUNCTION __kmp_bsr32
- //
- // int
- // __kmp_bsr32( int x );
- //
- // Bit-scan-reverse of a 32-bit value: yields the index of the most
- // significant set bit of x.
- //
- // parameters:
- // %edi: x
- //
- // return: %eax
- //
- // NOTE(review): there is no special handling for x == 0 here - confirm that
- // callers never pass zero.
- .text
- PROC __kmp_bsr32
- bsrl %edi, %eax // index of highest set bit of the argument -> return register
- ret
- DEBUG_INFO __kmp_bsr32
- // -----------------------------------------------------------------------
- #endif /* KMP_ARCH_X86_64 */
- // '
- #if (KMP_OS_LINUX || KMP_OS_DARWIN) && KMP_ARCH_AARCH64
- //------------------------------------------------------------------------
- // int
- // __kmp_invoke_microtask( void (*pkfn) (int *gtid, int *tid, ...),
- // int gtid, int tid,
- // int argc, void *p_argv[]
- // #if OMPT_SUPPORT
- // ,
- // void **exit_frame_ptr
- // #endif
- // ) {
- // #if OMPT_SUPPORT
- // *exit_frame_ptr = OMPT_GET_FRAME_ADDRESS(0);
- // #endif
- //
- // (*pkfn)( & gtid, & tid, p_argv[0], ... );
- //
- // // FIXME: This is done at call-site and can be removed here.
- // #if OMPT_SUPPORT
- // *exit_frame_ptr = 0;
- // #endif
- //
- // return 1;
- // }
- //
- // parameters:
- // x0: pkfn
- // w1: gtid
- // w2: tid
- // w3: argc
- // x4: p_argv
- // x5: exit_frame_ptr (only read when OMPT_SUPPORT is enabled)
- //
- // locals:
- // __gtid: gtid parm stored on stack so can pass &gtid to pkfn
- // __tid: tid parm stored on stack so can pass &tid to pkfn
- //
- // reg temps:
- // x8: used to hold pkfn address
- // w9: stack size in 16-byte units, then number of pkfn parms still to place
- // x10: used to traverse p_argv array
- // x11: address of the next outgoing stack parameter slot
- // x12: used as temporary for stack parameters
- // x19: used to preserve exit_frame_ptr, callee-save
- //
- // return: w0 (always 1/TRUE)
- //
- __gtid = 4 // gtid is kept at [x29 - 4]
- __tid = 8 // tid is kept at [x29 - 8]
- // -- Begin __kmp_invoke_microtask
- // mark_begin;
- .text
- PROC __kmp_invoke_microtask
- stp x29, x30, [sp, #-16]! // push frame pointer and link register
- # if OMPT_SUPPORT
- stp x19, x20, [sp, #-16]! // push x19 (reused below); x20 is not otherwise used here
- # endif
- mov x29, sp // x29 = base for the locals and for restoring sp on exit
- orr w9, wzr, #1 // w9 = 1
- add w9, w9, w3, lsr #1 // w9 = 1 + (argc >> 1), logical shift
- sub sp, sp, w9, uxtw #4 // sp -= w9 * 16: room for locals and stack-passed parms
- mov x11, sp // x11 = first outgoing stack parameter slot
- mov x8, x0 // x8 = pkfn, since x0 becomes the first argument
- str w1, [x29, #-__gtid] // spill gtid so its address can be passed
- str w2, [x29, #-__tid] // spill tid so its address can be passed
- mov w9, w3 // w9 = count of p_argv entries still to place
- mov x10, x4 // x10 = cursor into p_argv
- # if OMPT_SUPPORT
- mov x19, x5 // keep exit_frame_ptr in a callee-saved register across the call
- str x29, [x19] // *exit_frame_ptr = x29
- # endif
- sub x0, x29, #__gtid // 1st parm to pkfn = &gtid
- sub x1, x29, #__tid // 2nd parm to pkfn = &tid
- cbz w9, KMP_LABEL(kmp_1) // nothing left to pass: go call
- ldr x2, [x10] // 3rd parm = p_argv[0]
- sub w9, w9, #1
- cbz w9, KMP_LABEL(kmp_1)
- ldr x3, [x10, #8]! // 4th parm = p_argv[1] (cursor pre-incremented)
- sub w9, w9, #1
- cbz w9, KMP_LABEL(kmp_1)
- ldr x4, [x10, #8]! // 5th parm = p_argv[2]
- sub w9, w9, #1
- cbz w9, KMP_LABEL(kmp_1)
- ldr x5, [x10, #8]! // 6th parm = p_argv[3]
- sub w9, w9, #1
- cbz w9, KMP_LABEL(kmp_1)
- ldr x6, [x10, #8]! // 7th parm = p_argv[4]
- sub w9, w9, #1
- cbz w9, KMP_LABEL(kmp_1)
- ldr x7, [x10, #8]! // 8th parm = p_argv[5], last one passed in a register
- KMP_LABEL(kmp_0): // loop: copy the remaining p_argv entries to the stack
- sub w9, w9, #1
- cbz w9, KMP_LABEL(kmp_1) // all entries placed
- ldr x12, [x10, #8]! // x12 = next p_argv entry
- str x12, [x11], #8 // store it, then advance the stack slot cursor
- b KMP_LABEL(kmp_0)
- KMP_LABEL(kmp_1):
- blr x8 // call (*pkfn)(...)
- orr w0, wzr, #1 // return value = 1
- mov sp, x29 // release the dynamically sized area
- # if OMPT_SUPPORT
- str xzr, [x19] // *exit_frame_ptr = 0
- ldp x19, x20, [sp], #16 // restore x19 and x20
- # endif
- ldp x29, x30, [sp], #16 // restore frame pointer and link register
- ret
- DEBUG_INFO __kmp_invoke_microtask
- // -- End __kmp_invoke_microtask
- #endif /* (KMP_OS_LINUX || KMP_OS_DARWIN) && KMP_ARCH_AARCH64 */
- #if KMP_ARCH_PPC64
- //------------------------------------------------------------------------
- // int
- // __kmp_invoke_microtask( void (*pkfn) (int *gtid, int *tid, ...),
- // int gtid, int tid,
- // int argc, void *p_argv[]
- // #if OMPT_SUPPORT
- // ,
- // void **exit_frame_ptr
- // #endif
- // ) {
- // #if OMPT_SUPPORT
- // *exit_frame_ptr = OMPT_GET_FRAME_ADDRESS(0);
- // #endif
- //
- // (*pkfn)( & gtid, & tid, p_argv[0], ... );
- //
- // // FIXME: This is done at call-site and can be removed here.
- // #if OMPT_SUPPORT
- // *exit_frame_ptr = 0;
- // #endif
- //
- // return 1;
- // }
- //
- // parameters:
- // r3: pkfn
- // r4: gtid
- // r5: tid
- // r6: argc
- // r7: p_argv
- // r8: exit_frame_ptr (only read when OMPT_SUPPORT is enabled)
- //
- // return: r3 (always 1/TRUE)
- //
- .text
- # if KMP_ARCH_PPC64_ELFv2
- .abiversion 2
- # endif
- .globl __kmp_invoke_microtask
- # if KMP_ARCH_PPC64_ELFv2
- .p2align 4
- # else
- .p2align 2
- # endif
- .type __kmp_invoke_microtask,@function
- # if KMP_ARCH_PPC64_ELFv2
- __kmp_invoke_microtask:
- .Lfunc_begin0:
- .Lfunc_gep0: // global entry: r2 is derived from r12 by the next two instructions
- addis 2, 12, .TOC.-.Lfunc_gep0@ha // r2 = r12 + high-adjusted part of the TOC offset
- addi 2, 2, .TOC.-.Lfunc_gep0@l // r2 += low part of the TOC offset
- .Lfunc_lep0: // local entry: r2 already set up
- .localentry __kmp_invoke_microtask, .Lfunc_lep0-.Lfunc_gep0
- # else
- .section .opd,"aw",@progbits
- __kmp_invoke_microtask: // ELFv1: the symbol names a three-doubleword descriptor
- .p2align 3
- .quad .Lfunc_begin0 // descriptor word 0: code entry address
- .quad .TOC.@tocbase // descriptor word 1: TOC base
- .quad 0 // descriptor word 2: zero
- .text
- .Lfunc_begin0:
- # endif
- // -- Begin __kmp_invoke_microtask
- // mark_begin;
- // We need to allocate a stack frame large enough to hold all of the parameters
- // on the stack for the microtask plus what this function needs. That's 48
- // bytes under the ELFv1 ABI (32 bytes under ELFv2), plus 8*(2 + argc) for the
- // parameters to the microtask, plus 8 bytes to store the values of r4 and r5,
- // and 8 bytes to store r31. With OMP-T support, we need an additional 8 bytes
- // to save r30 to hold a copy of r8.
- .cfi_startproc
- mflr 0 // r0 = return address from the link register
- std 31, -8(1) // save r31 just below the entry stack pointer
- std 0, 16(1) // save the return address at offset 16 of the caller frame
- // This is unusual because normally we'd set r31 equal to r1 after the stack
- // frame is established. In this case, however, we need to dynamically compute
- // the stack frame size, and so we keep a direct copy of r1 to access our
- // register save areas and restore the r1 value before returning.
- mr 31, 1 // r31 = entry stack pointer
- .cfi_def_cfa_register r31
- .cfi_offset r31, -8
- .cfi_offset lr, 16
- // Compute the size necessary for the local stack frame.
- # if KMP_ARCH_PPC64_ELFv2
- li 12, 72 // fixed part of the frame size (ELFv2)
- # else
- li 12, 88 // fixed part of the frame size (ELFv1)
- # endif
- sldi 0, 6, 3 // r0 = argc * 8
- add 12, 0, 12 // r12 = fixed part + argc * 8
- neg 12, 12 // negate: the frame is allocated by adding a negative offset to r1
- // We need to make sure that the stack frame stays aligned (to 16 bytes).
- li 0, -16 // mask clearing the low four bits
- and 12, 0, 12 // round the negative size to a multiple of 16
- // Establish the local stack frame.
- stdux 1, 1, 12 // store old r1 at r1 + r12 and update r1 to that address
- # if OMPT_SUPPORT
- .cfi_offset r30, -16
- std 30, -16(31) // save r30 below the saved r31
- std 1, 0(8) // *exit_frame_ptr = new stack pointer
- mr 30, 8 // keep exit_frame_ptr in r30; r8 is reused as a microtask parm
- # endif
- // Store gtid and tid to the stack because they're passed by reference to the microtask.
- stw 4, -20(31) // gtid stored at entry stack pointer - 20
- stw 5, -24(31) // tid stored at entry stack pointer - 24
- mr 12, 6 // r12 = argc; r6 is reused as a microtask parm
- mr 4, 7 // r4 = p_argv; r7 is reused as a microtask parm
- cmpwi 0, 12, 1 // argc < 1 ?
- blt 0, .Lcall // yes: no p_argv entries to pass
- ld 5, 0(4) // r5 = p_argv[0] (3rd parm to pkfn)
- cmpwi 0, 12, 2
- blt 0, .Lcall
- ld 6, 8(4) // r6 = p_argv[1]
- cmpwi 0, 12, 3
- blt 0, .Lcall
- ld 7, 16(4) // r7 = p_argv[2]
- cmpwi 0, 12, 4
- blt 0, .Lcall
- ld 8, 24(4) // r8 = p_argv[3]
- cmpwi 0, 12, 5
- blt 0, .Lcall
- ld 9, 32(4) // r9 = p_argv[4]
- cmpwi 0, 12, 6
- blt 0, .Lcall
- ld 10, 40(4) // r10 = p_argv[5], last one passed in a register
- cmpwi 0, 12, 7
- blt 0, .Lcall
- // There are more than 6 microtask parameters, so we need to store the
- // remainder to the stack.
- addi 12, 12, -6 // r12 = argc - 6 = number of entries to copy
- mtctr 12 // CTR = loop count
- // These are set to 8 bytes before the first desired store address (we're using
- // pre-increment loads and stores in the loop below). The parameter save area
- // for the microtask begins 48 + 8*8 == 112 bytes above r1 for ELFv1 and
- // 32 + 8*8 == 96 bytes above r1 for ELFv2.
- addi 4, 4, 40 // r4 = address of p_argv[5], one entry before the first to copy
- # if KMP_ARCH_PPC64_ELFv2
- addi 12, 1, 88 // r12 = r1 + 88, one slot before the first store (ELFv2)
- # else
- addi 12, 1, 104 // r12 = r1 + 104, one slot before the first store (ELFv1)
- # endif
- .Lnext: // copy loop, executed CTR times
- ldu 0, 8(4) // advance r4 by 8 and load the next p_argv entry
- stdu 0, 8(12) // advance r12 by 8 and store the entry there
- bdnz .Lnext // decrement CTR, loop while it is nonzero
- .Lcall:
- # if KMP_ARCH_PPC64_ELFv2
- std 2, 24(1) // save r2 (TOC pointer) at offset 24 of our frame
- mr 12, 3 // r12 = pkfn entry address
- #else
- std 2, 40(1) // save r2 (TOC pointer) at offset 40 of our frame
- // For ELFv1, we need to load the actual function address from the function descriptor.
- ld 12, 0(3) // r12 = descriptor word 0: code entry address
- ld 2, 8(3) // r2 = descriptor word 1: TOC pointer of the callee
- ld 11, 16(3) // r11 = descriptor word 2
- #endif
- addi 3, 31, -20 // 1st parm to pkfn = &gtid
- addi 4, 31, -24 // 2nd parm to pkfn = &tid
- mtctr 12 // CTR = call target
- bctrl // call (*pkfn)(...) through CTR
- # if KMP_ARCH_PPC64_ELFv2
- ld 2, 24(1) // restore r2 saved before the call
- # else
- ld 2, 40(1) // restore r2 saved before the call
- # endif
- # if OMPT_SUPPORT
- li 3, 0
- std 3, 0(30) // *exit_frame_ptr = 0
- # endif
- li 3, 1 // return value = 1
- # if OMPT_SUPPORT
- ld 30, -16(31) // restore r30
- # endif
- mr 1, 31 // restore the entry stack pointer
- ld 0, 16(1) // reload the saved return address
- ld 31, -8(1) // restore r31
- mtlr 0 // link register = return address
- blr
- .long 0 // NOTE(review): data emitted after the return - presumably a traceback-table placeholder, confirm
- .quad 0
- .Lfunc_end0:
- .size __kmp_invoke_microtask, .Lfunc_end0-.Lfunc_begin0
- .cfi_endproc
- // -- End __kmp_invoke_microtask
- #endif /* KMP_ARCH_PPC64 */
- #if KMP_ARCH_RISCV64
- //------------------------------------------------------------------------
- //
- // typedef void (*microtask_t)(int *gtid, int *tid, ...);
- //
- // int __kmp_invoke_microtask(microtask_t pkfn, int gtid, int tid, int argc,
- // void *p_argv[]
- // #if OMPT_SUPPORT
- // ,
- // void **exit_frame_ptr
- // #endif
- // ) {
- // #if OMPT_SUPPORT
- // *exit_frame_ptr = OMPT_GET_FRAME_ADDRESS(0);
- // #endif
- //
- // (*pkfn)(&gtid, &tid, p_argv[0], ...);
- //
- // return 1;
- // }
- //
- // Parameters:
- // a0: pkfn
- // a1: gtid
- // a2: tid
- // a3: argc
- // a4: p_argv
- // a5: exit_frame_ptr
- //
- // Locals:
- // __gtid: gtid param stored on stack so can pass &gtid to pkfn
- // __tid: tid param stored on stack so can pass &tid to pkfn
- //
- // Temp. registers:
- //
- // t0: used to calculate the dynamic stack size / used to hold pkfn address
- // t1: used as temporary for stack placement calculation
- // t2: used as temporary for stack arguments
- // t3: used as temporary for number of remaining pkfn parms
- // t4: used to traverse p_argv array
- //
- // return: a0 (always 1/TRUE)
- //
- __gtid = -20 // gtid is kept at fp - 20
- __tid = -24 // tid is kept at fp - 24
- // -- Begin __kmp_invoke_microtask
- // mark_begin;
- .text
- .globl __kmp_invoke_microtask
- .p2align 1
- .type __kmp_invoke_microtask,@function
- __kmp_invoke_microtask:
- .cfi_startproc
- // First, save ra and fp
- addi sp, sp, -16 // room for the two saved registers
- sd ra, 8(sp)
- sd fp, 0(sp)
- addi fp, sp, 16 // fp = value of sp on entry
- .cfi_def_cfa fp, 0
- .cfi_offset ra, -8
- .cfi_offset fp, -16
- // Compute the dynamic stack size:
- //
- // - We need 8 bytes for storing 'gtid' and 'tid', so we can pass them by
- // reference
- // - We need 8 bytes for each argument that cannot be passed to the 'pkfn'
- // function by register. Given that we have 8 of such registers (a[0-7])
- // and two + 'argc' arguments (consider &gtid and &tid), we need to
- // reserve max(0, argc - 6)*8 extra bytes
- //
- // The total number of bytes is then max(0, argc - 6)*8 + 8
- // Compute max(0, argc - 6) using the following bithack:
- // max(0, x) = x - (x & (x >> 31)), where x := argc - 6
- // Source: http://graphics.stanford.edu/~seander/bithacks.html#IntegerMinOrMax
- addi t0, a3, -6 // t0 = x = argc - 6
- srai t1, t0, 31 // t1 = x >> 31 (arithmetic); assumes x fits in 32 bits - TODO confirm
- and t1, t0, t1 // t1 = x & (x >> 31)
- sub t0, t0, t1 // t0 = max(0, x)
- addi t0, t0, 1 // one more 8-byte slot for gtid and tid
- slli t0, t0, 3 // convert slot count to bytes
- sub sp, sp, t0 // allocate the dynamic area
- // Align the stack to 16 bytes
- andi sp, sp, -16
- mv t0, a0 // t0 = pkfn; a0 is reused as the first argument
- mv t3, a3 // t3 = number of p_argv entries still to place
- mv t4, a4 // t4 = p_argv
- #if OMPT_SUPPORT
- // Save frame pointer into exit_frame
- sd fp, 0(a5)
- #endif
- // Prepare arguments for the pkfn function (first 8 using a0-a7 registers)
- sw a1, __gtid(fp) // spill gtid so its address can be passed
- sw a2, __tid(fp) // spill tid so its address can be passed
- addi a0, fp, __gtid // 1st argument = &gtid
- addi a1, fp, __tid // 2nd argument = &tid
- beqz t3, .L_kmp_3 // no p_argv entries: call now
- ld a2, 0(t4) // 3rd argument = p_argv[0]
- addi t3, t3, -1
- beqz t3, .L_kmp_3
- ld a3, 8(t4) // 4th argument = p_argv[1]
- addi t3, t3, -1
- beqz t3, .L_kmp_3
- ld a4, 16(t4) // 5th argument = p_argv[2]
- addi t3, t3, -1
- beqz t3, .L_kmp_3
- ld a5, 24(t4) // 6th argument = p_argv[3]
- addi t3, t3, -1
- beqz t3, .L_kmp_3
- ld a6, 32(t4) // 7th argument = p_argv[4]
- addi t3, t3, -1
- beqz t3, .L_kmp_3
- ld a7, 40(t4) // 8th argument = p_argv[5], last one passed in a register
- // Prepare any additional argument passed through the stack
- addi t4, t4, 48 // t4 = address of p_argv[6]
- mv t1, sp // t1 = first outgoing stack argument slot
- j .L_kmp_2 // enter the loop at its count check
- .L_kmp_1: // loop body: copy one p_argv entry to the stack
- ld t2, 0(t4)
- sd t2, 0(t1)
- addi t4, t4, 8 // next p_argv entry
- addi t1, t1, 8 // next stack slot
- .L_kmp_2:
- addi t3, t3, -1 // one fewer entry remaining
- bnez t3, .L_kmp_1 // keep copying while entries remain
- .L_kmp_3:
- // Call pkfn function
- jalr t0
- // Restore stack and return
- addi a0, zero, 1 // return value = 1
- addi sp, fp, -16 // sp = address of the saved fp and ra
- ld fp, 0(sp)
- ld ra, 8(sp)
- addi sp, sp, 16 // back to the value of sp on entry
- ret
- .Lfunc_end0:
- .size __kmp_invoke_microtask, .Lfunc_end0-__kmp_invoke_microtask
- .cfi_endproc
- // -- End __kmp_invoke_microtask
- #endif /* KMP_ARCH_RISCV64 */
- #if KMP_ARCH_ARM || KMP_ARCH_MIPS
- .data
- .comm .gomp_critical_user_,32,8 // common symbol of size 32; third operand 8 is the alignment request
- .data
- .align 4 // alignment request; its unit (bytes or power of two) depends on the target assembler
- .global __kmp_unnamed_critical_addr
- __kmp_unnamed_critical_addr: // exported 4-byte cell holding the address of .gomp_critical_user_
- .4byte .gomp_critical_user_
- .size __kmp_unnamed_critical_addr,4
- #endif /* KMP_ARCH_ARM || KMP_ARCH_MIPS */
- #if KMP_ARCH_PPC64 || KMP_ARCH_AARCH64 || KMP_ARCH_MIPS64 || KMP_ARCH_RISCV64
- #ifndef KMP_PREFIX_UNDERSCORE
- # define KMP_PREFIX_UNDERSCORE(x) x
- #endif
- .data
- .comm .gomp_critical_user_,32,8 // common symbol of size 32; third operand 8 is the alignment request
- .data
- .align 8 // alignment request; its unit (bytes or power of two) depends on the target assembler
- .global KMP_PREFIX_UNDERSCORE(__kmp_unnamed_critical_addr)
- KMP_PREFIX_UNDERSCORE(__kmp_unnamed_critical_addr): // exported 8-byte cell holding the address of .gomp_critical_user_
- .8byte .gomp_critical_user_
- #ifdef __ELF__
- .size KMP_PREFIX_UNDERSCORE(__kmp_unnamed_critical_addr),8
- #endif
- #endif /* KMP_ARCH_PPC64 || KMP_ARCH_AARCH64 || KMP_ARCH_MIPS64 ||
- KMP_ARCH_RISCV64 */
- #if KMP_OS_LINUX
- # if KMP_ARCH_ARM
- .section .note.GNU-stack,"",%progbits // empty GNU-stack note section (GNU marker for a non-executable stack); ARM spells the type with a percent sign
- # else
- .section .note.GNU-stack,"",@progbits // empty GNU-stack note section (GNU marker for a non-executable stack)
- # endif
- #endif
|