123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399 |
- ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
- ; Copyright(c) 2011-2015 Intel Corporation All rights reserved.
- ;
- ; Redistribution and use in source and binary forms, with or without
- ; modification, are permitted provided that the following conditions
- ; are met:
- ; * Redistributions of source code must retain the above copyright
- ; notice, this list of conditions and the following disclaimer.
- ; * Redistributions in binary form must reproduce the above copyright
- ; notice, this list of conditions and the following disclaimer in
- ; the documentation and/or other materials provided with the
- ; distribution.
- ; * Neither the name of Intel Corporation nor the names of its
- ; contributors may be used to endorse or promote products derived
- ; from this software without specific prior written permission.
- ;
- ; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- ; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- ; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- ; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- ; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- ; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- ; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- ; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- ; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- ; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- ; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
- %ifndef _MULTIBINARY_ASM_
- %define _MULTIBINARY_ASM_
;;;;
; Pointer-width aliases used by every macro in this file.
; elf32 output gets 32-bit pointers and registers; any other output
; format gets the 64-bit forms.
;   mbin_def_ptr - data directive that reserves one pointer (dd / dq)
;   mbin_ptr_sz  - operand-size keyword for a pointer-sized memory access
;   mbin_rXX     - pointer-sized name of the corresponding register
;;;;
%ifnidn __OUTPUT_FORMAT__, elf32
 %define mbin_def_ptr	dq
 %define mbin_ptr_sz	qword
 %define mbin_rdi	rdi
 %define mbin_rsi	rsi
 %define mbin_rax	rax
 %define mbin_rbx	rbx
 %define mbin_rcx	rcx
 %define mbin_rdx	rdx
%else
 %define mbin_def_ptr	dd
 %define mbin_ptr_sz	dword
 %define mbin_rdi	edi
 %define mbin_rsi	esi
 %define mbin_rax	eax
 %define mbin_rbx	ebx
 %define mbin_rcx	ecx
 %define mbin_rdx	edx
%endif

;;;;
; AS_FEATURE_LEVEL gates the AVX512 dispatch macros further down
; (>= 6 and >= 10).  It defaults to 4 when the build has not set it.
;;;;
%ifndef AS_FEATURE_LEVEL
 %define AS_FEATURE_LEVEL 4
%endif
;;;;
; mbin_interface parameters
;   1-> function name
;
; Creates the visible entry point for %1 and the pointer it dispatches
; through:
;   %1_dispatched - pointer-sized slot in .data, initialised to %1_mbinit
;   %1_mbinit     - calls %1_dispatch_init, then falls through into %1
;   %1            - exported symbol; jumps through %1_dispatched
;
; %1_dispatch_init is not defined by this macro; the mbin_dispatch_init*
; macros in this file generate it and overwrite %1_dispatched, so
; %1_mbinit is only reached while the slot still holds its initial value.
; The macro leaves the assembler in section .text.
;;;;
%macro mbin_interface 1
	global	%1:ISAL_SYM_TYPE_FUNCTION

	section .data
%1_dispatched:	mbin_def_ptr	%1_mbinit	; first call lands in %1_mbinit

	section .text
%1_mbinit:
	;; reached only through the initial pointer value: pick the implementation
	call	%1_dispatch_init
	;; no ret - execution continues into the entry point below
%1:
	jmp	mbin_ptr_sz [%1_dispatched]
%endmacro
;;;;;
; mbin_dispatch_init parameters
; For functions whose lowest implementation is already SSE/00/01.
;   1-> function name
;   2-> SSE/00/01 optimized function used as base
;   3-> AVX or AVX/02 opt func
;   4-> AVX2 or AVX/04 opt func
;
; Defines %1_dispatch_init, which stores one of %2/%3/%4 into
; %1_dispatched.  All general-purpose registers it uses are saved on
; entry and restored before returning; flags are not preserved.
;;;;;
%macro mbin_dispatch_init 4
	section .text
%1_dispatch_init:
	push	mbin_rsi
	push	mbin_rax
	push	mbin_rbx
	push	mbin_rcx
	push	mbin_rdx

	lea	mbin_rsi, [%2 WRT_OPT]		; selection starts at the SSE 00/01 base

	;; CPUID.1:ECX has to report AVX and OSXSAVE together to go further
	mov	eax, 1
	cpuid
	and	ecx, (FLAG_CPUID1_ECX_AVX | FLAG_CPUID1_ECX_OSXSAVE)
	cmp	ecx, (FLAG_CPUID1_ECX_AVX | FLAG_CPUID1_ECX_OSXSAVE)
	jnz	_%1_init_done			; keep the base function
	lea	mbin_rsi, [%3 WRT_OPT]		; AVX (gen2) opt func

	;; CPUID.7 (subleaf 0): move up to the gen4 function when AVX2 is reported
	mov	eax, 7
	xor	ecx, ecx
	cpuid
	lea	mbin_rdx, [%4 WRT_OPT]		; lea touches neither ebx nor the flags
	test	ebx, FLAG_CPUID7_EBX_AVX2
	cmovnz	mbin_rsi, mbin_rdx

	;; XCR0 must show both XMM and YMM state enabled, else return to the base
	xor	ecx, ecx
	xgetbv
	lea	mbin_rbx, [%2 WRT_OPT]
	and	eax, FLAG_XGETBV_EAX_XMM_YMM
	cmp	eax, FLAG_XGETBV_EAX_XMM_YMM
	cmovnz	mbin_rsi, mbin_rbx

_%1_init_done:
	mov	[%1_dispatched], mbin_rsi	; record the chosen function
	pop	mbin_rdx
	pop	mbin_rcx
	pop	mbin_rbx
	pop	mbin_rax
	pop	mbin_rsi
	ret
%endmacro
;;;;;
; mbin_dispatch_init2 parameters
; For functions that only have a base implementation.
;   1-> function name
;   2-> base function
;
; Defines %1_dispatch_init, which unconditionally stores %2 into
; %1_dispatched.  No CPU feature is queried.
;;;;;
%macro mbin_dispatch_init2 2
	section .text
%1_dispatch_init:
	push	mbin_rax			; scratch register, restored below
	lea	mbin_rax, [%2 WRT_OPT]
	mov	[%1_dispatched], mbin_rax
	pop	mbin_rax
	ret
%endmacro
;;;;;
; mbin_dispatch_init_clmul parameters
; For CRC-style functions whose optimized version needs both SSE4_1
; and CLMUL.
;   1-> function name
;   2-> base function
;   3-> SSE4_1 and CLMUL optimized function
;
; Defines %1_dispatch_init, which stores %3 into %1_dispatched when
; CPUID.1:ECX reports both features and %2 otherwise.  All
; general-purpose registers it uses are restored before returning.
;;;;;
%macro mbin_dispatch_init_clmul 3
	section .text
%1_dispatch_init:
	push	mbin_rsi
	push	mbin_rax
	push	mbin_rbx
	push	mbin_rcx
	push	mbin_rdx

	lea	mbin_rsi, [%2 WRT_OPT]		; base function unless both tests pass

	mov	eax, 1
	cpuid
	test	ecx, FLAG_CPUID1_ECX_SSE4_1	; SSE4.1 reported?
	jz	_%1_init_done
	test	ecx, FLAG_CPUID1_ECX_CLMUL	; carry-less multiply reported?
	jz	_%1_init_done
	lea	mbin_rsi, [%3 WRT_OPT]		; both present: optimized function

_%1_init_done:
	mov	[%1_dispatched], mbin_rsi	; record the chosen function
	pop	mbin_rdx
	pop	mbin_rcx
	pop	mbin_rbx
	pop	mbin_rax
	pop	mbin_rsi
	ret
%endmacro
;;;;;
; mbin_dispatch_init5 parameters
;   1-> function name
;   2-> base function
;   3-> SSE4_2 or 00/01 optimized function
;   4-> AVX/02 opt func
;   5-> AVX2/04 opt func
;
; Defines %1_dispatch_init, which stores one of %2..%5 into
; %1_dispatched.  All general-purpose registers it uses are saved on
; entry and restored before returning; flags are not preserved.
;;;;;
%macro mbin_dispatch_init5 5
	section .text
%1_dispatch_init:
	push	mbin_rsi
	push	mbin_rax
	push	mbin_rbx
	push	mbin_rcx
	push	mbin_rdx

	lea	mbin_rsi, [%2 WRT_OPT]		; start from the base function

	mov	eax, 1
	cpuid

	;; SSE4.2 reported: prefer the 00/01 function over the base
	lea	mbin_rbx, [%3 WRT_OPT]		; placed after cpuid, which overwrites ebx
	test	ecx, FLAG_CPUID1_ECX_SSE4_2
	cmovnz	mbin_rsi, mbin_rbx

	;; AVX and OSXSAVE are both required before an AVX function is considered
	and	ecx, (FLAG_CPUID1_ECX_AVX | FLAG_CPUID1_ECX_OSXSAVE)
	cmp	ecx, (FLAG_CPUID1_ECX_AVX | FLAG_CPUID1_ECX_OSXSAVE)
	jnz	_%1_init_done
	lea	mbin_rsi, [%4 WRT_OPT]		; AVX (gen2) opt func

	;; CPUID.7 (subleaf 0): move up to the gen4 function when AVX2 is reported
	mov	eax, 7
	xor	ecx, ecx
	cpuid
	lea	mbin_rdx, [%5 WRT_OPT]		; lea touches neither ebx nor the flags
	test	ebx, FLAG_CPUID7_EBX_AVX2
	cmovnz	mbin_rsi, mbin_rdx

	;; XCR0 must show XMM and YMM state enabled, else fall back to the 00/01 function
	xor	ecx, ecx
	xgetbv
	lea	mbin_rbx, [%3 WRT_OPT]
	and	eax, FLAG_XGETBV_EAX_XMM_YMM
	cmp	eax, FLAG_XGETBV_EAX_XMM_YMM
	cmovnz	mbin_rsi, mbin_rbx

_%1_init_done:
	mov	[%1_dispatched], mbin_rsi	; record the chosen function
	pop	mbin_rdx
	pop	mbin_rcx
	pop	mbin_rbx
	pop	mbin_rax
	pop	mbin_rsi
	ret
%endmacro
- %if AS_FEATURE_LEVEL >= 6
;;;;;
; mbin_dispatch_init6 parameters
;   1-> function name
;   2-> base function
;   3-> SSE4_2 or 00/01 optimized function
;   4-> AVX/02 opt func
;   5-> AVX2/04 opt func
;   6-> AVX512/06 opt func
;
; Defines %1_dispatch_init, which climbs the list above one level at a
; time and stops at the first level whose test fails; the last level
; reached is stored into %1_dispatched.  All general-purpose registers
; it uses are saved on entry and restored before returning.
;;;;;
%macro mbin_dispatch_init6 6
	section .text
%1_dispatch_init:
	push	mbin_rsi
	push	mbin_rax
	push	mbin_rbx
	push	mbin_rcx
	push	mbin_rdx
	push	mbin_rdi

	lea	mbin_rsi, [%2 WRT_OPT]		; level 0: base function

	mov	eax, 1
	cpuid
	test	ecx, FLAG_CPUID1_ECX_SSE4_2
	jz	_%1_init_done			; no SSE4_2: stay on the base function
	lea	mbin_rsi, [%3 WRT_OPT]		; level 1: 00/01 opt

	;; AVX level: needs OSXSAVE and AVX from CPUID.1:ECX, then XMM+YMM state in XCR0
	test	ecx, FLAG_CPUID1_ECX_OSXSAVE
	jz	_%1_init_done			; xgetbv must not be reached without OSXSAVE
	test	ecx, FLAG_CPUID1_ECX_AVX
	jz	_%1_init_done
	xor	ecx, ecx
	xgetbv					; xcr0 -> edx:eax
	mov	edi, eax			; kept for the ZMM state test below
	and	eax, FLAG_XGETBV_EAX_XMM_YMM
	cmp	eax, FLAG_XGETBV_EAX_XMM_YMM
	jnz	_%1_init_done
	lea	mbin_rsi, [%4 WRT_OPT]		; level 2: AVX/02 opt

	;; AVX2 level: CPUID.7 (subleaf 0) EBX
	mov	eax, 7
	xor	ecx, ecx
	cpuid
	test	ebx, FLAG_CPUID7_EBX_AVX2
	jz	_%1_init_done			; no AVX2 possible
	lea	mbin_rsi, [%5 WRT_OPT]		; level 3: AVX2/04 opt func

	;; AVX512 level: ZMM/opmask state bits in XCR0 plus the whole G1 feature set
	and	edi, FLAG_XGETBV_EAX_ZMM_OPM
	cmp	edi, FLAG_XGETBV_EAX_ZMM_OPM
	jnz	_%1_init_done			; no AVX512 possible
	and	ebx, FLAGS_CPUID7_EBX_AVX512_G1
	cmp	ebx, FLAGS_CPUID7_EBX_AVX512_G1
	jnz	_%1_init_done
	lea	mbin_rsi, [%6 WRT_OPT]		; level 4: AVX512/06 opt

_%1_init_done:
	mov	[%1_dispatched], mbin_rsi	; record the chosen function
	pop	mbin_rdi
	pop	mbin_rdx
	pop	mbin_rcx
	pop	mbin_rbx
	pop	mbin_rax
	pop	mbin_rsi
	ret
%endmacro
- %else
;; Feature level below 6: same 6-parameter signature, but the AVX512/06
;; function (%6) is ignored and selection is delegated to mbin_dispatch_init5.
%macro mbin_dispatch_init6 6
	mbin_dispatch_init5	%1, %2, %3, %4, %5
%endmacro
- %endif
- %if AS_FEATURE_LEVEL >= 10
;;;;;
; mbin_dispatch_init7 parameters
;   1-> function name
;   2-> base function
;   3-> SSE4_2 or 00/01 optimized function
;   4-> AVX/02 opt func
;   5-> AVX2/04 opt func
;   6-> AVX512/06 opt func
;   7-> AVX512 Update/10 opt func
;
; Defines %1_dispatch_init, which climbs the list above one level at a
; time and stops at the first level whose test fails; the last level
; reached is stored into %1_dispatched.  The /10 function is considered
; only after the AVX512 G1 test has passed.  All general-purpose
; registers it uses are saved on entry and restored before returning.
;;;;;
%macro mbin_dispatch_init7 7
	section .text
%1_dispatch_init:
	push	mbin_rsi
	push	mbin_rax
	push	mbin_rbx
	push	mbin_rcx
	push	mbin_rdx
	push	mbin_rdi
	lea	mbin_rsi, [%2 WRT_OPT]		; Default - use base function

	mov	eax, 1
	cpuid
	mov	ebx, ecx			; save cpuid1.ecx
	test	ecx, FLAG_CPUID1_ECX_SSE4_2
	je	_%1_init_done			; Use base function if no SSE4_2
	lea	mbin_rsi, [%3 WRT_OPT]		; SSE possible so use 00/01 opt

	;; Test for XMM_YMM support/AVX
	test	ecx, FLAG_CPUID1_ECX_OSXSAVE
	je	_%1_init_done			; xgetbv must not be reached without OSXSAVE
	xor	ecx, ecx
	xgetbv					; xcr0 -> edx:eax
	mov	edi, eax			; save xgetbv.eax for the ZMM test below
	and	eax, FLAG_XGETBV_EAX_XMM_YMM
	cmp	eax, FLAG_XGETBV_EAX_XMM_YMM
	jne	_%1_init_done
	test	ebx, FLAG_CPUID1_ECX_AVX
	je	_%1_init_done
	lea	mbin_rsi, [%4 WRT_OPT]		; AVX/02 opt

	;; Test for AVX2
	xor	ecx, ecx
	mov	eax, 7
	cpuid					; ebx/ecx now hold the leaf 7 feature bits
	test	ebx, FLAG_CPUID7_EBX_AVX2
	je	_%1_init_done			; No AVX2 possible
	lea	mbin_rsi, [%5 WRT_OPT]		; AVX2/04 opt func

	;; Test for AVX512
	and	edi, FLAG_XGETBV_EAX_ZMM_OPM
	cmp	edi, FLAG_XGETBV_EAX_ZMM_OPM
	jne	_%1_init_done			; No AVX512 possible
	and	ebx, FLAGS_CPUID7_EBX_AVX512_G1
	cmp	ebx, FLAGS_CPUID7_EBX_AVX512_G1
	jne	_%1_init_done			; G1 incomplete: stay on AVX2/04 and
						; never consider the /10 function
	lea	mbin_rsi, [%6 WRT_OPT]		; AVX512/06 opt

	;; Test for the AVX512 update (G2) feature set; ecx is still cpuid7.ecx
	and	ecx, FLAGS_CPUID7_ECX_AVX512_G2
	cmp	ecx, FLAGS_CPUID7_ECX_AVX512_G2
	lea	mbin_rbx, [%7 WRT_OPT]		; AVX512/10 opt (lea keeps the flags)
	cmove	mbin_rsi, mbin_rbx

_%1_init_done:
	pop	mbin_rdi
	pop	mbin_rdx
	pop	mbin_rcx
	pop	mbin_rbx
	pop	mbin_rax
	mov	[%1_dispatched], mbin_rsi	; record the chosen function
	pop	mbin_rsi
	ret
%endmacro
- %else
;; Feature level below 10: same 7-parameter signature, but the AVX512/10
;; function (%7) is ignored and selection is delegated to mbin_dispatch_init6.
%macro mbin_dispatch_init7 7
	mbin_dispatch_init6	%1, %2, %3, %4, %5, %6
%endmacro
- %endif
- %endif ; ifndef _MULTIBINARY_ASM_
|