multibinary.asm 9.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399
  1. ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
  2. ; Copyright(c) 2011-2015 Intel Corporation All rights reserved.
  3. ;
  4. ; Redistribution and use in source and binary forms, with or without
  5. ; modification, are permitted provided that the following conditions
  6. ; are met:
  7. ; * Redistributions of source code must retain the above copyright
  8. ; notice, this list of conditions and the following disclaimer.
  9. ; * Redistributions in binary form must reproduce the above copyright
  10. ; notice, this list of conditions and the following disclaimer in
  11. ; the documentation and/or other materials provided with the
  12. ; distribution.
  13. ; * Neither the name of Intel Corporation nor the names of its
  14. ; contributors may be used to endorse or promote products derived
  15. ; from this software without specific prior written permission.
  16. ;
  17. ; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  18. ; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  19. ; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  20. ; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  21. ; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  22. ; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  23. ; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  24. ; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  25. ; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  26. ; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  27. ; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  28. ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
  29. %ifndef _MULTIBINARY_ASM_
  30. %define _MULTIBINARY_ASM_
  ;;;;
  ; Pointer-width aliases used by the macros below.
  ; Every output format except elf32 gets the 64-bit data directive, size
  ; keyword and register names; elf32 gets the 32-bit ones.
  ;;;;
  %ifnidn __OUTPUT_FORMAT__, elf32
    %define mbin_def_ptr  dq
    %define mbin_ptr_sz   qword
    %define mbin_rdi      rdi
    %define mbin_rsi      rsi
    %define mbin_rax      rax
    %define mbin_rbx      rbx
    %define mbin_rcx      rcx
    %define mbin_rdx      rdx
  %else
    %define mbin_def_ptr  dd
    %define mbin_ptr_sz   dword
    %define mbin_rdi      edi
    %define mbin_rsi      esi
    %define mbin_rax      eax
    %define mbin_rbx      ebx
    %define mbin_rcx      ecx
    %define mbin_rdx      edx
  %endif

  ; Feature level assumed when the build does not define one.
  %ifndef AS_FEATURE_LEVEL
    %define AS_FEATURE_LEVEL 4
  %endif
  ;;;;
  ; mbin_interface: multibinary entry-point macro
  ;   1-> function name
  ;
  ; Emits three symbols:
  ;   %1            - the visible (global) entry point; it jumps through the
  ;                   pointer stored in %1_dispatched
  ;   %1_dispatched - pointer-sized slot in .data, initialised to %1_mbinit
  ;   %1_mbinit     - first-call stub: calls %1_dispatch_init and then falls
  ;                   through into %1
  ;
  ; %1_dispatch_init is not defined here. The mbin_dispatch_init* macros in
  ; this file define it and overwrite %1_dispatched with the selected
  ; routine, so the stub is only reached while the slot still holds
  ; %1_mbinit.
  ;;;;
  %macro mbin_interface 1
          global  %1:ISAL_SYM_TYPE_FUNCTION

          section .data
  %1_dispatched:
          mbin_def_ptr    %1_mbinit

          section .text
  %1_mbinit:
          ;; first call only: pick the hardware-matched routine
          call    %1_dispatch_init
          ;; fall through and run whatever was selected
  %1:
          jmp     mbin_ptr_sz [%1_dispatched]
  %endmacro
  ;;;;;
  ; mbin_dispatch_init parameters
  ; Use this macro when SSE/00/01 is the minimum requirement
  ;   1-> function name
  ;   2-> SSE/00/01 optimized function used as base
  ;   3-> AVX or AVX/02 opt func
  ;   4-> AVX2 or AVX/04 opt func
  ;
  ; Defines %1_dispatch_init, which writes the chosen routine's address to
  ; %1_dispatched. Every register it uses is pushed on entry and popped
  ; before returning.
  ;;;;;
  %macro mbin_dispatch_init 4
          section .text
  %1_dispatch_init:
          push    mbin_rsi
          push    mbin_rax
          push    mbin_rbx
          push    mbin_rcx
          push    mbin_rdx

          lea     mbin_rsi, [%2 WRT_OPT]          ; start from the SSE 00/01 base

          ;; AVX needs both the CPU feature bit and OSXSAVE
          mov     eax, 1
          cpuid
          and     ecx, (FLAG_CPUID1_ECX_AVX | FLAG_CPUID1_ECX_OSXSAVE)
          cmp     ecx, (FLAG_CPUID1_ECX_AVX | FLAG_CPUID1_ECX_OSXSAVE)
          jne     _%1_init_done                   ; no AVX: keep the base
          lea     mbin_rsi, [%3 WRT_OPT]          ; AVX (gen2) opt func

          ;; Upgrade to AVX2 when CPUID leaf 7 reports it
          mov     eax, 7
          xor     ecx, ecx
          cpuid
          test    ebx, FLAG_CPUID7_EBX_AVX2
          lea     mbin_rbx, [%4 WRT_OPT]          ; AVX2 (gen4) opt func; lea keeps flags
          cmovne  mbin_rsi, mbin_rbx

          ;; XMM and YMM state must both be enabled, else go back to the base
          xor     ecx, ecx
          xgetbv
          and     eax, FLAG_XGETBV_EAX_XMM_YMM
          cmp     eax, FLAG_XGETBV_EAX_XMM_YMM
          je      _%1_init_done
          lea     mbin_rsi, [%2 WRT_OPT]

  _%1_init_done:
          pop     mbin_rdx
          pop     mbin_rcx
          pop     mbin_rbx
          pop     mbin_rax
          mov     [%1_dispatched], mbin_rsi       ; publish the selection
          pop     mbin_rsi
          ret
  %endmacro
  ;;;;;
  ; mbin_dispatch_init2 parameters
  ; For functions that only have a base implementation
  ;   1-> function name
  ;   2-> base function
  ;
  ; Defines %1_dispatch_init, which unconditionally stores the address of
  ; %2 into %1_dispatched. The scratch register is saved and restored.
  ;;;;;
  %macro mbin_dispatch_init2 2
          section .text
  %1_dispatch_init:
          push    mbin_rax
          lea     mbin_rax, [%2 WRT_OPT]          ; the only candidate
          mov     [%1_dispatched], mbin_rax
          pop     mbin_rax
          ret
  %endmacro
  ;;;;;
  ; mbin_dispatch_init_clmul parameters
  ; Use this case for CRC, which needs both SSE4_1 and CLMUL
  ;   1-> function name
  ;   2-> base function
  ;   3-> SSE4_1 and CLMUL optimized function
  ;
  ; Defines %1_dispatch_init, which stores %3 into %1_dispatched when
  ; CPUID leaf 1 reports both features and %2 otherwise. Every register it
  ; uses is pushed on entry and popped before returning.
  ;;;;;
  %macro mbin_dispatch_init_clmul 3
          section .text
  %1_dispatch_init:
          push    mbin_rsi
          push    mbin_rax
          push    mbin_rbx                        ; cpuid overwrites ebx
          push    mbin_rcx
          push    mbin_rdx

          lea     mbin_rsi, [%2 WRT_OPT]          ; default: base function

          mov     eax, 1
          cpuid
          ;; Both feature bits are required for the optimized routine
          test    ecx, FLAG_CPUID1_ECX_SSE4_1
          jz      _%1_init_done
          test    ecx, FLAG_CPUID1_ECX_CLMUL
          jz      _%1_init_done
          lea     mbin_rsi, [%3 WRT_OPT]          ; SSE4_1 + CLMUL opt func

  _%1_init_done:
          pop     mbin_rdx
          pop     mbin_rcx
          pop     mbin_rbx
          pop     mbin_rax
          mov     [%1_dispatched], mbin_rsi       ; publish the selection
          pop     mbin_rsi
          ret
  %endmacro
  ;;;;;
  ; mbin_dispatch_init5 parameters
  ;   1-> function name
  ;   2-> base function
  ;   3-> SSE4_2 or 00/01 optimized function
  ;   4-> AVX/02 opt func
  ;   5-> AVX2/04 opt func
  ;
  ; Defines %1_dispatch_init, which writes the chosen routine's address to
  ; %1_dispatched. Every register it uses is pushed on entry and popped
  ; before returning.
  ;;;;;
  %macro mbin_dispatch_init5 5
          section .text
  %1_dispatch_init:
          push    mbin_rsi
          push    mbin_rax
          push    mbin_rbx
          push    mbin_rcx
          push    mbin_rdx

          lea     mbin_rsi, [%2 WRT_OPT]          ; default: base function

          mov     eax, 1
          cpuid
          ;; SSE4.2 present: move up to the 00/01 routine
          lea     mbin_rbx, [%3 WRT_OPT]          ; SSE opt func
          test    ecx, FLAG_CPUID1_ECX_SSE4_2
          cmovne  mbin_rsi, mbin_rbx

          ;; AVX needs both the CPU feature bit and OSXSAVE
          and     ecx, (FLAG_CPUID1_ECX_AVX | FLAG_CPUID1_ECX_OSXSAVE)
          cmp     ecx, (FLAG_CPUID1_ECX_AVX | FLAG_CPUID1_ECX_OSXSAVE)
          jne     _%1_init_done                   ; no AVX: keep what we have
          lea     mbin_rsi, [%4 WRT_OPT]          ; AVX (gen2) opt func

          ;; Upgrade to AVX2 when CPUID leaf 7 reports it
          mov     eax, 7
          xor     ecx, ecx
          cpuid
          test    ebx, FLAG_CPUID7_EBX_AVX2
          lea     mbin_rbx, [%5 WRT_OPT]          ; AVX2 (gen4) opt func; lea keeps flags
          cmovne  mbin_rsi, mbin_rbx

          ;; XMM and YMM state must both be enabled, else use the SSE routine
          xor     ecx, ecx
          xgetbv
          and     eax, FLAG_XGETBV_EAX_XMM_YMM
          cmp     eax, FLAG_XGETBV_EAX_XMM_YMM
          je      _%1_init_done
          lea     mbin_rsi, [%3 WRT_OPT]

  _%1_init_done:
          pop     mbin_rdx
          pop     mbin_rcx
          pop     mbin_rbx
          pop     mbin_rax
          mov     [%1_dispatched], mbin_rsi       ; publish the selection
          pop     mbin_rsi
          ret
  %endmacro
  %if AS_FEATURE_LEVEL >= 6
  ;;;;;
  ; mbin_dispatch_init6 parameters
  ;   1-> function name
  ;   2-> base function
  ;   3-> SSE4_2 or 00/01 optimized function
  ;   4-> AVX/02 opt func
  ;   5-> AVX2/04 opt func
  ;   6-> AVX512/06 opt func
  ;
  ; Defines %1_dispatch_init. It walks the tiers from lowest to highest and
  ; leaves at the first missing requirement, so %1_dispatched receives the
  ; highest tier whose checks all passed. Every register it uses is pushed
  ; on entry and popped before returning.
  ;;;;;
  %macro mbin_dispatch_init6 6
          section .text
  %1_dispatch_init:
          push    mbin_rsi
          push    mbin_rax
          push    mbin_rbx
          push    mbin_rcx
          push    mbin_rdx
          push    mbin_rdi

          lea     mbin_rsi, [%2 WRT_OPT]          ; default: base function

          mov     eax, 1
          cpuid
          mov     ebx, ecx                        ; keep cpuid1.ecx for the AVX test
          test    ecx, FLAG_CPUID1_ECX_SSE4_2
          jz      _%1_init_done                   ; no SSE4_2: base function
          lea     mbin_rsi, [%3 WRT_OPT]          ; 00/01 opt func

          ;; xgetbv is only executed once OSXSAVE is confirmed
          test    ecx, FLAG_CPUID1_ECX_OSXSAVE
          jz      _%1_init_done
          xor     ecx, ecx
          xgetbv                                  ; xcr -> edx:eax
          mov     edi, eax                        ; keep xgetbv.eax for the ZMM test

          and     eax, FLAG_XGETBV_EAX_XMM_YMM
          cmp     eax, FLAG_XGETBV_EAX_XMM_YMM
          jne     _%1_init_done
          test    ebx, FLAG_CPUID1_ECX_AVX
          jz      _%1_init_done
          lea     mbin_rsi, [%4 WRT_OPT]          ; AVX/02 opt func

          ;; AVX2
          mov     eax, 7
          xor     ecx, ecx
          cpuid
          test    ebx, FLAG_CPUID7_EBX_AVX2
          jz      _%1_init_done                   ; no AVX2
          lea     mbin_rsi, [%5 WRT_OPT]          ; AVX2/04 opt func

          ;; AVX512: the saved xgetbv bits and the G1 feature group must all be set
          and     edi, FLAG_XGETBV_EAX_ZMM_OPM
          cmp     edi, FLAG_XGETBV_EAX_ZMM_OPM
          jne     _%1_init_done                   ; no AVX512 possible
          and     ebx, FLAGS_CPUID7_EBX_AVX512_G1
          cmp     ebx, FLAGS_CPUID7_EBX_AVX512_G1
          jne     _%1_init_done
          lea     mbin_rsi, [%6 WRT_OPT]          ; AVX512/06 opt func

  _%1_init_done:
          pop     mbin_rdi
          pop     mbin_rdx
          pop     mbin_rcx
          pop     mbin_rbx
          pop     mbin_rax
          mov     [%1_dispatched], mbin_rsi       ; publish the selection
          pop     mbin_rsi
          ret
  %endmacro
  %else
  ; Feature level below 6: the AVX512 argument is ignored and the
  ; five-parameter dispatcher is used instead.
  %macro mbin_dispatch_init6 6
          mbin_dispatch_init5 %1, %2, %3, %4, %5
  %endmacro
  %endif
  %if AS_FEATURE_LEVEL >= 10
  ;;;;;
  ; mbin_dispatch_init7 parameters
  ;   1-> function name
  ;   2-> base function
  ;   3-> SSE4_2 or 00/01 optimized function
  ;   4-> AVX/02 opt func
  ;   5-> AVX2/04 opt func
  ;   6-> AVX512/06 opt func
  ;   7-> AVX512 Update/10 opt func
  ;
  ; Defines %1_dispatch_init. It walks the tiers from lowest to highest and
  ; leaves at the first missing requirement, so %1_dispatched receives the
  ; highest tier whose checks all passed. In particular the /10 routine is
  ; only selected when the /06 (AVX512_G1) requirements are met as well.
  ; Every register it uses is pushed on entry and popped before returning.
  ;;;;;
  %macro mbin_dispatch_init7 7
          section .text
  %1_dispatch_init:
          push    mbin_rsi
          push    mbin_rax
          push    mbin_rbx
          push    mbin_rcx
          push    mbin_rdx
          push    mbin_rdi

          lea     mbin_rsi, [%2 WRT_OPT]          ; default: base function

          mov     eax, 1
          cpuid
          mov     ebx, ecx                        ; keep cpuid1.ecx for the AVX test
          test    ecx, FLAG_CPUID1_ECX_SSE4_2
          je      _%1_init_done                   ; no SSE4_2: base function
          lea     mbin_rsi, [%3 WRT_OPT]          ; 00/01 opt func

          ;; xgetbv is only executed once OSXSAVE is confirmed
          test    ecx, FLAG_CPUID1_ECX_OSXSAVE
          je      _%1_init_done
          xor     ecx, ecx
          xgetbv                                  ; xcr -> edx:eax
          mov     edi, eax                        ; keep xgetbv.eax for the ZMM test

          and     eax, FLAG_XGETBV_EAX_XMM_YMM
          cmp     eax, FLAG_XGETBV_EAX_XMM_YMM
          jne     _%1_init_done
          test    ebx, FLAG_CPUID1_ECX_AVX
          je      _%1_init_done
          lea     mbin_rsi, [%4 WRT_OPT]          ; AVX/02 opt func

          ;; AVX2
          xor     ecx, ecx
          mov     eax, 7
          cpuid                                   ; ebx/ecx = leaf 7 feature bits
          test    ebx, FLAG_CPUID7_EBX_AVX2
          je      _%1_init_done                   ; no AVX2
          lea     mbin_rsi, [%5 WRT_OPT]          ; AVX2/04 opt func

          ;; AVX512: the saved xgetbv bits and the G1 feature group must all be set
          and     edi, FLAG_XGETBV_EAX_ZMM_OPM
          cmp     edi, FLAG_XGETBV_EAX_ZMM_OPM
          jne     _%1_init_done                   ; no AVX512 possible
          and     ebx, FLAGS_CPUID7_EBX_AVX512_G1
          cmp     ebx, FLAGS_CPUID7_EBX_AVX512_G1
          jne     _%1_init_done                   ; G1 missing: never consider G2
          lea     mbin_rsi, [%6 WRT_OPT]          ; AVX512/06 opt func

          ;; AVX512 update: ecx still holds cpuid7.ecx (untouched since cpuid)
          and     ecx, FLAGS_CPUID7_ECX_AVX512_G2
          cmp     ecx, FLAGS_CPUID7_ECX_AVX512_G2
          jne     _%1_init_done
          lea     mbin_rsi, [%7 WRT_OPT]          ; AVX512 Update/10 opt func

  _%1_init_done:
          pop     mbin_rdi
          pop     mbin_rdx
          pop     mbin_rcx
          pop     mbin_rbx
          pop     mbin_rax
          mov     [%1_dispatched], mbin_rsi       ; publish the selection
          pop     mbin_rsi
          ret
  %endmacro
  %else
  ; Feature level below 10: the /10 argument is ignored and the
  ; six-parameter dispatcher is used instead.
  %macro mbin_dispatch_init7 7
          mbin_dispatch_init6 %1, %2, %3, %4, %5, %6
  %endmacro
  %endif
  362. %endif ; ifndef _MULTIBINARY_ASM_