aarch64_multibinary.h 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347
  1. /**********************************************************************
  2. Copyright(c) 2020 Arm Corporation All rights reserved.
  3. Redistribution and use in source and binary forms, with or without
  4. modification, are permitted provided that the following conditions
  5. are met:
  6. * Redistributions of source code must retain the above copyright
  7. notice, this list of conditions and the following disclaimer.
  8. * Redistributions in binary form must reproduce the above copyright
  9. notice, this list of conditions and the following disclaimer in
  10. the documentation and/or other materials provided with the
  11. distribution.
  12. * Neither the name of Arm Corporation nor the names of its
  13. contributors may be used to endorse or promote products derived
  14. from this software without specific prior written permission.
  15. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  16. "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  17. LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  18. A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  19. OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  20. SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  21. LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  22. DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  23. THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  24. (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  25. OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  26. **********************************************************************/
  27. #ifndef __AARCH64_MULTIBINARY_H__
  28. #define __AARCH64_MULTIBINARY_H__
  29. #ifndef __aarch64__
  30. #error "This file is for aarch64 only"
  31. #endif
  32. #include "aarch64_label.h"
  33. #ifdef __ASSEMBLY__
  34. /**
  35. * # mbin_interface : the wrapper layer for isal-l api
  36. *
  37. * ## references:
  38. * * https://sourceware.org/git/gitweb.cgi?p=glibc.git;a=blob;f=sysdeps/aarch64/dl-trampoline.S
  39. * * http://infocenter.arm.com/help/topic/com.arm.doc.ihi0055b/IHI0055B_aapcs64.pdf
  40. * * https://static.docs.arm.com/ihi0057/b/IHI0057B_aadwarf64.pdf?_ga=2.80574487.1870739014.1564969896-1634778941.1548729310
  41. *
  42. * ## Usage:
  43. * 1. Define dispather function
  44. * 2. name must be \name\()_dispatcher
  45. * 3. Prototype should be *"void * \name\()_dispatcher"*
  46. * 4. The dispather should return the right function pointer , revision and a string information .
  47. **/
  48. .macro mbin_interface name:req
  49. .extern cdecl(\name\()_dispatcher)
  50. .data
  51. .balign 8
  52. .global cdecl(\name\()_dispatcher_info)
  53. #ifndef __APPLE__
  54. .type \name\()_dispatcher_info,%object
  55. #endif
  56. cdecl(\name\()_dispatcher_info):
  57. .quad \name\()_mbinit //func_entry
  58. #ifndef __APPLE__
  59. .size \name\()_dispatcher_info,. - \name\()_dispatcher_info
  60. #endif
  61. .balign 8
  62. .text
  63. \name\()_mbinit:
  64. //save lp fp, sub sp
  65. .cfi_startproc
  66. stp x29, x30, [sp, -224]!
  67. //add cfi directive to avoid GDB bt cmds error
  68. //set cfi(Call Frame Information)
  69. .cfi_def_cfa_offset 224
  70. .cfi_offset 29, -224
  71. .cfi_offset 30, -216
  72. //save parameter/result/indirect result registers
  73. stp x8, x9, [sp, 16]
  74. .cfi_offset 8, -208
  75. .cfi_offset 9, -200
  76. stp x0, x1, [sp, 32]
  77. .cfi_offset 0, -192
  78. .cfi_offset 1, -184
  79. stp x2, x3, [sp, 48]
  80. .cfi_offset 2, -176
  81. .cfi_offset 3, -168
  82. stp x4, x5, [sp, 64]
  83. .cfi_offset 4, -160
  84. .cfi_offset 5, -152
  85. stp x6, x7, [sp, 80]
  86. .cfi_offset 6, -144
  87. .cfi_offset 7, -136
  88. stp q0, q1, [sp, 96]
  89. .cfi_offset 64, -128
  90. .cfi_offset 65, -112
  91. stp q2, q3, [sp, 128]
  92. .cfi_offset 66, -96
  93. .cfi_offset 67, -80
  94. stp q4, q5, [sp, 160]
  95. .cfi_offset 68, -64
  96. .cfi_offset 69, -48
  97. stp q6, q7, [sp, 192]
  98. .cfi_offset 70, -32
  99. .cfi_offset 71, -16
  100. /**
  101. * The dispatcher functions have the following prototype:
  102. * void * function_dispatcher(void)
  103. * As the dispatcher is returning a struct, by the AAPCS,
  104. */
  105. bl cdecl(\name\()_dispatcher)
  106. //restore temp/indirect result registers
  107. ldp x8, x9, [sp, 16]
  108. .cfi_restore 8
  109. .cfi_restore 9
  110. // save function entry
  111. str x0, [x9]
  112. //restore parameter/result registers
  113. ldp x0, x1, [sp, 32]
  114. .cfi_restore 0
  115. .cfi_restore 1
  116. ldp x2, x3, [sp, 48]
  117. .cfi_restore 2
  118. .cfi_restore 3
  119. ldp x4, x5, [sp, 64]
  120. .cfi_restore 4
  121. .cfi_restore 5
  122. ldp x6, x7, [sp, 80]
  123. .cfi_restore 6
  124. .cfi_restore 7
  125. ldp q0, q1, [sp, 96]
  126. .cfi_restore 64
  127. .cfi_restore 65
  128. ldp q2, q3, [sp, 128]
  129. .cfi_restore 66
  130. .cfi_restore 67
  131. ldp q4, q5, [sp, 160]
  132. .cfi_restore 68
  133. .cfi_restore 69
  134. ldp q6, q7, [sp, 192]
  135. .cfi_restore 70
  136. .cfi_restore 71
  137. //save lp fp and sp
  138. ldp x29, x30, [sp], 224
  139. //restore cfi setting
  140. .cfi_restore 30
  141. .cfi_restore 29
  142. .cfi_def_cfa_offset 0
  143. .cfi_endproc
  144. .global cdecl(\name)
  145. #ifndef __APPLE__
  146. .type \name,%function
  147. #endif
  148. .align 2
  149. cdecl(\name\()):
  150. #ifndef __APPLE__
  151. adrp x9, :got:\name\()_dispatcher_info
  152. ldr x9, [x9, #:got_lo12:\name\()_dispatcher_info]
  153. #else
  154. adrp x9, cdecl(\name\()_dispatcher_info)@GOTPAGE
  155. ldr x9, [x9, #cdecl(\name\()_dispatcher_info)@GOTPAGEOFF]
  156. #endif
  157. ldr x10,[x9]
  158. br x10
  159. #ifndef __APPLE__
  160. .size \name,. - \name
  161. #endif
  162. .endm
  163. /**
  164. * mbin_interface_base is used for the interfaces which have only
  165. * noarch implementation
  166. */
  167. .macro mbin_interface_base name:req, base:req
  168. .extern \base
  169. .data
  170. .balign 8
  171. .global cdecl(\name\()_dispatcher_info)
  172. #ifndef __APPLE__
  173. .type \name\()_dispatcher_info,%object
  174. #endif
  175. cdecl(\name\()_dispatcher_info):
  176. .quad \base //func_entry
  177. #ifndef __APPLE__
  178. .size \name\()_dispatcher_info,. - \name\()_dispatcher_info
  179. #endif
  180. .balign 8
  181. .text
  182. .global cdecl(\name)
  183. #ifndef __APPLE__
  184. .type \name,%function
  185. #endif
  186. .align 2
  187. cdecl(\name\()):
  188. #ifndef __APPLE__
  189. adrp x9, :got:cdecl(_\name\()_dispatcher_info)
  190. ldr x9, [x9, #:got_lo12:cdecl(_\name\()_dispatcher_info)]
  191. #else
  192. adrp x9, cdecl(_\name\()_dispatcher_info)@GOTPAGE
  193. ldr x9, [x9, #cdecl(_\name\()_dispatcher_info)@GOTPAGEOFF]
  194. #endif
  195. ldr x10,[x9]
  196. br x10
  197. #ifndef __APPLE__
  198. .size \name,. - \name
  199. #endif
  200. .endm
  201. #else /* __ASSEMBLY__ */
  202. #include <stdint.h>
  203. #if defined(__linux__)
  204. #include <sys/auxv.h>
  205. #include <asm/hwcap.h>
  206. #elif defined(__APPLE__)
  207. #define SYSCTL_PMULL_KEY "hw.optional.arm.FEAT_PMULL" // from macOS 12 FEAT_* sysctl infos are available
  208. #define SYSCTL_CRC32_KEY "hw.optional.armv8_crc32"
  209. #define SYSCTL_SVE_KEY "hw.optional.arm.FEAT_SVE" // this one is just a guess and need to check macOS update
  210. #include <sys/sysctl.h>
  211. #include <stddef.h>
  /**
   * @brief Read an integer sysctl by name.
   *
   * @param name  sysctl key passed to sysctlbyname()
   * @return the integer value reported for the key, or 0 when
   *         sysctlbyname() reports failure
   */
  static inline int sysctlEnabled(const char* name){
      int value;
      size_t value_len = sizeof(value);

      /* A non-zero status means the lookup failed: report "not enabled". */
      if (sysctlbyname(name, &value, &value_len, NULL, 0))
          return 0;

      return value;
  }
  218. #endif
  /*
   * Function header of the dispatcher for interface `name`:
   *     void * name_dispatcher(void)
   * Follow it with the function body.
   */
  #define DEFINE_INTERFACE_DISPATCHER(name) \
      void * name##_dispatcher(void)

  /* Provider for the `name##_base` implementation of interface `name`. */
  #define PROVIDER_BASIC(name) \
      PROVIDER_INFO(name##_base)

  /*
   * Diagnostic helpers built on the _Pragma operator.
   *
   * DO_PRAGMA(x)        - emits `#pragma x`
   * DIGNOSTIC_IGNORE(x) - ignores warning option x, given in full
   *                       (e.g. -Wnested-externs)
   * DO_DIGNOSTIC(x)     - same, with the "-W" prefix supplied by the macro
   *                       (e.g. nested-externs)
   * DIGNOSTIC_PUSH/POP  - save / restore the diagnostic state
   *
   * DO_DIGNOSTIC goes through DIGNOSTIC_IGNORE so that the pragma text is
   * handed to _Pragma as one parenthesised string literal.
   */
  #define DO_PRAGMA(x) _Pragma (#x)
  #define DIGNOSTIC_IGNORE(x) DO_PRAGMA(GCC diagnostic ignored #x)
  #define DO_DIGNOSTIC(x) DIGNOSTIC_IGNORE(-W##x)
  #define DIGNOSTIC_PUSH() DO_PRAGMA(GCC diagnostic push)
  #define DIGNOSTIC_POP() DO_PRAGMA(GCC diagnostic pop)

  /*
   * Evaluates to the function `_func_entry`, declaring it on the spot as
   * `extern void _func_entry(void)`. The block-scope extern declaration
   * would trigger -Wnested-externs, so that warning is silenced around the
   * declaration only. Uses a GNU statement expression.
   */
  #define PROVIDER_INFO(_func_entry) \
      ({ DIGNOSTIC_PUSH() \
         DIGNOSTIC_IGNORE(-Wnested-externs) \
         extern void _func_entry(void); \
         DIGNOSTIC_POP() \
         _func_entry; \
      })
  /**
   * Micro-architecture definitions
   * Reference: https://developer.arm.com/docs/ddi0595/f/aarch64-system-registers/midr_el1
   *
   * MICRO_ARCH_ID(imp, part) builds an identifier from the
   * CPU_IMPLEMENTER_<imp> and CPU_PART_<part> constants:
   *   bits [31:24]  implementer code (8 bits)
   *   bits [15:4]   part number      (12 bits)
   * These are the bits kept by the 0xff00fff0 mask in get_micro_arch_id().
   *
   * The masks are unsigned so that the shift by 24 is well defined for
   * every 8-bit implementer code, including those with the top bit set.
   */
  #define CPU_IMPLEMENTER_RESERVE 0x00
  #define CPU_IMPLEMENTER_ARM     0x41

  #define CPU_PART_CORTEX_A57     0xD07
  #define CPU_PART_CORTEX_A72     0xD08
  #define CPU_PART_NEOVERSE_N1    0xD0C

  #define MICRO_ARCH_ID(imp,part) \
      ((((CPU_IMPLEMENTER_##imp) & 0xffU) << 24) | (((CPU_PART_##part) & 0xfffU) << 4))

  /* Fallback for system headers that do not provide HWCAP_CPUID. */
  #ifndef HWCAP_CPUID
  #define HWCAP_CPUID (1<<11)
  #endif
  249. /**
  250. * @brief get_micro_arch_id
  251. *
  252. * read micro-architector register instruction if possible.This function
  253. * provides microarchitecture information and make microarchitecture optimization
  254. * possible.
  255. *
  256. * Read system registers(MRS) is forbidden in userspace. If executed, it
  257. * will raise illegal instruction error. Kernel provides a solution for
  258. * this issue. The solution depends on HWCAP_CPUID flags. Reference(1)
  259. * describes how to use it. It provides a "illegal insstruction" handler
  260. * in kernel space, the handler will execute MRS and return the correct
  261. * value to userspace.
  262. *
  263. * To avoid too many kernel trap, this function MUST be only called in
  264. * dispatcher. And HWCAP must be match,That will make sure there are no
  265. * illegal instruction errors. HWCAP_CPUID should be available to get the
  266. * best performance.
  267. *
  268. * NOTICE:
  269. * - HWCAP_CPUID should be available. Otherwise it returns reserve value
  270. * - It MUST be called inside dispather.
  271. * - It MUST meet the HWCAP requirements
  272. *
  273. * Example:
  274. * DEFINE_INTERFACE_DISPATCHER(crc32_iscsi)
  275. * {
  276. * unsigned long auxval = getauxval(AT_HWCAP);
  277. * // MUST do the judgement is MUST.
  278. * if ((HWCAP_CRC32 | HWCAP_PMULL) == (auxval & (HWCAP_CRC32 | HWCAP_PMULL))) {
  279. * switch (get_micro_arch_id()) {
  280. * case MICRO_ARCH_ID(ARM, CORTEX_A57):
  281. * return PROVIDER_INFO(crc32_pmull_crc_for_a57);
  282. * case MICRO_ARCH_ID(ARM, CORTEX_A72):
  283. * return PROVIDER_INFO(crc32_pmull_crc_for_a72);
  284. * case MICRO_ARCH_ID(ARM, NEOVERSE_N1):
  285. * return PROVIDER_INFO(crc32_pmull_crc_for_n1);
  286. * case default:
  287. * return PROVIDER_INFO(crc32_pmull_crc_for_others);
  288. * }
  289. * }
  290. * return PROVIDER_BASIC(crc32_iscsi);
  291. * }
  292. * KNOWN ISSUE:
  293. * On a heterogeneous system (big.LITTLE), it will work but the performance
  294. * might not be the best one as expected.
  295. *
  296. * If this function is called on the big core, it will return the function
  297. * optimized for the big core.
  298. *
  299. * If execution is then scheduled to the little core. It will still work (1),
  300. * but the function won't be optimized for the little core, thus the performance
  301. * won't be as expected.
  302. *
  303. * References:
  304. * - [CPU Feature detection](https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/Documentation/arm64/cpu-feature-registers.rst?h=v5.5)
  305. *
  306. */
  307. static inline uint32_t get_micro_arch_id(void)
  308. {
  309. uint32_t id=CPU_IMPLEMENTER_RESERVE;
  310. #ifndef __APPLE__
  311. if ((getauxval(AT_HWCAP) & HWCAP_CPUID)) {
  312. /** Here will trap into kernel space */
  313. asm("mrs %0, MIDR_EL1 " : "=r" (id));
  314. }
  315. #endif
  316. return id&0xff00fff0;
  317. }
  318. #endif /* __ASSEMBLY__ */
  319. #endif