sme-abi.S 6.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230
  1. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  2. // See https://llvm.org/LICENSE.txt for license information.
  3. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  4. // This file implements the support routines for the SME ABI,
  5. // described here:
  6. // https://github.com/ARM-software/abi-aa/blob/main/aapcs64/aapcs64.rst#sme-support-routines
  7. #include "../assembly.h"
  // Operand spellings for the two globals this file reads. For each global,
  // <NAME>_SYMBOL is the operand given to adrp (page address) and
  // <NAME>_SYMBOL_OFFSET is the offset operand of the following load.
  #if defined(__APPLE__)
  // MachO requires @page/@pageoff directives because the global is defined
  // in a different file. Otherwise this file may fail to build.
  #define TPIDR2_SYMBOL SYMBOL_NAME(__aarch64_has_sme_and_tpidr2_el0)@page
  #define TPIDR2_SYMBOL_OFFSET SYMBOL_NAME(__aarch64_has_sme_and_tpidr2_el0)@pageoff
  #define CPU_FEATS_SYMBOL SYMBOL_NAME(__aarch64_cpu_features)@page
  #define CPU_FEATS_SYMBOL_OFFSET SYMBOL_NAME(__aarch64_cpu_features)@pageoff
  #else
  // Non-Apple targets: bare symbol for adrp, :lo12: relocation for the offset.
  #define TPIDR2_SYMBOL SYMBOL_NAME(__aarch64_has_sme_and_tpidr2_el0)
  #define TPIDR2_SYMBOL_OFFSET :lo12:SYMBOL_NAME(__aarch64_has_sme_and_tpidr2_el0)
  #define CPU_FEATS_SYMBOL SYMBOL_NAME(__aarch64_cpu_features)
  #define CPU_FEATS_SYMBOL_OFFSET :lo12:SYMBOL_NAME(__aarch64_cpu_features)
  #endif

  // The routines below use SME instructions and system registers
  // (smstop, SVCR, TPIDR2_EL0, ZA loads/stores, addsvl).
  .arch armv9-a+sme
  // do_abort: file-private helper that terminates via the system abort().
  //
  // The helper is streaming-compatible, so it leaves streaming-SVE mode (if
  // active) before handing control to abort(). abort() does not return, so
  // none of the caller's state is preserved here.
  DEFINE_COMPILERRT_PRIVATE_FUNCTION(do_abort)
          .cfi_startproc
          .variant_pcs SYMBOL_NAME(do_abort)
          BTI_C
          stp     x29, x30, [sp, #-32]!
          cntd    x0
          // Spill VG into the frame at [sp, #16]; the ".cfi_offset 46" below
          // describes that slot to the unwinder.
          str     x0, [sp, #16]
          .cfi_def_cfa_offset 32
          .cfi_offset w30, -24
          .cfi_offset w29, -32
          .cfi_offset 46, -16
          // Bit 0 of the value returned in x0 selects whether streaming mode
          // has to be stopped first.
          bl      __arm_sme_state
          tbz     x0, #0, 1f
          smstop  sm
  1:
          // This must stay a real call rather than a tail call: the unwinder
          // would otherwise have to restore the value of VG.
          bl      SYMBOL_NAME(abort)
          .cfi_endproc
  END_COMPILERRT_FUNCTION(do_abort)
  // __arm_sme_state: report the current SME state in x0/x1.
  //
  // The answer depends on the byte __aarch64_has_sme_and_tpidr2_el0, a local
  // that is set as part of the compiler-rt startup code.
  //
  // Out: x0 = 0 and x1 = 0 when that byte is zero. Otherwise
  //        x0[63:62] = 0b11, x0[1:0] = SVCR[1:0], remaining bits zero
  //        x1        = TPIDR2_EL0
  // Scratch: x16
  DEFINE_COMPILERRT_OUTLINE_FUNCTION_UNMANGLED(__arm_sme_state)
          .variant_pcs __arm_sme_state
          BTI_C
          // Start from the "not available" result.
          mov     x0, xzr
          mov     x1, xzr
          adrp    x16, TPIDR2_SYMBOL
          ldrb    w16, [x16, TPIDR2_SYMBOL_OFFSET]
          cbz     w16, 1f
          // Available: set the two top bits, then copy the low two bits of
          // SVCR into x0[1:0] and return the lazy-save pointer in x1.
          orr     x0, x0, #0xC000000000000000
          mrs     x16, SVCR
          bfxil   x0, x16, #0, #2
          mrs     x1, TPIDR2_EL0
  1:
          ret
  END_COMPILERRT_OUTLINE_FUNCTION(__arm_sme_state)
  // __arm_tpidr2_restore: reload ZA slices from the save block BLK.
  //
  // In:  x0 = BLK, read here as
  //        +0   (8 bytes)  za_save_buffer
  //        +8   (2 bytes)  num_za_save_slices
  //        +10  (6 bytes)  reserved, must be zero
  // Aborts (via do_abort) when TPIDR2_EL0 is non-null or a reserved byte is
  // non-zero. Returns without touching ZA when the buffer is NULL or the
  // slice count is zero.
  // Scratch: x14 (slice count), x15 (slice index), x16 (buffer cursor)
  DEFINE_COMPILERRT_OUTLINE_FUNCTION_UNMANGLED(__arm_tpidr2_restore)
          .variant_pcs __arm_tpidr2_restore
          BTI_C
          // A non-null TPIDR2_EL0 is an error: abort in a platform-specific
          // manner.
          mrs     x14, TPIDR2_EL0
          cbnz    x14, 2f
          // Reserved bytes 10..11 and 12..15 of BLK must all be zero,
          // otherwise abort.
          ldrh    w14, [x0, #10]
          cbnz    w14, 2f
          ldr     w14, [x0, #12]
          cbnz    w14, 2f
          // Nothing to do when BLK.za_save_buffer is NULL.
          ldr     x16, [x0]
          cbz     x16, 1f
          // Nothing to do when BLK.num_za_save_slices is zero.
          ldrh    w14, [x0, #8]
          cbz     x14, 1f
          mov     x15, xzr
  0:
          // Load one ZA slice, then step the cursor by one streaming vector
          // length and the index by one until all x14 slices are done.
          ldr     za[w15,0], [x16]
          addsvl  x16, x16, #1
          add     x15, x15, #1
          cmp     x14, x15
          b.ne    0b
  1:
          ret
  2:
          b       SYMBOL_NAME(do_abort)
  END_COMPILERRT_OUTLINE_FUNCTION(__arm_tpidr2_restore)
  // __arm_tpidr2_save: store ZA slices into the save block that TPIDR2_EL0
  // points to.
  //
  // The block is read as
  //        +0   (8 bytes)  za_save_buffer
  //        +8   (2 bytes)  num_za_save_slices
  //        +10  (6 bytes)  reserved, must be zero
  // Does nothing when the thread has no access to TPIDR2_EL0, when TPIDR2_EL0
  // is null, when the slice count is zero or when the buffer is NULL. Aborts
  // (via do_abort) when a reserved byte is non-zero.
  // Scratch: x14 (slice count), x15 (slice index), x16 (block / buffer cursor)
  DEFINE_COMPILERRT_OUTLINE_FUNCTION_UNMANGLED(__arm_tpidr2_save)
          // The directive must name this routine's own symbol so that it is
          // the one marked as following a variant procedure-call standard.
          .variant_pcs __arm_tpidr2_save
          BTI_C
          // If the current thread does not have access to TPIDR2_EL0, the
          // subroutine does nothing.
          adrp    x14, TPIDR2_SYMBOL
          ldrb    w14, [x14, TPIDR2_SYMBOL_OFFSET]
          cbz     w14, 1f
          // If TPIDR2_EL0 is null, the subroutine does nothing.
          mrs     x16, TPIDR2_EL0
          cbz     x16, 1f
          // If any of the reserved bytes in the first 16 bytes of the TPIDR2
          // block are nonzero, the subroutine aborts in some platform-defined
          // manner.
          ldrh    w14, [x16, #10]
          cbnz    w14, 2f
          ldr     w14, [x16, #12]
          cbnz    w14, 2f
          // If num_za_save_slices is zero, the subroutine does nothing.
          ldrh    w14, [x16, #8]
          cbz     x14, 1f
          // If za_save_buffer is NULL, the subroutine does nothing.
          ldr     x16, [x16]
          cbz     x16, 1f
          mov     x15, xzr
  0:
          // Store one ZA slice, then step the cursor by one streaming vector
          // length and the index by one until all x14 slices are done.
          str     za[w15,0], [x16]
          addsvl  x16, x16, #1
          add     x15, x15, #1
          cmp     x14, x15
          b.ne    0b
  1:
          ret
  2:
          b       SYMBOL_NAME(do_abort)
  END_COMPILERRT_OUTLINE_FUNCTION(__arm_tpidr2_save)
  // __arm_za_disable: commit any pending lazy save and turn ZA off.
  //
  // Does nothing when the thread has no access to SME (the byte
  // __aarch64_has_sme_and_tpidr2_el0 is zero). Otherwise it calls
  // __arm_tpidr2_save, clears TPIDR2_EL0 and sets PSTATE.ZA to 0.
  // Scratch: x14 here, plus whatever __arm_tpidr2_save uses.
  DEFINE_COMPILERRT_OUTLINE_FUNCTION_UNMANGLED(__arm_za_disable)
          // The directive must name this routine's own symbol so that it is
          // the one marked as following a variant procedure-call standard.
          .variant_pcs __arm_za_disable
          BTI_C
          // If the current thread does not have access to SME, the subroutine
          // does nothing.
          adrp    x14, TPIDR2_SYMBOL
          ldrb    w14, [x14, TPIDR2_SYMBOL_OFFSET]
          cbz     w14, 0f
          // Otherwise, the subroutine behaves as if it did the following:
          // * Call __arm_tpidr2_save.
          //   A frame record is set up first because the call overwrites x30.
          stp     x29, x30, [sp, #-16]!
          .cfi_def_cfa_offset 16
          mov     x29, sp
          .cfi_def_cfa w29, 16
          .cfi_offset w30, -8
          .cfi_offset w29, -16
          bl      __arm_tpidr2_save
          // * Set TPIDR2_EL0 to null.
          msr     TPIDR2_EL0, xzr
          // * Set PSTATE.ZA to 0.
          smstop  za
          .cfi_def_cfa wsp, 16
          ldp     x29, x30, [sp], #16
          .cfi_def_cfa_offset 0
          .cfi_restore w30
          .cfi_restore w29
  0:
          ret
  END_COMPILERRT_OUTLINE_FUNCTION(__arm_za_disable)
  // __arm_get_current_vg: return the current vector granule count in x0.
  //
  // Out: x0 = result of cntd when bit 30 of __aarch64_cpu_features is set, or
  //           when the SME/TPIDR2 flag byte is set and SVCR bit 0 (streaming
  //           mode) is 1;
  //      x0 = 0 otherwise.
  // Scratch: x17 only. The routine is a leaf: it reads SVCR directly instead
  // of calling __arm_sme_state, so it needs no frame record and never touches
  // x18 (the platform register, which must not be used as scratch on targets
  // that reserve it) or x1.
  DEFINE_COMPILERRT_OUTLINE_FUNCTION_UNMANGLED(__arm_get_current_vg)
          .variant_pcs __arm_get_current_vg
          BTI_C
          // Bit 30 of the low word of the CPU feature mask: when set, cntd is
          // usable regardless of the streaming state.
          // NOTE(review): bit 30 is presumably FEAT_SVE in compiler-rt's
          // feature enumeration - confirm if that enumeration changes.
          adrp    x17, CPU_FEATS_SYMBOL
          ldr     w17, [x17, CPU_FEATS_SYMBOL_OFFSET]
          tbnz    w17, #30, 1f
          // Without that feature, cntd is only used when SME is accessible
          // and the thread is currently in streaming mode.
          adrp    x17, TPIDR2_SYMBOL
          ldrb    w17, [x17, TPIDR2_SYMBOL_OFFSET]
          cbz     w17, 2f
          mrs     x17, SVCR
          tbz     x17, #0, 2f
  1:
          cntd    x0
          ret
  2:
          mov     x0, xzr
          ret
  END_COMPILERRT_OUTLINE_FUNCTION(__arm_get_current_vg)
  // File trailer: both macros come from assembly.h.
  // NOTE(review): NO_EXEC_STACK_DIRECTIVE presumably marks this object as not
  // needing an executable stack - confirm against assembly.h.
  NO_EXEC_STACK_DIRECTIVE

  // Emit the GNU property note describing BTI and PAC.
  GNU_PROPERTY_BTI_PAC