checkasm.S 6.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197
  1. /****************************************************************************
  2. * Assembly testing and benchmarking tool
  3. * Copyright (c) 2015 Martin Storsjo
  4. * Copyright (c) 2015 Janne Grunau
  5. *
  6. * This file is part of FFmpeg.
  7. *
  8. * FFmpeg is free software; you can redistribute it and/or modify
  9. * it under the terms of the GNU General Public License as published by
  10. * the Free Software Foundation; either version 2 of the License, or
  11. * (at your option) any later version.
  12. *
  13. * FFmpeg is distributed in the hope that it will be useful,
  14. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  15. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  16. * GNU General Public License for more details.
  17. *
  18. * You should have received a copy of the GNU General Public License
  19. * along with this program; if not, write to the Free Software
  20. * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.
  21. *****************************************************************************/
  22. #include "libavutil/arm/asm.S"
  23. /* override fpu so that NEON instructions are rejected */
  /* NOTE(review): FPU and ELF are not defined in this file; presumably they
   * are line-prefix macros from the included asm.S that emit the rest of the
   * line only where applicable -- confirm there. */
  24. #if HAVE_VFP
  25. FPU .fpu vfp
  26. ELF .eabi_attribute 10, 0 @ suppress Tag_FP_arch
  27. #endif
  @ Fill pattern for the registers that clobbercheck verifies: eight 64-bit
  @ constants (64 bytes).
  @ Before the call, clobbercheck loads all eight into d8-d15 (vfp variant)
  @ and loads the first eight 32-bit words (the first four constants) into
  @ r4-r11. After the call it reads the same table again and compares.
  @ NOTE(review): align=3 presumably requests 2^3 = 8 byte alignment; the
  @ const/endconst macros are defined outside this file -- confirm there.
  28. const register_init, align=3
  29. .quad 0x21f86d66c8ca00ce
  30. .quad 0x75b6ba21077c48ad
  31. .quad 0xed56bb2dcb3c7736
  32. .quad 0x8bda43d3fd1a7e06
  33. .quad 0xb64a9c9e5d318408
  34. .quad 0xdf9a54b303f1d3a3
  35. .quad 0x4a75479abd64e097
  36. .quad 0x249214109d5d1c88
  37. endconst
  @ NUL-terminated format strings for the failure reports. On a failed check
  @ clobbercheck places the address of one of them in r0 and the value for
  @ the conversion in r1, then calls checkasm_fail_func:
  @   fpscr: r1 = FPSCR bits that differ (after masking the ignored bits)
  @   gpr:   r1 = number of the core register that differs
  @   vfp:   r1 = number of the double register that differs
  @   stack: no conversion in the string
  @ Only the first string is opened with const; the others are plain labels
  @ inside the same const/endconst region.
  38. const error_message_fpscr
  39. .asciz "failed to preserve register FPSCR, changed bits: %x"
  40. error_message_gpr:
  41. .asciz "failed to preserve register r%d"
  42. error_message_vfp:
  43. .asciz "failed to preserve register d%d"
  44. error_message_stack:
  45. .asciz "failed to preserve stack"
  46. endconst
  47. @ max number of args used by any asm function.
  48. #define MAX_ARGS 15
  @ Bytes of stack arguments that clobbercheck copies for the call: one
  @ 4 byte word for every argument beyond the first four.
  49. #define ARG_STACK 4*(MAX_ARGS - 4)
  50. @ Align the used stack space to 8 to preserve the stack alignment.
  51. @ +8 for stack canary reference.
  @ pushed is not a preprocessor macro: it is an assembler symbol that
  @ clobbercheck sets with .equ to the number of bytes its prologue has
  @ pushed, so the expression below is evaluated by the assembler at every
  @ use and differs between the vfp and novfp variants.
  @ The canary reference is stored in the topmost word of this area.
  52. #define ARG_STACK_A (((ARG_STACK + pushed + 7) & ~7) - pushed + 8)
  @ clobbercheck variant
  @
  @ Emits the exported function checkasm_checked_call_<variant>: a wrapper
  @ that calls another function and then verifies that the call preserved
  @   - the stack word directly above the stack arguments it was given,
  @   - d8-d15 and FPSCR (only when variant is vfp),
  @   - r4-r11 (r9 is skipped when __APPLE__ is defined).
  @
  @ Inputs of the emitted function, as read by the code below:
  @   r0            address of the function to call
  @   r1            not read
  @   r2, r3        handed to the called function as its r0, r1
  @   stack +0, +4  handed to the called function as its r2, r3
  @   stack +8 ...  MAX_ARGS-4 words copied as the stack arguments of the call
  @   next word     number of parameters the called function really takes on
  @                 the stack (used as a word index for the canary)
  @ Output: r0/r1 as returned by the called function.
  @ On a failed check checkasm_fail_func is called with a format string in r0
  @ and a value for it in r1; the wrapper then still returns normally.
  @ NOTE(review): function, endfunc, movrel and X are macros from the
  @ included asm.S and are not visible here.
  53. .macro clobbercheck variant
  @ Bytes pushed by the prologue so far: r4-r11 and lr, 4 bytes each.
  54. .equ pushed, 4*9
  55. function checkasm_checked_call_\variant, export=1
  56. push {r4-r11, lr}
  57. .ifc \variant, vfp
  58. vpush {d8-d15}
  @ Save the incoming FPSCR on the stack. It is the reference for the FPSCR
  @ check after the call and is written back before returning.
  59. fmrx r4, FPSCR
  60. push {r4}
  @ Account for the extra pushes: 8 double registers (64 bytes) + FPSCR (4).
  61. .equ pushed, pushed + 16*4 + 4
  62. .endif
  @ Fill the registers under test with the known pattern. r12 is used as
  @ scratch throughout.
  63. movrel r12, register_init
  64. .ifc \variant, vfp
  65. vldm r12, {d8-d15}
  66. .endif
  67. ldm r12, {r4-r11}
  @ Allocate the outgoing argument area (plus canary reference slot) and copy
  @ the stack arguments into it. After the sub, sp + ARG_STACK_A + pushed is
  @ the value sp had on entry, so the loop reads the words starting at
  @ entry sp + 8 and writes them starting at the new sp.
  68. sub sp, sp, #ARG_STACK_A
  69. .equ pos, 0
  70. .rept MAX_ARGS-4
  71. ldr r12, [sp, #ARG_STACK_A + pushed + 8 + pos]
  72. str r12, [sp, #pos]
  73. .equ pos, pos + 4
  74. .endr
  75. @ For stack overflows, the callee is free to overwrite the parameters
  76. @ that were passed on the stack (if any), so we can only check after
  77. @ that point. First figure out how many parameters the function
  78. @ really took on the stack:
  79. ldr r12, [sp, #ARG_STACK_A + pushed + 8 + 4*(MAX_ARGS-4)]
  80. @ Load the first non-parameter value from the stack, that should be
  81. @ left untouched by the function. Store a copy of it inverted, so that
  82. @ e.g. overwriting everything with zero would be noticed.
  83. ldr r12, [sp, r12, lsl #2]
  84. mvn r12, r12
  @ The reference lives in the topmost word of the allocated area.
  85. str r12, [sp, #ARG_STACK_A - 4]
  @ Shuffle the register arguments: the function pointer moves to r12, the
  @ incoming r2/r3 become r0/r1, and the first two incoming stack words
  @ (at entry sp + 0 and + 4) become r2/r3.
  86. mov r12, r0
  87. mov r0, r2
  88. mov r1, r3
  89. ldrd r2, r3, [sp, #ARG_STACK_A + pushed]
  90. @ Call the target function
  91. blx r12
  92. @ Load the number of stack parameters, stack canary and its reference
  93. ldr r12, [sp, #ARG_STACK_A + pushed + 8 + 4*(MAX_ARGS-4)]
  94. ldr r2, [sp, r12, lsl #2]
  95. ldr r3, [sp, #ARG_STACK_A - 4]
  @ Release the argument area and keep the r0/r1 result of the call on the
  @ stack, since r0-r3 are used as scratch by the checks below.
  96. add sp, sp, #ARG_STACK_A
  97. push {r0, r1}
  @ Undo the inversion of the reference and compare it with the canary.
  98. mvn r3, r3
  99. cmp r2, r3
  100. bne 5f
  @ r12 = start of the expected register pattern for the checks below.
  101. movrel r12, register_init
  102. .ifc \variant, vfp
  @ check_reg_vfp dreg, offset: compare one double register with the 64-bit
  @ table entry at index offset; branch to 4 on any difference.
  @ Uses r0, r2, r3 and lr as scratch and leaves r12 unchanged.
  103. .macro check_reg_vfp, dreg, offset
  104. ldrd r2, r3, [r12, #8 * (\offset)]
  105. vmov r0, lr, \dreg
  106. eor r2, r2, r0
  107. eor r3, r3, lr
  @ Z is set only if both halves matched.
  108. orrs r2, r2, r3
  109. bne 4f
  110. .endm
  111. .irp n, 8, 9, 10, 11, 12, 13, 14, 15
  112. @ keep track of the checked double/SIMD register
  113. mov r1, #\n
  114. check_reg_vfp d\n, \n-8
  115. .endr
  116. .purgem check_reg_vfp
  @ r1 = bits of FPSCR that differ from the value saved in the prologue.
  @ sp points at the r0/r1 pair pushed above, so the saved FPSCR is at +8.
  117. fmrx r1, FPSCR
  118. ldr r3, [sp, #8]
  119. eor r1, r1, r3
  @ NOTE(review): the ignored bits are presumably the cumulative exception
  @ flags and the condition/saturation flags -- confirm against the FPSCR
  @ layout in the architecture manual.
  120. @ Ignore changes in bits 0-4 and 7
  121. bic r1, r1, #0x9f
  122. @ Ignore changes in the topmost 5 bits
  123. bics r1, r1, #0xf8000000
  124. bne 3f
  125. .endif
  126. @ keep track of the checked GPR
  127. mov r1, #4
  @ check_reg reg1, reg2: fetch the next two expected words (post-incrementing
  @ r12 by 8) and compare them with reg1 and, if given, reg2; branch to 2 on
  @ a mismatch. r1 is advanced by one per table word whether or not reg2 is
  @ given, so at the branch it holds the number of the failing register and
  @ stays in step with the table when a register is skipped.
  128. .macro check_reg reg1, reg2=
  129. ldrd r2, r3, [r12], #8
  130. eors r2, r2, \reg1
  131. bne 2f
  132. add r1, r1, #1
  133. .ifnb \reg2
  134. eors r3, r3, \reg2
  135. bne 2f
  136. .endif
  137. add r1, r1, #1
  138. .endm
  139. check_reg r4, r5
  140. check_reg r6, r7
  141. @ r9 is a volatile register in the ios ABI
  142. #ifdef __APPLE__
  143. check_reg r8
  144. #else
  145. check_reg r8, r9
  146. #endif
  147. check_reg r10, r11
  148. .purgem check_reg
  @ All checks passed: skip the failure report.
  149. b 0f
  @ Failure paths: pick the format string for r0 (r1 already holds its
  @ argument where the string has a conversion) and report it.
  150. 5:
  151. movrel r0, error_message_stack
  152. b 1f
  153. 4:
  154. movrel r0, error_message_vfp
  155. b 1f
  156. 3:
  157. movrel r0, error_message_fpscr
  158. b 1f
  159. 2:
  160. movrel r0, error_message_gpr
  161. 1:
  162. bl X(checkasm_fail_func)
  @ Common exit: restore the result of the called function, then undo the
  @ prologue in reverse order (FPSCR, d8-d15, r4-r11) and return via pc.
  163. 0:
  164. pop {r0, r1}
  165. .ifc \variant, vfp
  166. pop {r2}
  167. fmxr FPSCR, r2
  168. vpop {d8-d15}
  169. .endif
  170. pop {r4-r11, pc}
  171. endfunc
  172. .endm
  @ Instantiate the wrappers: checkasm_checked_call_vfp only when HAVE_VFP or
  @ HAVE_NEON is set, checkasm_checked_call_novfp unconditionally.
  173. #if HAVE_VFP || HAVE_NEON
  174. clobbercheck vfp
  175. #endif
  176. clobbercheck novfp