rgb2yuv_neon_common.S 6.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291
  1. /*
  2. * Copyright (C) 2013 Xiaolei Yu <dreifachstein@gmail.com>
  3. *
  4. * This file is part of FFmpeg.
  5. *
  6. * FFmpeg is free software; you can redistribute it and/or
  7. * modify it under the terms of the GNU Lesser General Public
  8. * License as published by the Free Software Foundation; either
  9. * version 2.1 of the License, or (at your option) any later version.
  10. *
  11. * FFmpeg is distributed in the hope that it will be useful,
  12. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  14. * Lesser General Public License for more details.
  15. *
  16. * You should have received a copy of the GNU Lesser General Public
  17. * License along with FFmpeg; if not, write to the Free Software
  18. * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  19. */
  20. #include "libavutil/arm/asm.S"
  /*
   * alias name, tgt, set=1
   *
   * Bind or release a symbolic register name.
   *   name - the alias to define or drop
   *   tgt  - register (or existing alias) that name should stand for;
   *          unused when releasing
   *   set  - zero emits ".unreq name"; any other value (the default)
   *          emits "name .req tgt"
   */
  .macro alias name, tgt, set=1
      .if \set == 0
          .unreq  \name
      .else
          \name   .req    \tgt
      .endif
  .endm
  /*
   * alias_dw_all qw, dw_l, dw_h
   *
   * Define q(qw)_l and q(qw)_h as names for d(dw_l) and d(dw_h), then recurse
   * with qw + 1 and both d indices + 2 for as long as qw is below 15.
   * The single invocation that follows starts at (0, 0, 1), so it produces
   * q0_l = d0, q0_h = d1, ... up to q15_l = d30, q15_h = d31.
   *
   * Alternate macro mode is switched on only around this definition and its
   * invocation, because the recursive call uses the percent-expression form
   * to pass evaluated numbers as arguments.
   */
  .altmacro
  .macro alias_dw_all qw, dw_l, dw_h
      alias   q\qw\()_l, d\dw_l
      alias   q\qw\()_h, d\dw_h
      .if \qw < 15
          alias_dw_all %(\qw + 1), %(\dw_l + 2), %(\dw_h + 2)
      .endif
  .endm
  alias_dw_all 0, 0, 1
  .noaltmacro
  /*
   * alias_qw name, qw, set=1
   *
   * Alias a whole quad register together with its two halves.
   *   name - base alias; name, name_l and name_h are defined (or released)
   *   qw   - quad register whose qw_l / qw_h names already exist
   *          (they are created by alias_dw_all)
   *   set  - forwarded to alias: non-zero defines, zero releases
   */
  .macro alias_qw name, qw, set=1
      alias   \name,      \qw,      \set
      alias   \name\()_l, \qw\()_l, \set
      alias   \name\()_h, \qw\()_h, \set
  .endm
  /*
   * prologue
   *
   * Push r4-r12 and lr (10 words = 40 bytes), then q4-q7, written here as the
   * equivalent d8-d15 list (64 bytes). The stack-argument offsets computed by
   * load_arg are based on this 104-byte frame.
   */
  .macro prologue
      push    {r4-r12, lr}
      vpush   {d8-d15}
  .endm
  /*
   * epilogue
   *
   * Undo prologue in reverse order: restore q4-q7 (written here as d8-d15),
   * then r4-r12, and return by popping the saved lr straight into pc.
   */
  .macro epilogue
      vpop    {d8-d15}
      pop     {r4-r12, pc}
  .endm
  /*
   * load_arg reg, ix
   *
   * Load a stack-passed function argument into reg.
   *   reg - destination core register (or alias)
   *   ix  - argument index; this file uses 4..8, index 4 being the word
   *         located directly above the frame saved by prologue
   *
   * The offset skips the 10 core registers (10 * 4 bytes) and the 4 quad
   * registers (4 * 16 bytes) that prologue pushed, so it is only valid while
   * sp still has the value prologue left it with.
   */
  .macro load_arg reg, ix
      ldr     \reg, [sp, #(10 * 4 + 4 * 16 + (\ix - 4) * 4)]
  .endm
  /*
   * Register map used by loop_420sp. The functions it generates have the
   * prototype
   *
   * ()_to_()_neon(const uint8_t *src, uint8_t *y, uint8_t *chroma,
   *               int width, int height,
   *               int y_stride, int c_stride, int src_stride,
   *               int32_t coeff_table[9]);
   *
   * alias_loop_420sp set=1
   *   set - non-zero (default) defines every alias below, zero releases them
   *
   * Several names share one register:
   *   src0 / src (r0), y0 / y (r1), header / width (r3),
   *   c_padding / c_stride (r6), y1 / coeff_table (r12).
   */
  .macro alias_loop_420sp set=1
      alias   src,         r0,       \set
      alias   src0,        src,      \set
      alias   y,           r1,       \set
      alias   y0,          y,        \set
      alias   chroma,      r2,       \set
      alias   width,       r3,       \set
      alias   header,      width,    \set
      alias   height,      r4,       \set
      alias   y_stride,    r5,       \set
      alias   c_stride,    r6,       \set
      alias   c_padding,   c_stride, \set
      alias   src_stride,  r7,       \set
      alias   y0_end,      r8,       \set
      alias   src_padding, r9,       \set
      alias   y_padding,   r10,      \set
      alias   src1,        r11,      \set
      alias   y1,          r12,      \set
      alias   coeff_table, r12,      \set
  .endm
  /*
   * loop_420sp s_fmt, d_fmt, init, kernel, precision
   *
   * Emit the exported function (s_fmt)_to_(d_fmt)_neon_(precision). The function
   * walks the image two rows per outer iteration and invokes the kernel macro
   * once per 16-pixel column group.
   *   s_fmt, d_fmt - source / destination format names; used in the function
   *                  name and forwarded to the kernel
   *   init         - macro invoked once as "init coeff_table"
   *   kernel       - macro invoked as
   *                  "kernel s_fmt, d_fmt, src0, src1, y0, y1, chroma [, count]"
   *   precision    - suffix of the generated function name
   *
   * Row handling: header = width & 15. When it is non-zero the first kernel
   * call of a row pair receives header as its count argument; with the
   * load/store macros of this file that makes the pointers advance by header
   * pixels instead of 16, so the full 16-pixel calls that follow overlap it.
   *
   * NOTE(review): the kernel call at label 1 always runs at least once and
   * height is consumed in steps of 2, so this presumably relies on callers
   * passing width >= 16 and an even height - confirm against the callers.
   */
  .macro loop_420sp s_fmt, d_fmt, init, kernel, precision
  function \s_fmt\()_to_\d_fmt\()_neon_\precision, export=1
      prologue
      alias_loop_420sp

      /* Arguments 4..8 live on the stack; 0..3 are already in r0-r3. */
      load_arg    height, 4
      load_arg    y_stride, 5
      load_arg    c_stride, 6
      load_arg    src_stride, 7
      load_arg    coeff_table, 8
      \init       coeff_table

      /* Per-row skips: distance from the end of one row pass to the next row. */
      sub     y_padding, y_stride, width
      /* c_padding overwrites c_stride (same register). */
      sub     c_padding, c_stride, width
      /* The source pointer advances by 4 bytes per pixel, hence width << 2. */
      sub     src_padding, src_stride, width, LSL #2
      add     y0_end, y0, width
      /* header overwrites width (same register); width is not read again. */
      and     header, width, #15
      /* y1 takes over r12 from coeff_table, which is not referenced again here. */
      add     y1, y0, y_stride
      add     src1, src0, src_stride
  0:
      /* Partial leading group, only when width is not a multiple of 16. */
      cmp     header, #0
      beq     1f
      \kernel \s_fmt, \d_fmt, src0, src1, y0, y1, chroma, header
  1:
      \kernel \s_fmt, \d_fmt, src0, src1, y0, y1, chroma
      cmp     y0, y0_end
      /* y0 and y0_end are addresses, so the comparison must be unsigned. */
      blo     1b
  2:
      /* Step both row pointers down by two rows. */
      add     y0, y1, y_padding
      add     y0_end, y1, y_stride
      add     chroma, chroma, c_padding
      add     src0, src1, src_padding
      add     y1, y0, y_stride
      add     src1, src0, src_stride
      subs    height, height, #2
      bgt     0b
      epilogue
      alias_loop_420sp 0
  endfunc
  .endm
  /*
   * downsample
   *
   * Pairwise-add neighbouring unsigned bytes of r8x16 / g8x16 / b8x16 and write
   * the widened 16-bit sums to r16x8 / g16x8 / b16x8 (previous contents of the
   * destinations are overwritten). The 16x8 aliases are not defined in this
   * file and must be provided by the including source.
   */
  .macro downsample
      vpaddl.u8   r16x8, r8x16
      vpaddl.u8   g16x8, g8x16
      vpaddl.u8   b16x8, b8x16
  .endm
  /*
   * downsample_ars2
   *
   * Accumulate and right shift by 2: pairwise-add neighbouring unsigned bytes
   * of r8x16 / g8x16 / b8x16 onto the 16-bit sums already held in
   * r16x8 / g16x8 / b16x8, then apply a rounding right shift by 2 to each
   * 16-bit lane.
   */
  .macro downsample_ars2
      vpadal.u8   r16x8, r8x16
      vpadal.u8   g16x8, g8x16
      vpadal.u8   b16x8, b8x16
      vrshr.u16   r16x8, r16x8, #2
      vrshr.u16   g16x8, g16x8, #2
      vrshr.u16   b16x8, b16x8, #2
  .endm
  /*
   * store_y8_16x1 dst, count
   *
   * Store the register aliased as y8x16 (defined outside this file) at dst.
   *   dst   - pointer register, updated by the macro
   *   count - optional register; when given, dst is advanced by count after
   *           the store, otherwise the store itself writes back the
   *           incremented address
   */
  .macro store_y8_16x1 dst, count
      .ifnc "\count",""
          vstmia  \dst, {y8x16}
          add     \dst, \dst, \count
      .else
          vstmia  \dst!, {y8x16}
      .endif
  .endm
  /*
   * store_chroma_nv12_8x1 dst, count
   *
   * Store u8x8 and v8x8 byte-interleaved, U first, at dst.
   *   dst   - pointer register, post-incremented by the store
   *   count - optional register giving the post-increment; when omitted the
   *           address is advanced by the number of bytes stored
   */
  .macro store_chroma_nv12_8x1 dst, count
      .ifnc "\count",""
          vst2.i8 {u8x8, v8x8}, [\dst], \count
      .else
          vst2.i8 {u8x8, v8x8}, [\dst]!
      .endif
  .endm
  /*
   * store_chroma_nv21_8x1 dst, count
   *
   * Store v8x8 and u8x8 byte-interleaved, V first, at dst.
   *   dst   - pointer register, post-incremented by the store
   *   count - optional register giving the post-increment; when omitted the
   *           address is advanced by the number of bytes stored
   */
  .macro store_chroma_nv21_8x1 dst, count
      .ifnc "\count",""
          vst2.i8 {v8x8, u8x8}, [\dst], \count
      .else
          vst2.i8 {v8x8, u8x8}, [\dst]!
      .endif
  .endm
  /*
   * load_8888_16x1 a, b, c, d, src, count
   *
   * Load 16 four-byte pixels from src and de-interleave them by byte position
   * into the registers (a)8x16, (b)8x16, (c)8x16 and (d)8x16. The first vld4
   * fills the low halves, the second one the high halves.
   *   a, b, c, d - channel name prefixes, in memory byte order
   *   src        - pointer register, updated by the macro
   *   count      - optional register; when given, src ends up advanced by
   *                count * 4 bytes instead of the full 64
   */
  .macro load_8888_16x1 a, b, c, d, src, count
      /* Pixels 0-7: identical in both variants, leaves src at +32. */
      vld4.8  {\a\()8x16_l, \b\()8x16_l, \c\()8x16_l, \d\()8x16_l}, [\src]!
      .ifc "\count",""
          vld4.8  {\a\()8x16_h, \b\()8x16_h, \c\()8x16_h, \d\()8x16_h}, [\src]!
      .else
          /* No writeback here; rewind the +32 and step by count pixels. */
          vld4.8  {\a\()8x16_h, \b\()8x16_h, \c\()8x16_h, \d\()8x16_h}, [\src]
          sub     \src, \src, #32
          add     \src, \src, \count, LSL #2
      .endif
  .endm
  /*
   * load_rgbx_16x1 src, count
   *
   * load_8888_16x1 with the byte order r, g, b, x; src and the optional count
   * are passed through unchanged.
   */
  .macro load_rgbx_16x1 src, count
      load_8888_16x1  r, g, b, x, \src, \count
  .endm
  /*
   * load_bgrx_16x1 src, count
   *
   * load_8888_16x1 with the byte order b, g, r, x; src and the optional count
   * are passed through unchanged.
   */
  .macro load_bgrx_16x1 src, count
      load_8888_16x1  b, g, r, x, \src, \count
  .endm
  /*
   * alias_src_rgbx set=1
   *
   * Define (set non-zero) or release (set zero) the source channel aliases
   * for the byte order r, g, b, x via alias_src_8888.
   */
  .macro alias_src_rgbx set=1
      alias_src_8888  r, g, b, x, \set
  .endm
  /*
   * alias_src_bgrx set=1
   *
   * Define (set non-zero) or release (set zero) the source channel aliases
   * for the byte order b, g, r, x via alias_src_8888.
   */
  .macro alias_src_bgrx set=1
      alias_src_8888  b, g, r, x, \set
  .endm
  /*
   * alias_dst_nv12 set=1
   *
   * Map u8x8 to the low half and v8x8 to the high half of c8x8x2 (an alias
   * that is not defined in this file). set = 0 releases both names.
   */
  .macro alias_dst_nv12 set=1
      alias   u8x8, c8x8x2_l, \set
      alias   v8x8, c8x8x2_h, \set
  .endm
  /*
   * alias_dst_nv21 set=1
   *
   * Map v8x8 to the low half and u8x8 to the high half of c8x8x2 (an alias
   * that is not defined in this file). set = 0 releases both names.
   */
  .macro alias_dst_nv21 set=1
      alias   v8x8, c8x8x2_l, \set
      alias   u8x8, c8x8x2_h, \set
  .endm
  // common aliases
  /*
   * d0, d1 and d2 each get a whole-register name (CO_R, CO_G, CO_B) plus typed
   * scalar names for their signed 16-bit lanes 0, 1 and 2. Judging by the
   * names these are presumably the per-channel Y / U / V coefficients -
   * the code that fills them is not in this file.
   */
  alias   CO_R, d0
  CO_RY   .dn     d0.s16[0]
  CO_RU   .dn     d0.s16[1]
  CO_RV   .dn     d0.s16[2]

  alias   CO_G, d1
  CO_GY   .dn     d1.s16[0]
  CO_GU   .dn     d1.s16[1]
  CO_GV   .dn     d1.s16[2]

  alias   CO_B, d2
  CO_BY   .dn     d2.s16[0]
  CO_BU   .dn     d2.s16[1]
  CO_BV   .dn     d2.s16[2]

  /* BIAS_U and BIAS_V name the same register (d3); BIAS_Y is the quad q2. */
  alias   BIAS_U, d3
  alias   BIAS_V, BIAS_U
  alias   BIAS_Y, q2
  /*
   * alias_src_8888 a, b, c, d, set
   *
   * q3-q6 R8G8B8X8 x16: name q3, q4, q5 and q6 (and their halves, through
   * alias_qw) as (a)8x16, (b)8x16, (c)8x16 and (d)8x16 respectively.
   *   a, b, c, d - channel name prefixes in source byte order
   *   set        - non-zero defines the aliases, zero releases them
   */
  .macro alias_src_8888 a, b, c, d, set
      alias_qw    \a\()8x16, q3, \set
      alias_qw    \b\()8x16, q4, \set
      alias_qw    \c\()8x16, q5, \set
      alias_qw    \d\()8x16, q6, \set
  .endm
  /*
   * kernel_420_16x2 rgb_fmt, yuv_fmt, rgb0, rgb1, y0, y1, chroma, count
   *
   * Convert one 16-pixel-wide, 2-row tile.
   *   rgb_fmt    - source format name; selects alias_src_(rgb_fmt) and
   *                load_(rgb_fmt)_16x1
   *   yuv_fmt    - destination format name; selects alias_dst_(yuv_fmt) and
   *                store_chroma_(yuv_fmt)_8x1
   *   rgb0, rgb1 - source pointers for the upper / lower row
   *   y0, y1     - luma destination pointers for the upper / lower row
   *   chroma     - interleaved chroma destination pointer
   *   count      - optional register forwarded to every load and store; see
   *                those macros for how it changes the pointer advance
   *
   * compute_y_16x1 and compute_chroma_8x1 are not defined in this file; the
   * including source has to provide them.
   */
  .macro kernel_420_16x2 rgb_fmt, yuv_fmt, rgb0, rgb1, y0, y1, chroma, count
      alias_src_\rgb_fmt
      alias_dst_\yuv_fmt

      /* Upper row: start the horizontal pair sums, then emit its luma. */
      load_\rgb_fmt\()_16x1 \rgb0, \count
      downsample
      compute_y_16x1
      store_y8_16x1 \y0, \count

      /* Lower row: add its pair sums, rounding-shift by 2, emit its luma. */
      load_\rgb_fmt\()_16x1 \rgb1, \count
      downsample_ars2
      compute_y_16x1
      store_y8_16x1 \y1, \count

      /* Chroma is computed after both rows have been folded together. */
      compute_chroma_8x1 u, U
      compute_chroma_8x1 v, V
      store_chroma_\yuv_fmt\()_8x1 \chroma, \count

      alias_dst_\yuv_fmt 0
      alias_src_\rgb_fmt 0
  .endm