swscale.c 9.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216
  1. /*
  2. * This file is part of FFmpeg.
  3. *
  4. * FFmpeg is free software; you can redistribute it and/or
  5. * modify it under the terms of the GNU Lesser General Public
  6. * License as published by the Free Software Foundation; either
  7. * version 2.1 of the License, or (at your option) any later version.
  8. *
  9. * FFmpeg is distributed in the hope that it will be useful,
  10. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  12. * Lesser General Public License for more details.
  13. *
  14. * You should have received a copy of the GNU Lesser General Public
  15. * License along with FFmpeg; if not, write to the Free Software
  16. * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  17. */
  18. #include "config.h"
  19. #include "libavutil/attributes.h"
  20. #include "libswscale/swscale.h"
  21. #include "libswscale/swscale_internal.h"
  22. #include "libavutil/aarch64/cpu.h"
  23. void ff_hscale16to15_4_neon_asm(int shift, int16_t *_dst, int dstW,
  24. const uint8_t *_src, const int16_t *filter,
  25. const int32_t *filterPos, int filterSize);
  26. void ff_hscale16to15_X8_neon_asm(int shift, int16_t *_dst, int dstW,
  27. const uint8_t *_src, const int16_t *filter,
  28. const int32_t *filterPos, int filterSize);
  29. void ff_hscale16to15_X4_neon_asm(int shift, int16_t *_dst, int dstW,
  30. const uint8_t *_src, const int16_t *filter,
  31. const int32_t *filterPos, int filterSize);
  32. void ff_hscale16to19_4_neon_asm(int shift, int16_t *_dst, int dstW,
  33. const uint8_t *_src, const int16_t *filter,
  34. const int32_t *filterPos, int filterSize);
  35. void ff_hscale16to19_X8_neon_asm(int shift, int16_t *_dst, int dstW,
  36. const uint8_t *_src, const int16_t *filter,
  37. const int32_t *filterPos, int filterSize);
  38. void ff_hscale16to19_X4_neon_asm(int shift, int16_t *_dst, int dstW,
  39. const uint8_t *_src, const int16_t *filter,
  40. const int32_t *filterPos, int filterSize);
  41. static void ff_hscale16to15_4_neon(SwsContext *c, int16_t *_dst, int dstW,
  42. const uint8_t *_src, const int16_t *filter,
  43. const int32_t *filterPos, int filterSize)
  44. {
  45. const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(c->srcFormat);
  46. int sh = desc->comp[0].depth - 1;
  47. if (sh<15) {
  48. sh = isAnyRGB(c->srcFormat) || c->srcFormat==AV_PIX_FMT_PAL8 ? 13 : (desc->comp[0].depth - 1);
  49. } else if (desc->flags & AV_PIX_FMT_FLAG_FLOAT) { /* float input are process like uint 16bpc */
  50. sh = 16 - 1;
  51. }
  52. ff_hscale16to15_4_neon_asm(sh, _dst, dstW, _src, filter, filterPos, filterSize);
  53. }
  54. static void ff_hscale16to15_X8_neon(SwsContext *c, int16_t *_dst, int dstW,
  55. const uint8_t *_src, const int16_t *filter,
  56. const int32_t *filterPos, int filterSize)
  57. {
  58. const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(c->srcFormat);
  59. int sh = desc->comp[0].depth - 1;
  60. if (sh<15) {
  61. sh = isAnyRGB(c->srcFormat) || c->srcFormat==AV_PIX_FMT_PAL8 ? 13 : (desc->comp[0].depth - 1);
  62. } else if (desc->flags & AV_PIX_FMT_FLAG_FLOAT) { /* float input are process like uint 16bpc */
  63. sh = 16 - 1;
  64. }
  65. ff_hscale16to15_X8_neon_asm(sh, _dst, dstW, _src, filter, filterPos, filterSize);
  66. }
  67. static void ff_hscale16to15_X4_neon(SwsContext *c, int16_t *_dst, int dstW,
  68. const uint8_t *_src, const int16_t *filter,
  69. const int32_t *filterPos, int filterSize)
  70. {
  71. const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(c->srcFormat);
  72. int sh = desc->comp[0].depth - 1;
  73. if (sh<15) {
  74. sh = isAnyRGB(c->srcFormat) || c->srcFormat==AV_PIX_FMT_PAL8 ? 13 : (desc->comp[0].depth - 1);
  75. } else if (desc->flags & AV_PIX_FMT_FLAG_FLOAT) { /* float input are process like uint 16bpc */
  76. sh = 16 - 1;
  77. }
  78. ff_hscale16to15_X4_neon_asm(sh, _dst, dstW, _src, filter, filterPos, filterSize);
  79. }
  80. static void ff_hscale16to19_4_neon(SwsContext *c, int16_t *_dst, int dstW,
  81. const uint8_t *_src, const int16_t *filter,
  82. const int32_t *filterPos, int filterSize)
  83. {
  84. const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(c->srcFormat);
  85. int bits = desc->comp[0].depth - 1;
  86. int sh = bits - 4;
  87. if ((isAnyRGB(c->srcFormat) || c->srcFormat==AV_PIX_FMT_PAL8) && desc->comp[0].depth<16) {
  88. sh = 9;
  89. } else if (desc->flags & AV_PIX_FMT_FLAG_FLOAT) { /* float input are process like uint 16bpc */
  90. sh = 16 - 1 - 4;
  91. }
  92. ff_hscale16to19_4_neon_asm(sh, _dst, dstW, _src, filter, filterPos, filterSize);
  93. }
  94. static void ff_hscale16to19_X8_neon(SwsContext *c, int16_t *_dst, int dstW,
  95. const uint8_t *_src, const int16_t *filter,
  96. const int32_t *filterPos, int filterSize)
  97. {
  98. const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(c->srcFormat);
  99. int bits = desc->comp[0].depth - 1;
  100. int sh = bits - 4;
  101. if ((isAnyRGB(c->srcFormat) || c->srcFormat==AV_PIX_FMT_PAL8) && desc->comp[0].depth<16) {
  102. sh = 9;
  103. } else if (desc->flags & AV_PIX_FMT_FLAG_FLOAT) { /* float input are process like uint 16bpc */
  104. sh = 16 - 1 - 4;
  105. }
  106. ff_hscale16to19_X8_neon_asm(sh, _dst, dstW, _src, filter, filterPos, filterSize);
  107. }
  108. static void ff_hscale16to19_X4_neon(SwsContext *c, int16_t *_dst, int dstW,
  109. const uint8_t *_src, const int16_t *filter,
  110. const int32_t *filterPos, int filterSize)
  111. {
  112. const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(c->srcFormat);
  113. int bits = desc->comp[0].depth - 1;
  114. int sh = bits - 4;
  115. if ((isAnyRGB(c->srcFormat) || c->srcFormat==AV_PIX_FMT_PAL8) && desc->comp[0].depth<16) {
  116. sh = 9;
  117. } else if (desc->flags & AV_PIX_FMT_FLAG_FLOAT) { /* float input are process like uint 16bpc */
  118. sh = 16 - 1 - 4;
  119. }
  120. ff_hscale16to19_X4_neon_asm(sh, _dst, dstW, _src, filter, filterPos, filterSize);
  121. }
  122. #define SCALE_FUNC(filter_n, from_bpc, to_bpc, opt) \
  123. void ff_hscale ## from_bpc ## to ## to_bpc ## _ ## filter_n ## _ ## opt( \
  124. SwsContext *c, int16_t *data, \
  125. int dstW, const uint8_t *src, \
  126. const int16_t *filter, \
  127. const int32_t *filterPos, int filterSize)
  128. #define SCALE_FUNCS(filter_n, opt) \
  129. SCALE_FUNC(filter_n, 8, 15, opt); \
  130. SCALE_FUNC(filter_n, 8, 19, opt);
  131. #define ALL_SCALE_FUNCS(opt) \
  132. SCALE_FUNCS(4, opt); \
  133. SCALE_FUNCS(X8, opt); \
  134. SCALE_FUNCS(X4, opt)
  135. ALL_SCALE_FUNCS(neon);
  136. void ff_yuv2planeX_8_neon(const int16_t *filter, int filterSize,
  137. const int16_t **src, uint8_t *dest, int dstW,
  138. const uint8_t *dither, int offset);
  139. void ff_yuv2plane1_8_neon(
  140. const int16_t *src,
  141. uint8_t *dest,
  142. int dstW,
  143. const uint8_t *dither,
  144. int offset);
  145. #define ASSIGN_SCALE_FUNC2(hscalefn, filtersize, opt) do { \
  146. if (c->srcBpc == 8) { \
  147. if(c->dstBpc <= 14) { \
  148. hscalefn = \
  149. ff_hscale8to15_ ## filtersize ## _ ## opt; \
  150. } else \
  151. hscalefn = \
  152. ff_hscale8to19_ ## filtersize ## _ ## opt; \
  153. } else { \
  154. if (c->dstBpc <= 14) \
  155. hscalefn = \
  156. ff_hscale16to15_ ## filtersize ## _ ## opt; \
  157. else \
  158. hscalefn = \
  159. ff_hscale16to19_ ## filtersize ## _ ## opt; \
  160. } \
  161. } while (0)
  162. #define ASSIGN_SCALE_FUNC(hscalefn, filtersize, opt) do { \
  163. if (filtersize == 4) \
  164. ASSIGN_SCALE_FUNC2(hscalefn, 4, opt); \
  165. else if (filtersize % 8 == 0) \
  166. ASSIGN_SCALE_FUNC2(hscalefn, X8, opt); \
  167. else if (filtersize % 4 == 0 && filtersize % 8 != 0) \
  168. ASSIGN_SCALE_FUNC2(hscalefn, X4, opt); \
  169. } while (0)
  170. #define ASSIGN_VSCALE_FUNC(vscalefn, opt) \
  171. switch (c->dstBpc) { \
  172. case 8: vscalefn = ff_yuv2plane1_8_ ## opt; break; \
  173. default: break; \
  174. }
  175. av_cold void ff_sws_init_swscale_aarch64(SwsContext *c)
  176. {
  177. int cpu_flags = av_get_cpu_flags();
  178. if (have_neon(cpu_flags)) {
  179. ASSIGN_SCALE_FUNC(c->hyScale, c->hLumFilterSize, neon);
  180. ASSIGN_SCALE_FUNC(c->hcScale, c->hChrFilterSize, neon);
  181. ASSIGN_VSCALE_FUNC(c->yuv2plane1, neon);
  182. if (c->dstBpc == 8) {
  183. c->yuv2planeX = ff_yuv2planeX_8_neon;
  184. }
  185. }
  186. }