dsputil_arm.c 7.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217
  1. /*
  2. * ARM optimized DSP utils
  3. * Copyright (c) 2001 Lionel Ulmer.
  4. *
  5. * This file is part of FFmpeg.
  6. *
  7. * FFmpeg is free software; you can redistribute it and/or
  8. * modify it under the terms of the GNU Lesser General Public
  9. * License as published by the Free Software Foundation; either
  10. * version 2.1 of the License, or (at your option) any later version.
  11. *
  12. * FFmpeg is distributed in the hope that it will be useful,
  13. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  15. * Lesser General Public License for more details.
  16. *
  17. * You should have received a copy of the GNU Lesser General Public
  18. * License along with FFmpeg; if not, write to the Free Software
  19. * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  20. */
  21. #include "libavcodec/dsputil.h"
  22. #if HAVE_IPP
  23. #include <ipp.h>
  24. #endif
  25. void dsputil_init_iwmmxt(DSPContext* c, AVCodecContext *avctx);
  26. void ff_float_init_arm_vfp(DSPContext* c, AVCodecContext *avctx);
  27. void ff_dsputil_init_neon(DSPContext *c, AVCodecContext *avctx);
  28. void j_rev_dct_ARM(DCTELEM *data);
  29. void simple_idct_ARM(DCTELEM *data);
  30. void simple_idct_armv5te(DCTELEM *data);
  31. void simple_idct_put_armv5te(uint8_t *dest, int line_size, DCTELEM *data);
  32. void simple_idct_add_armv5te(uint8_t *dest, int line_size, DCTELEM *data);
  33. void ff_simple_idct_armv6(DCTELEM *data);
  34. void ff_simple_idct_put_armv6(uint8_t *dest, int line_size, DCTELEM *data);
  35. void ff_simple_idct_add_armv6(uint8_t *dest, int line_size, DCTELEM *data);
  36. void ff_simple_idct_neon(DCTELEM *data);
  37. void ff_simple_idct_put_neon(uint8_t *dest, int line_size, DCTELEM *data);
  38. void ff_simple_idct_add_neon(uint8_t *dest, int line_size, DCTELEM *data);
  39. /* XXX: local hack */
  40. static void (*ff_put_pixels_clamped)(const DCTELEM *block, uint8_t *pixels, int line_size);
  41. static void (*ff_add_pixels_clamped)(const DCTELEM *block, uint8_t *pixels, int line_size);
  /*
   * Pixel routines declared here and defined outside this file.
   * dsputil_init_arm() stores them in the put_pixels_tab and
   * put_no_rnd_pixels_tab tables of the DSP context.
   */

  /* 8-wide variants. */
  void put_pixels8_arm(uint8_t *block, const uint8_t *pixels,
                       int line_size, int h);
  void put_pixels8_x2_arm(uint8_t *block, const uint8_t *pixels,
                          int line_size, int h);
  void put_pixels8_y2_arm(uint8_t *block, const uint8_t *pixels,
                          int line_size, int h);
  void put_pixels8_xy2_arm(uint8_t *block, const uint8_t *pixels,
                           int line_size, int h);

  /* 8-wide "no_rnd" variants (there is no separate plain no_rnd copy). */
  void put_no_rnd_pixels8_x2_arm(uint8_t *block, const uint8_t *pixels,
                                 int line_size, int h);
  void put_no_rnd_pixels8_y2_arm(uint8_t *block, const uint8_t *pixels,
                                 int line_size, int h);
  void put_no_rnd_pixels8_xy2_arm(uint8_t *block, const uint8_t *pixels,
                                  int line_size, int h);

  /* 16-wide plain copy; the other 16-wide variants come from CALL_2X_PIXELS. */
  void put_pixels16_arm(uint8_t *block, const uint8_t *pixels,
                        int line_size, int h);

  /* Installed as c->prefetch by dsputil_init_arm() when HAVE_ARMV5TE is set. */
  void ff_prefetch_arm(void *mem, int stride, int h);
  51. CALL_2X_PIXELS(put_pixels16_x2_arm , put_pixels8_x2_arm , 8)
  52. CALL_2X_PIXELS(put_pixels16_y2_arm , put_pixels8_y2_arm , 8)
  53. CALL_2X_PIXELS(put_pixels16_xy2_arm, put_pixels8_xy2_arm, 8)
  54. CALL_2X_PIXELS(put_no_rnd_pixels16_x2_arm , put_no_rnd_pixels8_x2_arm , 8)
  55. CALL_2X_PIXELS(put_no_rnd_pixels16_y2_arm , put_no_rnd_pixels8_y2_arm , 8)
  56. CALL_2X_PIXELS(put_no_rnd_pixels16_xy2_arm, put_no_rnd_pixels8_xy2_arm, 8)
  /*
   * Defined outside this file.  Used by simple_idct_ipp_add() as the add step
   * when HAVE_IWMMXT is not set.
   */
  void ff_add_pixels_clamped_ARM(short *block, unsigned char *dest, int line_size);
  /* XXX: these functions should be removed as soon as all IDCTs have been
     converted */
  61. static void j_rev_dct_ARM_put(uint8_t *dest, int line_size, DCTELEM *block)
  62. {
  63. j_rev_dct_ARM (block);
  64. ff_put_pixels_clamped(block, dest, line_size);
  65. }
  66. static void j_rev_dct_ARM_add(uint8_t *dest, int line_size, DCTELEM *block)
  67. {
  68. j_rev_dct_ARM (block);
  69. ff_add_pixels_clamped(block, dest, line_size);
  70. }
  71. static void simple_idct_ARM_put(uint8_t *dest, int line_size, DCTELEM *block)
  72. {
  73. simple_idct_ARM (block);
  74. ff_put_pixels_clamped(block, dest, line_size);
  75. }
  76. static void simple_idct_ARM_add(uint8_t *dest, int line_size, DCTELEM *block)
  77. {
  78. simple_idct_ARM (block);
  79. ff_add_pixels_clamped(block, dest, line_size);
  80. }
  81. #if HAVE_IPP
  82. static void simple_idct_ipp(DCTELEM *block)
  83. {
  84. ippiDCT8x8Inv_Video_16s_C1I(block);
  85. }
  86. static void simple_idct_ipp_put(uint8_t *dest, int line_size, DCTELEM *block)
  87. {
  88. ippiDCT8x8Inv_Video_16s8u_C1R(block, dest, line_size);
  89. }
  90. void add_pixels_clamped_iwmmxt(const DCTELEM *block, uint8_t *pixels, int line_size);
  91. static void simple_idct_ipp_add(uint8_t *dest, int line_size, DCTELEM *block)
  92. {
  93. ippiDCT8x8Inv_Video_16s_C1I(block);
  94. #if HAVE_IWMMXT
  95. add_pixels_clamped_iwmmxt(block, dest, line_size);
  96. #else
  97. ff_add_pixels_clamped_ARM(block, dest, line_size);
  98. #endif
  99. }
  100. #endif
  101. int mm_support(void)
  102. {
  103. return HAVE_IWMMXT * FF_MM_IWMMXT;
  104. }
  105. void dsputil_init_arm(DSPContext* c, AVCodecContext *avctx)
  106. {
  107. int idct_algo= avctx->idct_algo;
  108. ff_put_pixels_clamped = c->put_pixels_clamped;
  109. ff_add_pixels_clamped = c->add_pixels_clamped;
  110. if (avctx->lowres == 0) {
  111. if(idct_algo == FF_IDCT_AUTO){
  112. #if HAVE_IPP
  113. idct_algo = FF_IDCT_IPP;
  114. #elif HAVE_NEON
  115. idct_algo = FF_IDCT_SIMPLENEON;
  116. #elif HAVE_ARMV6
  117. idct_algo = FF_IDCT_SIMPLEARMV6;
  118. #elif HAVE_ARMV5TE
  119. idct_algo = FF_IDCT_SIMPLEARMV5TE;
  120. #else
  121. idct_algo = FF_IDCT_ARM;
  122. #endif
  123. }
  124. if(idct_algo==FF_IDCT_ARM){
  125. c->idct_put= j_rev_dct_ARM_put;
  126. c->idct_add= j_rev_dct_ARM_add;
  127. c->idct = j_rev_dct_ARM;
  128. c->idct_permutation_type= FF_LIBMPEG2_IDCT_PERM;
  129. } else if (idct_algo==FF_IDCT_SIMPLEARM){
  130. c->idct_put= simple_idct_ARM_put;
  131. c->idct_add= simple_idct_ARM_add;
  132. c->idct = simple_idct_ARM;
  133. c->idct_permutation_type= FF_NO_IDCT_PERM;
  134. #if HAVE_ARMV6
  135. } else if (idct_algo==FF_IDCT_SIMPLEARMV6){
  136. c->idct_put= ff_simple_idct_put_armv6;
  137. c->idct_add= ff_simple_idct_add_armv6;
  138. c->idct = ff_simple_idct_armv6;
  139. c->idct_permutation_type= FF_LIBMPEG2_IDCT_PERM;
  140. #endif
  141. #if HAVE_ARMV5TE
  142. } else if (idct_algo==FF_IDCT_SIMPLEARMV5TE){
  143. c->idct_put= simple_idct_put_armv5te;
  144. c->idct_add= simple_idct_add_armv5te;
  145. c->idct = simple_idct_armv5te;
  146. c->idct_permutation_type = FF_NO_IDCT_PERM;
  147. #endif
  148. #if HAVE_IPP
  149. } else if (idct_algo==FF_IDCT_IPP){
  150. c->idct_put= simple_idct_ipp_put;
  151. c->idct_add= simple_idct_ipp_add;
  152. c->idct = simple_idct_ipp;
  153. c->idct_permutation_type= FF_NO_IDCT_PERM;
  154. #endif
  155. #if HAVE_NEON
  156. } else if (idct_algo==FF_IDCT_SIMPLENEON){
  157. c->idct_put= ff_simple_idct_put_neon;
  158. c->idct_add= ff_simple_idct_add_neon;
  159. c->idct = ff_simple_idct_neon;
  160. c->idct_permutation_type = FF_PARTTRANS_IDCT_PERM;
  161. #endif
  162. }
  163. }
  164. c->put_pixels_tab[0][0] = put_pixels16_arm;
  165. c->put_pixels_tab[0][1] = put_pixels16_x2_arm;
  166. c->put_pixels_tab[0][2] = put_pixels16_y2_arm;
  167. c->put_pixels_tab[0][3] = put_pixels16_xy2_arm;
  168. c->put_no_rnd_pixels_tab[0][0] = put_pixels16_arm;
  169. c->put_no_rnd_pixels_tab[0][1] = put_no_rnd_pixels16_x2_arm;
  170. c->put_no_rnd_pixels_tab[0][2] = put_no_rnd_pixels16_y2_arm;
  171. c->put_no_rnd_pixels_tab[0][3] = put_no_rnd_pixels16_xy2_arm;
  172. c->put_pixels_tab[1][0] = put_pixels8_arm;
  173. c->put_pixels_tab[1][1] = put_pixels8_x2_arm;
  174. c->put_pixels_tab[1][2] = put_pixels8_y2_arm;
  175. c->put_pixels_tab[1][3] = put_pixels8_xy2_arm;
  176. c->put_no_rnd_pixels_tab[1][0] = put_pixels8_arm;
  177. c->put_no_rnd_pixels_tab[1][1] = put_no_rnd_pixels8_x2_arm;
  178. c->put_no_rnd_pixels_tab[1][2] = put_no_rnd_pixels8_y2_arm;
  179. c->put_no_rnd_pixels_tab[1][3] = put_no_rnd_pixels8_xy2_arm;
  180. #if HAVE_ARMV5TE
  181. c->prefetch = ff_prefetch_arm;
  182. #endif
  183. #if HAVE_IWMMXT
  184. dsputil_init_iwmmxt(c, avctx);
  185. #endif
  186. #if HAVE_ARMVFP
  187. ff_float_init_arm_vfp(c, avctx);
  188. #endif
  189. #if HAVE_NEON
  190. ff_dsputil_init_neon(c, avctx);
  191. #endif
  192. }