audio_mix_init.c 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215
  1. /*
  2. * Copyright (c) 2012 Justin Ruggles <justin.ruggles@gmail.com>
  3. *
  4. * This file is part of Libav.
  5. *
  6. * Libav is free software; you can redistribute it and/or
  7. * modify it under the terms of the GNU Lesser General Public
  8. * License as published by the Free Software Foundation; either
  9. * version 2.1 of the License, or (at your option) any later version.
  10. *
  11. * Libav is distributed in the hope that it will be useful,
  12. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  14. * Lesser General Public License for more details.
  15. *
  16. * You should have received a copy of the GNU Lesser General Public
  17. * License along with Libav; if not, write to the Free Software
  18. * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  19. */
  20. #include "config.h"
  21. #include "libavutil/cpu.h"
  22. #include "libavutil/x86/cpu.h"
  23. #include "libavresample/audio_mix.h"
  /*
   * Prototypes for the stereo-to-mono (2_to_1) and mono-to-stereo (1_to_2)
   * mixing routines that ff_audio_mix_init_x86() registers. Only declarations
   * appear here; the definitions are not part of this file. The trailing part
   * of each name is the instruction-set variant (sse, sse2, sse4, avx).
   *
   * All routines share one parameter list; they differ in the element type of
   * src (float or int16_t) and, for the q8 variant, of matrix (int16_t).
   */

  /* 2_to_1, float src, float matrix */
  void ff_mix_2_to_1_fltp_flt_sse(float **src, float **matrix, int len,
                                  int out_ch, int in_ch);
  void ff_mix_2_to_1_fltp_flt_avx(float **src, float **matrix, int len,
                                  int out_ch, int in_ch);

  /* 2_to_1, int16_t src, float matrix */
  void ff_mix_2_to_1_s16p_flt_sse2(int16_t **src, float **matrix, int len,
                                   int out_ch, int in_ch);
  void ff_mix_2_to_1_s16p_flt_sse4(int16_t **src, float **matrix, int len,
                                   int out_ch, int in_ch);

  /* 2_to_1, int16_t src, int16_t matrix */
  void ff_mix_2_to_1_s16p_q8_sse2(int16_t **src, int16_t **matrix,
                                  int len, int out_ch, int in_ch);

  /* 1_to_2, float src, float matrix */
  void ff_mix_1_to_2_fltp_flt_sse(float **src, float **matrix, int len,
                                  int out_ch, int in_ch);
  void ff_mix_1_to_2_fltp_flt_avx(float **src, float **matrix, int len,
                                  int out_ch, int in_ch);

  /* 1_to_2, int16_t src, float matrix */
  void ff_mix_1_to_2_s16p_flt_sse2(int16_t **src, float **matrix, int len,
                                   int out_ch, int in_ch);
  void ff_mix_1_to_2_s16p_flt_sse4(int16_t **src, float **matrix, int len,
                                   int out_ch, int in_ch);
  void ff_mix_1_to_2_s16p_flt_avx(int16_t **src, float **matrix, int len,
                                  int out_ch, int in_ch);
  /*
   * Helper for DEFINE_MIX_3_8_TO_1_2(): declares the <chan>_to_1 and <chan>_to_2
   * prototypes for a single sample-format / instruction-set combination.
   *
   *   chan     - input channel count, pasted into the function name
   *   smp_type - element type behind the src pointers (float or int16_t)
   *   fmt      - sample-format tag used in the name (fltp or s16p)
   *   opt      - instruction-set suffix used in the name (sse, sse2, ...)
   */
  #define DECLARE_MIX_TO_1_2(chan, smp_type, fmt, opt)                          \
      void ff_mix_ ## chan ## _to_1_ ## fmt ## _flt_ ## opt(smp_type **src,     \
                                                            float **matrix,     \
                                                            int len,            \
                                                            int out_ch,         \
                                                            int in_ch);         \
      void ff_mix_ ## chan ## _to_2_ ## fmt ## _flt_ ## opt(smp_type **src,     \
                                                            float **matrix,     \
                                                            int len,            \
                                                            int out_ch,         \
                                                            int in_ch);

  /*
   * Declares (despite the DEFINE_ prefix, nothing is defined) the fourteen
   * ff_mix_<chan>_to_{1,2}_* prototypes for one input channel count:
   * float-source routines for sse, avx and fma4, and int16_t-source routines
   * for sse2, sse4, avx and fma4. The expansion ends with a semicolon, so the
   * macro is invoked without one.
   */
  #define DEFINE_MIX_3_8_TO_1_2(chan)                                           \
      DECLARE_MIX_TO_1_2(chan, float,   fltp, sse)                              \
      DECLARE_MIX_TO_1_2(chan, int16_t, s16p, sse2)                             \
      DECLARE_MIX_TO_1_2(chan, int16_t, s16p, sse4)                             \
      DECLARE_MIX_TO_1_2(chan, float,   fltp, avx)                              \
      DECLARE_MIX_TO_1_2(chan, int16_t, s16p, avx)                              \
      DECLARE_MIX_TO_1_2(chan, float,   fltp, fma4)                             \
      DECLARE_MIX_TO_1_2(chan, int16_t, s16p, fma4)

  /* Emit the prototypes for every input channel count from 3 through 8. */
  DEFINE_MIX_3_8_TO_1_2(3)
  DEFINE_MIX_3_8_TO_1_2(4)
  DEFINE_MIX_3_8_TO_1_2(5)
  DEFINE_MIX_3_8_TO_1_2(6)
  DEFINE_MIX_3_8_TO_1_2(7)
  DEFINE_MIX_3_8_TO_1_2(8)
  /*
   * Registers, for one input channel count, the <chan>_to_1 and <chan>_to_2
   * mixing routines via ff_audio_mix_set_func(), one group per instruction set
   * in the order SSE, SSE2, SSE4, AVX, FMA4.
   *
   * The expansion is a bare sequence of if-statements, so it is invoked without
   * a trailing semicolon. It relies on two names being in scope at the point
   * of use:
   *   am       - forwarded as the first argument of ff_audio_mix_set_func()
   *   mm_flags - tested by the EXTERNAL_*() macros
   *
   * For the AVX and FMA4 float routines the two alignment arguments are 32 and
   * 8, lowered to 16 and 4 when ARCH_X86_32 is set or chan is 6 or more. Every
   * other routine is registered with fixed alignment arguments.
   */
  #define SET_MIX_3_8_TO_1_2(chan)                                              \
      if (EXTERNAL_SSE(mm_flags)) {                                             \
          ff_audio_mix_set_func(am, AV_SAMPLE_FMT_FLTP, AV_MIX_COEFF_TYPE_FLT,  \
                                chan, 1, 16, 4, "SSE",                          \
                                ff_mix_ ## chan ## _to_1_fltp_flt_sse);         \
          ff_audio_mix_set_func(am, AV_SAMPLE_FMT_FLTP, AV_MIX_COEFF_TYPE_FLT,  \
                                chan, 2, 16, 4, "SSE",                          \
                                ff_mix_ ## chan ## _to_2_fltp_flt_sse);         \
      }                                                                         \
      if (EXTERNAL_SSE2(mm_flags)) {                                            \
          ff_audio_mix_set_func(am, AV_SAMPLE_FMT_S16P, AV_MIX_COEFF_TYPE_FLT,  \
                                chan, 1, 16, 8, "SSE2",                         \
                                ff_mix_ ## chan ## _to_1_s16p_flt_sse2);        \
          ff_audio_mix_set_func(am, AV_SAMPLE_FMT_S16P, AV_MIX_COEFF_TYPE_FLT,  \
                                chan, 2, 16, 8, "SSE2",                         \
                                ff_mix_ ## chan ## _to_2_s16p_flt_sse2);        \
      }                                                                         \
      if (EXTERNAL_SSE4(mm_flags)) {                                            \
          ff_audio_mix_set_func(am, AV_SAMPLE_FMT_S16P, AV_MIX_COEFF_TYPE_FLT,  \
                                chan, 1, 16, 8, "SSE4",                         \
                                ff_mix_ ## chan ## _to_1_s16p_flt_sse4);        \
          ff_audio_mix_set_func(am, AV_SAMPLE_FMT_S16P, AV_MIX_COEFF_TYPE_FLT,  \
                                chan, 2, 16, 8, "SSE4",                         \
                                ff_mix_ ## chan ## _to_2_s16p_flt_sse4);        \
      }                                                                         \
      if (EXTERNAL_AVX(mm_flags)) {                                             \
          const int narrow    = ARCH_X86_32 || (chan) >= 6;                     \
          const int ptr_align = narrow ? 16 : 32;                               \
          const int smp_align = narrow ?  4 :  8;                               \
          ff_audio_mix_set_func(am, AV_SAMPLE_FMT_FLTP, AV_MIX_COEFF_TYPE_FLT,  \
                                chan, 1, ptr_align, smp_align, "AVX",           \
                                ff_mix_ ## chan ## _to_1_fltp_flt_avx);         \
          ff_audio_mix_set_func(am, AV_SAMPLE_FMT_FLTP, AV_MIX_COEFF_TYPE_FLT,  \
                                chan, 2, ptr_align, smp_align, "AVX",           \
                                ff_mix_ ## chan ## _to_2_fltp_flt_avx);         \
          ff_audio_mix_set_func(am, AV_SAMPLE_FMT_S16P, AV_MIX_COEFF_TYPE_FLT,  \
                                chan, 1, 16, 8, "AVX",                          \
                                ff_mix_ ## chan ## _to_1_s16p_flt_avx);         \
          ff_audio_mix_set_func(am, AV_SAMPLE_FMT_S16P, AV_MIX_COEFF_TYPE_FLT,  \
                                chan, 2, 16, 8, "AVX",                          \
                                ff_mix_ ## chan ## _to_2_s16p_flt_avx);         \
      }                                                                         \
      if (EXTERNAL_FMA4(mm_flags)) {                                            \
          const int narrow    = ARCH_X86_32 || (chan) >= 6;                     \
          const int ptr_align = narrow ? 16 : 32;                               \
          const int smp_align = narrow ?  4 :  8;                               \
          ff_audio_mix_set_func(am, AV_SAMPLE_FMT_FLTP, AV_MIX_COEFF_TYPE_FLT,  \
                                chan, 1, ptr_align, smp_align, "FMA4",          \
                                ff_mix_ ## chan ## _to_1_fltp_flt_fma4);        \
          ff_audio_mix_set_func(am, AV_SAMPLE_FMT_FLTP, AV_MIX_COEFF_TYPE_FLT,  \
                                chan, 2, ptr_align, smp_align, "FMA4",          \
                                ff_mix_ ## chan ## _to_2_fltp_flt_fma4);        \
          ff_audio_mix_set_func(am, AV_SAMPLE_FMT_S16P, AV_MIX_COEFF_TYPE_FLT,  \
                                chan, 1, 16, 8, "FMA4",                         \
                                ff_mix_ ## chan ## _to_1_s16p_flt_fma4);        \
          ff_audio_mix_set_func(am, AV_SAMPLE_FMT_S16P, AV_MIX_COEFF_TYPE_FLT,  \
                                chan, 2, 16, 8, "FMA4",                         \
                                ff_mix_ ## chan ## _to_2_s16p_flt_fma4);        \
      }
  164. av_cold void ff_audio_mix_init_x86(AudioMix *am)
  165. {
  166. #if HAVE_YASM
  167. int mm_flags = av_get_cpu_flags();
  168. if (EXTERNAL_SSE(mm_flags)) {
  169. ff_audio_mix_set_func(am, AV_SAMPLE_FMT_FLTP, AV_MIX_COEFF_TYPE_FLT,
  170. 2, 1, 16, 8, "SSE", ff_mix_2_to_1_fltp_flt_sse);
  171. ff_audio_mix_set_func(am, AV_SAMPLE_FMT_FLTP, AV_MIX_COEFF_TYPE_FLT,
  172. 1, 2, 16, 4, "SSE", ff_mix_1_to_2_fltp_flt_sse);
  173. }
  174. if (EXTERNAL_SSE2(mm_flags)) {
  175. ff_audio_mix_set_func(am, AV_SAMPLE_FMT_S16P, AV_MIX_COEFF_TYPE_FLT,
  176. 2, 1, 16, 8, "SSE2", ff_mix_2_to_1_s16p_flt_sse2);
  177. ff_audio_mix_set_func(am, AV_SAMPLE_FMT_S16P, AV_MIX_COEFF_TYPE_Q8,
  178. 2, 1, 16, 8, "SSE2", ff_mix_2_to_1_s16p_q8_sse2);
  179. ff_audio_mix_set_func(am, AV_SAMPLE_FMT_S16P, AV_MIX_COEFF_TYPE_FLT,
  180. 1, 2, 16, 8, "SSE2", ff_mix_1_to_2_s16p_flt_sse2);
  181. }
  182. if (EXTERNAL_SSE4(mm_flags)) {
  183. ff_audio_mix_set_func(am, AV_SAMPLE_FMT_S16P, AV_MIX_COEFF_TYPE_FLT,
  184. 2, 1, 16, 8, "SSE4", ff_mix_2_to_1_s16p_flt_sse4);
  185. ff_audio_mix_set_func(am, AV_SAMPLE_FMT_S16P, AV_MIX_COEFF_TYPE_FLT,
  186. 1, 2, 16, 8, "SSE4", ff_mix_1_to_2_s16p_flt_sse4);
  187. }
  188. if (EXTERNAL_AVX(mm_flags)) {
  189. ff_audio_mix_set_func(am, AV_SAMPLE_FMT_FLTP, AV_MIX_COEFF_TYPE_FLT,
  190. 2, 1, 32, 16, "AVX", ff_mix_2_to_1_fltp_flt_avx);
  191. ff_audio_mix_set_func(am, AV_SAMPLE_FMT_FLTP, AV_MIX_COEFF_TYPE_FLT,
  192. 1, 2, 32, 8, "AVX", ff_mix_1_to_2_fltp_flt_avx);
  193. ff_audio_mix_set_func(am, AV_SAMPLE_FMT_S16P, AV_MIX_COEFF_TYPE_FLT,
  194. 1, 2, 16, 8, "AVX", ff_mix_1_to_2_s16p_flt_avx);
  195. }
  196. SET_MIX_3_8_TO_1_2(3)
  197. SET_MIX_3_8_TO_1_2(4)
  198. SET_MIX_3_8_TO_1_2(5)
  199. SET_MIX_3_8_TO_1_2(6)
  200. SET_MIX_3_8_TO_1_2(7)
  201. SET_MIX_3_8_TO_1_2(8)
  202. #endif /* HAVE_YASM */
  203. }