audio_convert.asm 2.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106
  1. ;******************************************************************************
  2. ;* x86 optimized Format Conversion Utils
  3. ;* Copyright (c) 2008 Loren Merritt
  4. ;*
  5. ;* This file is part of Libav.
  6. ;*
  7. ;* Libav is free software; you can redistribute it and/or
  8. ;* modify it under the terms of the GNU Lesser General Public
  9. ;* License as published by the Free Software Foundation; either
  10. ;* version 2.1 of the License, or (at your option) any later version.
  11. ;*
  12. ;* Libav is distributed in the hope that it will be useful,
  13. ;* but WITHOUT ANY WARRANTY; without even the implied warranty of
  14. ;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  15. ;* Lesser General Public License for more details.
  16. ;*
  17. ;* You should have received a copy of the GNU Lesser General Public
  18. ;* License along with Libav; if not, write to the Free Software
  19. ;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  20. ;******************************************************************************
  21. %include "x86inc.asm"
  22. %include "x86util.asm"
  23. SECTION_TEXT
  24. ;-----------------------------------------------------------------------------
  25. ; void ff_conv_fltp_to_flt_6ch(float *dst, float *const *src, int len,
  26. ; int channels);
      ;
      ; Interleaves six planar float channels into one packed output buffer:
      ;   dst[6*i + c] = src[c][i]   for c = 0..5
      ;
      ; dst      - output buffer; advanced by mmsize*6 bytes per loop iteration
      ; src      - array of channel pointers; entries 0..5 are read
      ; len      - sample count per channel; decremented by mmsize/4 per iteration
      ; channels - never referenced by this code; the channel count is fixed at 6
      ;
      ; The loop is bottom-tested, so one iteration always executes and len is
      ; effectively rounded up to a multiple of mmsize/4.
      ; NOTE(review): presumably callers pass a positive len that is a multiple of
      ; mmsize/4 and buffers aligned for mova/movaps - confirm against the caller.
  27. ;-----------------------------------------------------------------------------
  28. %macro CONV_FLTP_TO_FLT_6CH 0
      ; Named registers: dst and src are the first two arguments; src1..src5 and
      ; len are additional general-purpose registers requested from cglobal.
  29. cglobal conv_fltp_to_flt_6ch, 2,8,7, dst, src, src1, src2, src3, src4, src5, len
  30. %if ARCH_X86_64
      ; x86-64: copy the third argument (len) into its own register.
  31. mov lend, r2d
  32. %else
      ; x86-32: use the third argument in place through its memory operand
      ; (presumably because no spare general-purpose register is left - TODO confirm).
  33. %define lend dword r2m
  34. %endif
      ; Fetch the pointers for channels 1..5 from the src array.
  35. mov src1q, [srcq+1*gprsize]
  36. mov src2q, [srcq+2*gprsize]
  37. mov src3q, [srcq+3*gprsize]
  38. mov src4q, [srcq+4*gprsize]
  39. mov src5q, [srcq+5*gprsize]
      ; srcq is reloaded last: it changes from "pointer array" to "channel 0 data".
  40. mov srcq, [srcq]
      ; Turn the channel 1..5 pointers into byte offsets relative to channel 0, so
      ; that the single "add srcq, mmsize" below advances all six channels at once.
  41. sub src1q, srcq
  42. sub src2q, srcq
  43. sub src3q, srcq
  44. sub src4q, srcq
  45. sub src5q, srcq
  46. .loop:
      ; Load mmsize bytes (mmsize/4 floats) from each channel: mN <- channel N.
  47. mova m0, [srcq ]
  48. mova m1, [srcq+src1q]
  49. mova m2, [srcq+src2q]
  50. mova m3, [srcq+src3q]
  51. mova m4, [srcq+src4q]
  52. mova m5, [srcq+src5q]
  53. %if cpuflag(sse4)
      ; 128-bit path: four samples per channel. Notation: cN[k] = sample k of
      ; channel N. SBUTTERFLYPS is defined in x86util.asm (not visible here);
      ; presumably it interleaves the register pair, leaving the low half in the
      ; first register and the high half in the second, with m6 as scratch:
      ;   m0 = c0[0] c1[0] c0[1] c1[1]    m1 = c0[2] c1[2] c0[3] c1[3]
      ;   m2 = c2[0] c3[0] c2[1] c3[1]    m3 = c2[2] c3[2] c2[3] c3[3]
      ;   m4 = c4[0] c5[0] c4[1] c5[1]    m5 = c4[2] c5[2] c4[3] c5[3]
  54. SBUTTERFLYPS 0, 1, 6
  55. SBUTTERFLYPS 2, 3, 6
  56. SBUTTERFLYPS 4, 5, 6
      ; Assemble samples 0 and 1. Mask 1100b takes lanes 2-3 from the last source
      ; operand and lanes 0-1 from the middle one. m6 must be built before m0 and
      ; m4 are overwritten by the two moves that follow.
  57. blendps m6, m4, m0, 1100b ; m6 = c4[0] c5[0] c0[1] c1[1]
  58. movlhps m0, m2 ; m0 = c0[0] c1[0] c2[0] c3[0]
  59. movhlps m4, m2 ; m4 = c2[1] c3[1] c4[1] c5[1]
      ; Same pattern for samples 2 and 3; m2 is free to reuse as the blend target.
  60. blendps m2, m5, m1, 1100b ; m2 = c4[2] c5[2] c0[3] c1[3]
  61. movlhps m1, m3 ; m1 = c0[2] c1[2] c2[2] c3[2]
  62. movhlps m5, m3 ; m5 = c2[3] c3[3] c4[3] c5[3]
      ; Write 24 interleaved floats (4 samples x 6 channels) with aligned stores.
  63. movaps [dstq ], m0
  64. movaps [dstq+16], m6
  65. movaps [dstq+32], m4
  66. movaps [dstq+48], m1
  67. movaps [dstq+64], m2
  68. movaps [dstq+80], m5
  69. %else ; mmx
      ; 64-bit path: two samples per channel. SBUTTERFLY dq is defined in
      ; x86util.asm (not visible here); presumably it interleaves dwords so that
      ;   m0 = c0[0] c1[0]   m2 = c2[0] c3[0]   m4 = c4[0] c5[0]   (sample 0)
      ;   m1 = c0[1] c1[1]   m3 = c2[1] c3[1]   m5 = c4[1] c5[1]   (sample 1)
  70. SBUTTERFLY dq, 0, 1, 6
  71. SBUTTERFLY dq, 2, 3, 6
  72. SBUTTERFLY dq, 4, 5, 6
      ; Store sample 0 (m0, m2, m4) followed by sample 1 (m1, m3, m5).
  73. movq [dstq ], m0
  74. movq [dstq+ 8], m2
  75. movq [dstq+16], m4
  76. movq [dstq+24], m1
  77. movq [dstq+32], m3
  78. movq [dstq+40], m5
  79. %endif
      ; Advance one register width per channel on input and six on output, then
      ; subtract the number of samples just converted (4 bytes per float).
  80. add srcq, mmsize
  81. add dstq, mmsize*6
  82. sub lend, mmsize/4
  83. jg .loop ; signed test: continue while samples remain
  84. %if mmsize == 8
      ; MMX registers were used: emms resets the x87/MMX state before returning.
  85. emms
  86. RET
  87. %else
  88. REP_RET
  89. %endif
  90. %endmacro
  91. INIT_MMX mmx ; MMX build: expected to take the "%else ; mmx" path and the emms epilogue
  92. CONV_FLTP_TO_FLT_6CH
  93. INIT_XMM sse4 ; SSE4 build: takes the cpuflag(sse4) path (blendps shuffle, movaps stores)
  94. CONV_FLTP_TO_FLT_6CH
  95. %if HAVE_AVX
  96. INIT_XMM avx ; AVX build, only when HAVE_AVX is set; presumably reuses the sse4 path - TODO confirm in x86inc.asm
  97. CONV_FLTP_TO_FLT_6CH
  98. %endif