audio_convert.asm 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467
  1. ;******************************************************************************
  2. ;* Copyright (c) 2012 Michael Niedermayer
  3. ;*
  4. ;* This file is part of FFmpeg.
  5. ;*
  6. ;* FFmpeg is free software; you can redistribute it and/or
  7. ;* modify it under the terms of the GNU Lesser General Public
  8. ;* License as published by the Free Software Foundation; either
  9. ;* version 2.1 of the License, or (at your option) any later version.
  10. ;*
  11. ;* FFmpeg is distributed in the hope that it will be useful,
  12. ;* but WITHOUT ANY WARRANTY; without even the implied warranty of
  13. ;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  14. ;* Lesser General Public License for more details.
  15. ;*
  16. ;* You should have received a copy of the GNU Lesser General Public
  17. ;* License along with FFmpeg; if not, write to the Free Software
  18. ;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  19. ;******************************************************************************
  20. %include "libavutil/x86/x86util.asm"
  21. SECTION_RODATA 32
  22. flt2pm31: times 8 dd 4.6566129e-10
  23. flt2p31 : times 8 dd 2147483648.0
  24. flt2p15 : times 8 dd 32768.0
  25. word_unpack_shuf : db 0, 1, 4, 5, 8, 9,12,13, 2, 3, 6, 7,10,11,14,15
  26. SECTION .text
  27. ;to, from, a/u, log2_outsize, log_intsize, const
  28. %macro PACK_2CH 5-7
  29. cglobal pack_2ch_%2_to_%1_%3, 3, 4, 6, dst, src, len, src2
  30. mov src2q , [srcq+gprsize]
  31. mov srcq , [srcq]
  32. mov dstq , [dstq]
  33. %ifidn %3, a
  34. test dstq, mmsize-1
  35. jne pack_2ch_%2_to_%1_u_int %+ SUFFIX
  36. test srcq, mmsize-1
  37. jne pack_2ch_%2_to_%1_u_int %+ SUFFIX
  38. test src2q, mmsize-1
  39. jne pack_2ch_%2_to_%1_u_int %+ SUFFIX
  40. %else
  41. pack_2ch_%2_to_%1_u_int %+ SUFFIX
  42. %endif
  43. lea srcq , [srcq + (1<<%5)*lenq]
  44. lea src2q, [src2q + (1<<%5)*lenq]
  45. lea dstq , [dstq + (2<<%4)*lenq]
  46. neg lenq
  47. %7 m0,m1,m2,m3,m4,m5
  48. .next:
  49. %if %4 >= %5
  50. mov%3 m0, [ srcq +(1<<%5)*lenq]
  51. mova m1, m0
  52. mov%3 m2, [ src2q+(1<<%5)*lenq]
  53. %if %5 == 1
  54. punpcklwd m0, m2
  55. punpckhwd m1, m2
  56. %else
  57. punpckldq m0, m2
  58. punpckhdq m1, m2
  59. %endif
  60. %6 m0,m1,m2,m3,m4,m5
  61. %else
  62. mov%3 m0, [ srcq +(1<<%5)*lenq]
  63. mov%3 m1, [mmsize + srcq +(1<<%5)*lenq]
  64. mov%3 m2, [ src2q+(1<<%5)*lenq]
  65. mov%3 m3, [mmsize + src2q+(1<<%5)*lenq]
  66. %6 m0,m1,m2,m3,m4,m5
  67. mova m2, m0
  68. punpcklwd m0, m1
  69. punpckhwd m2, m1
  70. SWAP 1,2
  71. %endif
  72. mov%3 [ dstq+(2<<%4)*lenq], m0
  73. mov%3 [ mmsize + dstq+(2<<%4)*lenq], m1
  74. %if %4 > %5
  75. mov%3 [2*mmsize + dstq+(2<<%4)*lenq], m2
  76. mov%3 [3*mmsize + dstq+(2<<%4)*lenq], m3
  77. add lenq, 4*mmsize/(2<<%4)
  78. %else
  79. add lenq, 2*mmsize/(2<<%4)
  80. %endif
  81. jl .next
  82. REP_RET
  83. %endmacro
  84. %macro UNPACK_2CH 5-7
  85. cglobal unpack_2ch_%2_to_%1_%3, 3, 4, 7, dst, src, len, dst2
  86. mov dst2q , [dstq+gprsize]
  87. mov srcq , [srcq]
  88. mov dstq , [dstq]
  89. %ifidn %3, a
  90. test dstq, mmsize-1
  91. jne unpack_2ch_%2_to_%1_u_int %+ SUFFIX
  92. test srcq, mmsize-1
  93. jne unpack_2ch_%2_to_%1_u_int %+ SUFFIX
  94. test dst2q, mmsize-1
  95. jne unpack_2ch_%2_to_%1_u_int %+ SUFFIX
  96. %else
  97. unpack_2ch_%2_to_%1_u_int %+ SUFFIX
  98. %endif
  99. lea srcq , [srcq + (2<<%5)*lenq]
  100. lea dstq , [dstq + (1<<%4)*lenq]
  101. lea dst2q, [dst2q + (1<<%4)*lenq]
  102. neg lenq
  103. %7 m0,m1,m2,m3,m4,m5
  104. mova m6, [word_unpack_shuf]
  105. .next:
  106. mov%3 m0, [ srcq +(2<<%5)*lenq]
  107. mov%3 m2, [ mmsize + srcq +(2<<%5)*lenq]
  108. %if %5 == 1
  109. %ifidn SUFFIX, _ssse3
  110. pshufb m0, m6
  111. mova m1, m0
  112. pshufb m2, m6
  113. punpcklqdq m0,m2
  114. punpckhqdq m1,m2
  115. %else
  116. mova m1, m0
  117. punpcklwd m0,m2
  118. punpckhwd m1,m2
  119. mova m2, m0
  120. punpcklwd m0,m1
  121. punpckhwd m2,m1
  122. mova m1, m0
  123. punpcklwd m0,m2
  124. punpckhwd m1,m2
  125. %endif
  126. %else
  127. mova m1, m0
  128. shufps m0, m2, 10001000b
  129. shufps m1, m2, 11011101b
  130. %endif
  131. %if %4 < %5
  132. mov%3 m2, [2*mmsize + srcq +(2<<%5)*lenq]
  133. mova m3, m2
  134. mov%3 m4, [3*mmsize + srcq +(2<<%5)*lenq]
  135. shufps m2, m4, 10001000b
  136. shufps m3, m4, 11011101b
  137. SWAP 1,2
  138. %endif
  139. %6 m0,m1,m2,m3,m4,m5
  140. mov%3 [ dstq+(1<<%4)*lenq], m0
  141. %if %4 > %5
  142. mov%3 [ dst2q+(1<<%4)*lenq], m2
  143. mov%3 [ mmsize + dstq+(1<<%4)*lenq], m1
  144. mov%3 [ mmsize + dst2q+(1<<%4)*lenq], m3
  145. add lenq, 2*mmsize/(1<<%4)
  146. %else
  147. mov%3 [ dst2q+(1<<%4)*lenq], m1
  148. add lenq, mmsize/(1<<%4)
  149. %endif
  150. jl .next
  151. REP_RET
  152. %endmacro
  153. %macro CONV 5-7
  154. cglobal %2_to_%1_%3, 3, 3, 6, dst, src, len
  155. mov srcq , [srcq]
  156. mov dstq , [dstq]
  157. %ifidn %3, a
  158. test dstq, mmsize-1
  159. jne %2_to_%1_u_int %+ SUFFIX
  160. test srcq, mmsize-1
  161. jne %2_to_%1_u_int %+ SUFFIX
  162. %else
  163. %2_to_%1_u_int %+ SUFFIX
  164. %endif
  165. lea srcq , [srcq + (1<<%5)*lenq]
  166. lea dstq , [dstq + (1<<%4)*lenq]
  167. neg lenq
  168. %7 m0,m1,m2,m3,m4,m5
  169. .next:
  170. mov%3 m0, [ srcq +(1<<%5)*lenq]
  171. mov%3 m1, [ mmsize + srcq +(1<<%5)*lenq]
  172. %if %4 < %5
  173. mov%3 m2, [2*mmsize + srcq +(1<<%5)*lenq]
  174. mov%3 m3, [3*mmsize + srcq +(1<<%5)*lenq]
  175. %endif
  176. %6 m0,m1,m2,m3,m4,m5
  177. mov%3 [ dstq+(1<<%4)*lenq], m0
  178. mov%3 [ mmsize + dstq+(1<<%4)*lenq], m1
  179. %if %4 > %5
  180. mov%3 [2*mmsize + dstq+(1<<%4)*lenq], m2
  181. mov%3 [3*mmsize + dstq+(1<<%4)*lenq], m3
  182. add lenq, 4*mmsize/(1<<%4)
  183. %else
  184. add lenq, 2*mmsize/(1<<%4)
  185. %endif
  186. jl .next
  187. %if mmsize == 8
  188. emms
  189. RET
  190. %else
  191. REP_RET
  192. %endif
  193. %endmacro
  194. %macro PACK_6CH 5-7
  195. cglobal pack_6ch_%2_to_%1_%3, 2,8,7, dst, src, src1, src2, src3, src4, src5, len
  196. %if ARCH_X86_64
  197. mov lend, r2d
  198. %else
  199. %define lend dword r2m
  200. %endif
  201. mov src1q, [srcq+1*gprsize]
  202. mov src2q, [srcq+2*gprsize]
  203. mov src3q, [srcq+3*gprsize]
  204. mov src4q, [srcq+4*gprsize]
  205. mov src5q, [srcq+5*gprsize]
  206. mov srcq, [srcq]
  207. mov dstq, [dstq]
  208. %ifidn %3, a
  209. test dstq, mmsize-1
  210. jne pack_6ch_%2_to_%1_u_int %+ SUFFIX
  211. test srcq, mmsize-1
  212. jne pack_6ch_%2_to_%1_u_int %+ SUFFIX
  213. test src1q, mmsize-1
  214. jne pack_6ch_%2_to_%1_u_int %+ SUFFIX
  215. test src2q, mmsize-1
  216. jne pack_6ch_%2_to_%1_u_int %+ SUFFIX
  217. test src3q, mmsize-1
  218. jne pack_6ch_%2_to_%1_u_int %+ SUFFIX
  219. test src4q, mmsize-1
  220. jne pack_6ch_%2_to_%1_u_int %+ SUFFIX
  221. test src5q, mmsize-1
  222. jne pack_6ch_%2_to_%1_u_int %+ SUFFIX
  223. %else
  224. pack_6ch_%2_to_%1_u_int %+ SUFFIX
  225. %endif
  226. sub src1q, srcq
  227. sub src2q, srcq
  228. sub src3q, srcq
  229. sub src4q, srcq
  230. sub src5q, srcq
  231. .loop:
  232. mov%3 m0, [srcq ]
  233. mov%3 m1, [srcq+src1q]
  234. mov%3 m2, [srcq+src2q]
  235. mov%3 m3, [srcq+src3q]
  236. mov%3 m4, [srcq+src4q]
  237. mov%3 m5, [srcq+src5q]
  238. %7 x,x,x,x,m7,x
  239. %if cpuflag(sse4)
  240. SBUTTERFLYPS 0, 1, 6
  241. SBUTTERFLYPS 2, 3, 6
  242. SBUTTERFLYPS 4, 5, 6
  243. blendps m6, m4, m0, 1100b
  244. movlhps m0, m2
  245. movhlps m4, m2
  246. blendps m2, m5, m1, 1100b
  247. movlhps m1, m3
  248. movhlps m5, m3
  249. %6 m0,m6,x,x,m7,m3
  250. %6 m4,m1,x,x,m7,m3
  251. %6 m2,m5,x,x,m7,m3
  252. mov %+ %3 %+ ps [dstq ], m0
  253. mov %+ %3 %+ ps [dstq+16], m6
  254. mov %+ %3 %+ ps [dstq+32], m4
  255. mov %+ %3 %+ ps [dstq+48], m1
  256. mov %+ %3 %+ ps [dstq+64], m2
  257. mov %+ %3 %+ ps [dstq+80], m5
  258. %else ; mmx
  259. SBUTTERFLY dq, 0, 1, 6
  260. SBUTTERFLY dq, 2, 3, 6
  261. SBUTTERFLY dq, 4, 5, 6
  262. movq [dstq ], m0
  263. movq [dstq+ 8], m2
  264. movq [dstq+16], m4
  265. movq [dstq+24], m1
  266. movq [dstq+32], m3
  267. movq [dstq+40], m5
  268. %endif
  269. add srcq, mmsize
  270. add dstq, mmsize*6
  271. sub lend, mmsize/4
  272. jg .loop
  273. %if mmsize == 8
  274. emms
  275. RET
  276. %else
  277. REP_RET
  278. %endif
  279. %endmacro
  280. %macro INT16_TO_INT32_N 6
  281. pxor m2, m2
  282. pxor m3, m3
  283. punpcklwd m2, m1
  284. punpckhwd m3, m1
  285. SWAP 4,0
  286. pxor m0, m0
  287. pxor m1, m1
  288. punpcklwd m0, m4
  289. punpckhwd m1, m4
  290. %endmacro
  291. %macro INT32_TO_INT16_N 6
  292. psrad m0, 16
  293. psrad m1, 16
  294. psrad m2, 16
  295. psrad m3, 16
  296. packssdw m0, m1
  297. packssdw m2, m3
  298. SWAP 1,2
  299. %endmacro
  300. %macro INT32_TO_FLOAT_INIT 6
  301. mova %5, [flt2pm31]
  302. %endmacro
  303. %macro INT32_TO_FLOAT_N 6
  304. cvtdq2ps %1, %1
  305. cvtdq2ps %2, %2
  306. mulps %1, %1, %5
  307. mulps %2, %2, %5
  308. %endmacro
  309. %macro FLOAT_TO_INT32_INIT 6
  310. mova %5, [flt2p31]
  311. %endmacro
  312. %macro FLOAT_TO_INT32_N 6
  313. mulps %1, %5
  314. mulps %2, %5
  315. cvtps2dq %6, %1
  316. cmpnltps %1, %5
  317. paddd %1, %6
  318. cvtps2dq %6, %2
  319. cmpnltps %2, %5
  320. paddd %2, %6
  321. %endmacro
  322. %macro INT16_TO_FLOAT_INIT 6
  323. mova m5, [flt2pm31]
  324. %endmacro
  325. %macro INT16_TO_FLOAT_N 6
  326. INT16_TO_INT32_N %1,%2,%3,%4,%5,%6
  327. cvtdq2ps m0, m0
  328. cvtdq2ps m1, m1
  329. cvtdq2ps m2, m2
  330. cvtdq2ps m3, m3
  331. mulps m0, m0, m5
  332. mulps m1, m1, m5
  333. mulps m2, m2, m5
  334. mulps m3, m3, m5
  335. %endmacro
  336. %macro FLOAT_TO_INT16_INIT 6
  337. mova m5, [flt2p15]
  338. %endmacro
  339. %macro FLOAT_TO_INT16_N 6
  340. mulps m0, m5
  341. mulps m1, m5
  342. mulps m2, m5
  343. mulps m3, m5
  344. cvtps2dq m0, m0
  345. cvtps2dq m1, m1
  346. packssdw m0, m1
  347. cvtps2dq m1, m2
  348. cvtps2dq m3, m3
  349. packssdw m1, m3
  350. %endmacro
  351. %macro NOP_N 0-6
  352. %endmacro
  353. INIT_MMX mmx
  354. CONV int32, int16, u, 2, 1, INT16_TO_INT32_N, NOP_N
  355. CONV int32, int16, a, 2, 1, INT16_TO_INT32_N, NOP_N
  356. CONV int16, int32, u, 1, 2, INT32_TO_INT16_N, NOP_N
  357. CONV int16, int32, a, 1, 2, INT32_TO_INT16_N, NOP_N
  358. PACK_6CH float, float, u, 2, 2, NOP_N, NOP_N
  359. PACK_6CH float, float, a, 2, 2, NOP_N, NOP_N
  360. INIT_XMM sse2
  361. CONV int32, int16, u, 2, 1, INT16_TO_INT32_N, NOP_N
  362. CONV int32, int16, a, 2, 1, INT16_TO_INT32_N, NOP_N
  363. CONV int16, int32, u, 1, 2, INT32_TO_INT16_N, NOP_N
  364. CONV int16, int32, a, 1, 2, INT32_TO_INT16_N, NOP_N
  365. PACK_2CH int16, int16, u, 1, 1, NOP_N, NOP_N
  366. PACK_2CH int16, int16, a, 1, 1, NOP_N, NOP_N
  367. PACK_2CH int32, int32, u, 2, 2, NOP_N, NOP_N
  368. PACK_2CH int32, int32, a, 2, 2, NOP_N, NOP_N
  369. PACK_2CH int32, int16, u, 2, 1, INT16_TO_INT32_N, NOP_N
  370. PACK_2CH int32, int16, a, 2, 1, INT16_TO_INT32_N, NOP_N
  371. PACK_2CH int16, int32, u, 1, 2, INT32_TO_INT16_N, NOP_N
  372. PACK_2CH int16, int32, a, 1, 2, INT32_TO_INT16_N, NOP_N
  373. UNPACK_2CH int16, int16, u, 1, 1, NOP_N, NOP_N
  374. UNPACK_2CH int16, int16, a, 1, 1, NOP_N, NOP_N
  375. UNPACK_2CH int32, int32, u, 2, 2, NOP_N, NOP_N
  376. UNPACK_2CH int32, int32, a, 2, 2, NOP_N, NOP_N
  377. UNPACK_2CH int32, int16, u, 2, 1, INT16_TO_INT32_N, NOP_N
  378. UNPACK_2CH int32, int16, a, 2, 1, INT16_TO_INT32_N, NOP_N
  379. UNPACK_2CH int16, int32, u, 1, 2, INT32_TO_INT16_N, NOP_N
  380. UNPACK_2CH int16, int32, a, 1, 2, INT32_TO_INT16_N, NOP_N
  381. CONV float, int32, u, 2, 2, INT32_TO_FLOAT_N, INT32_TO_FLOAT_INIT
  382. CONV float, int32, a, 2, 2, INT32_TO_FLOAT_N, INT32_TO_FLOAT_INIT
  383. CONV int32, float, u, 2, 2, FLOAT_TO_INT32_N, FLOAT_TO_INT32_INIT
  384. CONV int32, float, a, 2, 2, FLOAT_TO_INT32_N, FLOAT_TO_INT32_INIT
  385. CONV float, int16, u, 2, 1, INT16_TO_FLOAT_N, INT16_TO_FLOAT_INIT
  386. CONV float, int16, a, 2, 1, INT16_TO_FLOAT_N, INT16_TO_FLOAT_INIT
  387. CONV int16, float, u, 1, 2, FLOAT_TO_INT16_N, FLOAT_TO_INT16_INIT
  388. CONV int16, float, a, 1, 2, FLOAT_TO_INT16_N, FLOAT_TO_INT16_INIT
  389. PACK_2CH float, int32, u, 2, 2, INT32_TO_FLOAT_N, INT32_TO_FLOAT_INIT
  390. PACK_2CH float, int32, a, 2, 2, INT32_TO_FLOAT_N, INT32_TO_FLOAT_INIT
  391. PACK_2CH int32, float, u, 2, 2, FLOAT_TO_INT32_N, FLOAT_TO_INT32_INIT
  392. PACK_2CH int32, float, a, 2, 2, FLOAT_TO_INT32_N, FLOAT_TO_INT32_INIT
  393. PACK_2CH float, int16, u, 2, 1, INT16_TO_FLOAT_N, INT16_TO_FLOAT_INIT
  394. PACK_2CH float, int16, a, 2, 1, INT16_TO_FLOAT_N, INT16_TO_FLOAT_INIT
  395. PACK_2CH int16, float, u, 1, 2, FLOAT_TO_INT16_N, FLOAT_TO_INT16_INIT
  396. PACK_2CH int16, float, a, 1, 2, FLOAT_TO_INT16_N, FLOAT_TO_INT16_INIT
  397. UNPACK_2CH float, int32, u, 2, 2, INT32_TO_FLOAT_N, INT32_TO_FLOAT_INIT
  398. UNPACK_2CH float, int32, a, 2, 2, INT32_TO_FLOAT_N, INT32_TO_FLOAT_INIT
  399. UNPACK_2CH int32, float, u, 2, 2, FLOAT_TO_INT32_N, FLOAT_TO_INT32_INIT
  400. UNPACK_2CH int32, float, a, 2, 2, FLOAT_TO_INT32_N, FLOAT_TO_INT32_INIT
  401. UNPACK_2CH float, int16, u, 2, 1, INT16_TO_FLOAT_N, INT16_TO_FLOAT_INIT
  402. UNPACK_2CH float, int16, a, 2, 1, INT16_TO_FLOAT_N, INT16_TO_FLOAT_INIT
  403. UNPACK_2CH int16, float, u, 1, 2, FLOAT_TO_INT16_N, FLOAT_TO_INT16_INIT
  404. UNPACK_2CH int16, float, a, 1, 2, FLOAT_TO_INT16_N, FLOAT_TO_INT16_INIT
  405. INIT_XMM ssse3
  406. UNPACK_2CH int16, int16, u, 1, 1, NOP_N, NOP_N
  407. UNPACK_2CH int16, int16, a, 1, 1, NOP_N, NOP_N
  408. UNPACK_2CH int32, int16, u, 2, 1, INT16_TO_INT32_N, NOP_N
  409. UNPACK_2CH int32, int16, a, 2, 1, INT16_TO_INT32_N, NOP_N
  410. UNPACK_2CH float, int16, u, 2, 1, INT16_TO_FLOAT_N, INT16_TO_FLOAT_INIT
  411. UNPACK_2CH float, int16, a, 2, 1, INT16_TO_FLOAT_N, INT16_TO_FLOAT_INIT
  412. INIT_XMM sse4
  413. PACK_6CH float, float, u, 2, 2, NOP_N, NOP_N
  414. PACK_6CH float, float, a, 2, 2, NOP_N, NOP_N
  415. PACK_6CH float, int32, u, 2, 2, INT32_TO_FLOAT_N, INT32_TO_FLOAT_INIT
  416. PACK_6CH float, int32, a, 2, 2, INT32_TO_FLOAT_N, INT32_TO_FLOAT_INIT
  417. PACK_6CH int32, float, u, 2, 2, FLOAT_TO_INT32_N, FLOAT_TO_INT32_INIT
  418. PACK_6CH int32, float, a, 2, 2, FLOAT_TO_INT32_N, FLOAT_TO_INT32_INIT
  419. %if HAVE_AVX_EXTERNAL
  420. INIT_XMM avx
  421. PACK_6CH float, float, u, 2, 2, NOP_N, NOP_N
  422. PACK_6CH float, float, a, 2, 2, NOP_N, NOP_N
  423. PACK_6CH float, int32, u, 2, 2, INT32_TO_FLOAT_N, INT32_TO_FLOAT_INIT
  424. PACK_6CH float, int32, a, 2, 2, INT32_TO_FLOAT_N, INT32_TO_FLOAT_INIT
  425. PACK_6CH int32, float, u, 2, 2, FLOAT_TO_INT32_N, FLOAT_TO_INT32_INIT
  426. PACK_6CH int32, float, a, 2, 2, FLOAT_TO_INT32_N, FLOAT_TO_INT32_INIT
  427. INIT_YMM avx
  428. CONV float, int32, u, 2, 2, INT32_TO_FLOAT_N, INT32_TO_FLOAT_INIT
  429. CONV float, int32, a, 2, 2, INT32_TO_FLOAT_N, INT32_TO_FLOAT_INIT
  430. %endif