dither.asm 3.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117
  1. ;******************************************************************************
  2. ;* x86 optimized dithering format conversion
  3. ;* Copyright (c) 2012 Justin Ruggles <justin.ruggles@gmail.com>
  4. ;*
  5. ;* This file is part of FFmpeg.
  6. ;*
  7. ;* FFmpeg is free software; you can redistribute it and/or
  8. ;* modify it under the terms of the GNU Lesser General Public
  9. ;* License as published by the Free Software Foundation; either
  10. ;* version 2.1 of the License, or (at your option) any later version.
  11. ;*
  12. ;* FFmpeg is distributed in the hope that it will be useful,
  13. ;* but WITHOUT ANY WARRANTY; without even the implied warranty of
  14. ;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  15. ;* Lesser General Public License for more details.
  16. ;*
  17. ;* You should have received a copy of the GNU Lesser General Public
  18. ;* License along with FFmpeg; if not, write to the Free Software
  19. ;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  20. ;******************************************************************************
  21. %include "libavutil/x86/x86util.asm"
  SECTION_RODATA 32

  ; Factor that maps a 32-bit integer dither value to float:
  ;     1.0f / (2.0f * INT32_MAX)
  ; Stored 8 times (32 bytes) so a single aligned load fills either an XMM or a
  ; YMM register; it is the first object in the 32-byte aligned section.
  pf_dither_scale:    times 8 dd 2.32830643762e-10

  ; Gain that ff_quantize multiplies float samples by before adding dither and
  ; converting to int16. Stored 4 times (16 bytes) to fill one XMM register.
  ; NOTE(review): the value is slightly below INT16_MAX, presumably to leave
  ; headroom for the added dither -- confirm against the C reference code.
  pf_s16_scale:       times 4 dd 32753.0
  26. SECTION .text
  ;------------------------------------------------------------------------------
  ; void ff_quantize(int16_t *dst, float *src, float *dither, int len);
  ;
  ; Per sample:  dst[i] = int16(src[i] * pf_s16_scale + dither[i])
  ;   - float -> int32 uses cvtps2dq (rounds according to the current MXCSR mode)
  ;   - int32 -> int16 uses packssdw (signed saturation)
  ;
  ; One iteration reads 8 floats from src and 8 from dither and writes 8 int16
  ; values. src and dither are used as direct memory operands of SSE
  ; instructions and dst is written with an aligned store, so all three buffers
  ; are accessed as 16-byte aligned data.
  ;
  ; The remaining count is only tested after the loop body has executed.
  ; NOTE(review): presumably callers guarantee len > 0 and len % 8 == 0 --
  ; confirm against the C-side dispatch code.
  ;------------------------------------------------------------------------------
  INIT_XMM sse2
  cglobal quantize, 4,4,3, dst, src, dither, len
      mova            m2, [pf_s16_scale]          ; m2 = scale in every lane
      lea           lenq, [2*lend]                ; lenq = output size in bytes
      lea           srcq, [srcq+2*lenq]           ; floats are twice as wide as int16:
      lea        ditherq, [ditherq+2*lenq]        ;   move src/dither to their ends
      add           dstq, lenq                    ; dst -> end of the output
      neg           lenq                          ; negative offset, counts up to 0
  .convert:
      mulps           m0, m2, [srcq+2*lenq]           ; samples 0-3 * scale
      mulps           m1, m2, [srcq+2*lenq+mmsize]    ; samples 4-7 * scale
      addps           m0, [ditherq+2*lenq]            ; + dither 0-3
      addps           m1, [ditherq+2*lenq+mmsize]     ; + dither 4-7
      cvtps2dq        m0, m0                          ; float -> int32
      cvtps2dq        m1, m1
      packssdw        m0, m1                          ; 2 x 4 int32 -> 8 int16, saturated
      mova   [dstq+lenq], m0
      add           lenq, mmsize                      ; 16 output bytes = 8 samples done
      jl .convert                                     ; continue while offset < 0
      REP_RET
  ;------------------------------------------------------------------------------
  ; void ff_dither_int_to_float_rectangular(float *dst, int *src, int len)
  ;
  ; Per element:  dst[i] = (float)src[i] * pf_dither_scale
  ;
  ; The macro body is assembled once per instruction set selected below (SSE2
  ; with XMM registers, AVX with YMM registers). One iteration handles two
  ; vector registers, i.e. 2*mmsize bytes: 8 elements for SSE2, 16 for AVX.
  ; dst is written with aligned stores, so it must be mmsize-aligned; in the
  ; SSE2 build the direct memory operand of cvtdq2ps additionally requires src
  ; to be 16-byte aligned.
  ;
  ; The remaining count is only tested after the loop body has executed.
  ; NOTE(review): presumably callers guarantee len > 0 and a multiple of the
  ; per-iteration element count -- confirm against the C-side dispatch code.
  ;------------------------------------------------------------------------------
  %macro DITHER_INT_TO_FLOAT_RECTANGULAR 0
  cglobal dither_int_to_float_rectangular, 3,3,3, dst, src, len
      mova                  m0, [pf_dither_scale]     ; m0 = scale in every lane
      lea                 lenq, [4*lend]              ; lenq = buffer size in bytes
      add                 dstq, lenq                  ; both pointers -> end of buffer
      add                 srcq, lenq
      neg                 lenq                        ; negative offset, counts up to 0
  .scale:
      cvtdq2ps              m1, [srcq+lenq]           ; int32 -> float, first vector
      cvtdq2ps              m2, [srcq+lenq+mmsize]    ; int32 -> float, second vector
      mulps                 m1, m0
      mulps                 m2, m0
      mova         [dstq+lenq], m1
      mova  [dstq+lenq+mmsize], m2
      add                 lenq, 2*mmsize              ; two vectors consumed
      jl .scale                                       ; continue while offset < 0
      REP_RET
  %endmacro

  INIT_XMM sse2
  DITHER_INT_TO_FLOAT_RECTANGULAR
  INIT_YMM avx
  DITHER_INT_TO_FLOAT_RECTANGULAR
  ;------------------------------------------------------------------------------
  ; void ff_dither_int_to_float_triangular(float *dst, int *src0, int len)
  ;
  ; Per element:  dst[i] = ((float)src0[i] + (float)src0[len + i]) * pf_dither_scale
  ;
  ; The second operand is read from the len integers that directly follow the
  ; first len integers of src0, so the input holds 2*len values. Both integers
  ; are converted to float before they are added.
  ;
  ; The macro body is assembled once per instruction set selected below (SSE2
  ; with XMM registers, AVX with YMM registers). One iteration handles two
  ; vector registers, i.e. 2*mmsize bytes: 8 elements for SSE2, 16 for AVX.
  ; dst is written with aligned stores, so it must be mmsize-aligned; in the
  ; SSE2 build the direct memory operands of cvtdq2ps additionally require
  ; both input halves to be 16-byte aligned.
  ;
  ; The remaining count is only tested after the loop body has executed.
  ; NOTE(review): presumably callers guarantee len > 0 and a multiple of the
  ; per-iteration element count -- confirm against the C-side dispatch code.
  ;------------------------------------------------------------------------------
  %macro DITHER_INT_TO_FLOAT_TRIANGULAR 0
  cglobal dither_int_to_float_triangular, 3,4,5, dst, src0, len, src1
      mova                  m0, [pf_dither_scale]     ; m0 = scale in every lane
      lea                 lenq, [4*lend]              ; lenq = bytes per input half
      add                src0q, lenq                  ; src0 -> end of first half
      lea                src1q, [src0q+lenq]          ; src1 -> end of second half
      add                 dstq, lenq                  ; dst  -> end of output
      neg                 lenq                        ; negative offset, counts up to 0
  .mix:
      cvtdq2ps              m1, [src0q+lenq]          ; first half,  vector 0
      cvtdq2ps              m2, [src0q+lenq+mmsize]   ; first half,  vector 1
      cvtdq2ps              m3, [src1q+lenq]          ; second half, vector 0
      cvtdq2ps              m4, [src1q+lenq+mmsize]   ; second half, vector 1
      addps                 m1, m3                    ; sum the two halves
      addps                 m2, m4
      mulps                 m1, m0                    ; apply the scale
      mulps                 m2, m0
      mova         [dstq+lenq], m1
      mova  [dstq+lenq+mmsize], m2
      add                 lenq, 2*mmsize              ; two vectors consumed
      jl .mix                                         ; continue while offset < 0
      REP_RET
  %endmacro

  INIT_XMM sse2
  DITHER_INT_TO_FLOAT_TRIANGULAR
  INIT_YMM avx
  DITHER_INT_TO_FLOAT_TRIANGULAR