; vf_hqdn3d.asm
;******************************************************************************
;* Copyright (c) 2012 Loren Merritt
;*
;* This file is part of FFmpeg.
;*
;* FFmpeg is free software; you can redistribute it and/or
;* modify it under the terms of the GNU Lesser General Public
;* License as published by the Free Software Foundation; either
;* version 2.1 of the License, or (at your option) any later version.
;*
;* FFmpeg is distributed in the hope that it will be useful,
;* but WITHOUT ANY WARRANTY; without even the implied warranty of
;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
;* Lesser General Public License for more details.
;*
;* You should have received a copy of the GNU Lesser General Public
;* License along with FFmpeg; if not, write to the Free Software
;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
;******************************************************************************
  20. %include "libavutil/x86/x86util.asm"
  21. SECTION .text
  22. %macro LOWPASS 3 ; prevsample, cursample, lut
  23. sub %1q, %2q
  24. %if lut_bits != 8
  25. sar %1q, 8-lut_bits
  26. %endif
  27. movsx %1q, word [%3q+%1q*2]
  28. add %1q, %2q
  29. %endmacro
  30. %macro LOAD 3 ; dstreg, x, bitdepth
  31. %if %3 == 8
  32. movzx %1, byte [srcq+%2]
  33. %else
  34. movzx %1, word [srcq+(%2)*2]
  35. %endif
  36. %if %3 != 16
  37. shl %1, 16-%3
  38. add %1, (1<<(15-%3))-1
  39. %endif
  40. %endmacro
  41. %macro HQDN3D_ROW 1 ; bitdepth
  42. %if ARCH_X86_64
  43. cglobal hqdn3d_row_%1_x86, 7,10,0, src, dst, lineant, frameant, width, spatial, temporal, pixelant, t0, t1
  44. %else
  45. cglobal hqdn3d_row_%1_x86, 7,7,0, src, dst, lineant, frameant, width, spatial, temporal
  46. %endif
  47. %assign bytedepth (%1+7)>>3
  48. %assign lut_bits 4+4*(%1/16)
  49. dec widthq
  50. lea srcq, [srcq+widthq*bytedepth]
  51. lea dstq, [dstq+widthq*bytedepth]
  52. lea frameantq, [frameantq+widthq*2]
  53. lea lineantq, [lineantq+widthq*2]
  54. neg widthq
  55. %define xq widthq
  56. %if ARCH_X86_32
  57. mov dstmp, dstq
  58. mov srcmp, srcq
  59. mov frameantmp, frameantq
  60. mov lineantmp, lineantq
  61. %define dstq r0
  62. %define frameantq r0
  63. %define lineantq r0
  64. %define pixelantq r1
  65. %define pixelantd r1d
  66. DECLARE_REG_TMP 2,3
  67. %endif
  68. LOAD pixelantd, xq, %1
  69. ALIGN 16
  70. .loop:
  71. movifnidn srcq, srcmp
  72. LOAD t0d, xq+1, %1 ; skip on the last iteration to avoid overread
  73. .loop2:
  74. movifnidn lineantq, lineantmp
  75. movzx t1d, word [lineantq+xq*2]
  76. LOWPASS t1, pixelant, spatial
  77. mov [lineantq+xq*2], t1w
  78. LOWPASS pixelant, t0, spatial
  79. movifnidn frameantq, frameantmp
  80. movzx t0d, word [frameantq+xq*2]
  81. LOWPASS t0, t1, temporal
  82. mov [frameantq+xq*2], t0w
  83. movifnidn dstq, dstmp
  84. %if %1 != 16
  85. shr t0d, 16-%1 ; could eliminate this by storing from t0h, but only with some contraints on register allocation
  86. %endif
  87. %if %1 == 8
  88. mov [dstq+xq], t0b
  89. %else
  90. mov [dstq+xq*2], t0w
  91. %endif
  92. inc xq
  93. jl .loop
  94. je .loop2
  95. REP_RET
  96. %endmacro ; HQDN3D_ROW
  97. HQDN3D_ROW 8
  98. HQDN3D_ROW 9
  99. HQDN3D_ROW 10
  100. HQDN3D_ROW 16