rgb_2_rgb.asm 2.6 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586
  1. ;******************************************************************************
  2. ;* Copyright Nick Kurshev
  3. ;* Copyright Michael (michaelni@gmx.at)
  4. ;* Copyright 2018 Jokyo Images
  5. ;*
  6. ;* This file is part of FFmpeg.
  7. ;*
  8. ;* FFmpeg is free software; you can redistribute it and/or
  9. ;* modify it under the terms of the GNU Lesser General Public
  10. ;* License as published by the Free Software Foundation; either
  11. ;* version 2.1 of the License, or (at your option) any later version.
  12. ;*
  13. ;* FFmpeg is distributed in the hope that it will be useful,
  14. ;* but WITHOUT ANY WARRANTY; without even the implied warranty of
  15. ;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  16. ;* Lesser General Public License for more details.
  17. ;*
  18. ;* You should have received a copy of the GNU Lesser General Public
  19. ;* License along with FFmpeg; if not, write to the Free Software
  20. ;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  21. ;******************************************************************************
  22. %include "libavutil/x86/x86util.asm"
  SECTION_RODATA

  ; Byte-permutation control vectors consumed by pshufb in SHUFFLE_BYTES.
  ; Entry i holds the index of the source byte that lands in destination byte i.
  ; Each table repeats the same 4-byte pattern for the four 32-bit groups of a
  ; 128-bit lane; the digits in the label are the in-group source indices
  ; (e.g. 2103 => dst[0]=src[2], dst[1]=src[1], dst[2]=src[0], dst[3]=src[3]).
  ;                   group 0         group 1         group 2         group 3
  pb_shuffle2103: db  2,  1,  0,  3,  6,  5,  4,  7, 10,  9,  8, 11, 14, 13, 12, 15
  pb_shuffle0321: db  0,  3,  2,  1,  4,  7,  6,  5,  8, 11, 10,  9, 12, 15, 14, 13
  pb_shuffle1230: db  1,  2,  3,  0,  5,  6,  7,  4,  9, 10, 11,  8, 13, 14, 15, 12
  pb_shuffle3012: db  3,  0,  1,  2,  7,  4,  5,  6, 11,  8,  9, 10, 15, 12, 13, 14
  pb_shuffle3210: db  3,  2,  1,  0,  7,  6,  5,  4, 11, 10,  9,  8, 15, 14, 13, 12
  SECTION .text

  ;------------------------------------------------------------------------------
  ; shuffle_bytes_## (const uint8_t *src, uint8_t *dst, int src_size)
  ;
  ; Permutes the bytes inside every 4-byte group of src into dst:
  ;     dst[4*i + 0] = src[4*i + %1]
  ;     dst[4*i + 1] = src[4*i + %2]
  ;     dst[4*i + 2] = src[4*i + %3]
  ;     dst[4*i + 3] = src[4*i + %4]
  ;
  ; src_size is a byte count and is expected to be a multiple of 4: a remainder
  ; is not handled, and the vector loop would then run past the end of both
  ; buffers. src_size <= 0 returns immediately without touching memory.
  ;
  ; Register roles (names declared by cglobal below):
  ;   srcq, dstq  advanced to one-past-the-end of their buffers
  ;   wq          negative byte offset from the end, counts up towards 0
  ;   xq          bytes left for the scalar head (src_size & (mmsize-4))
  ;   tmpb        one-byte scratch for the scalar copies
  ;   m0          pshufb control vector, m1 = data being shuffled
  ;------------------------------------------------------------------------------
  ; %1-4 index shuffle
  %macro SHUFFLE_BYTES 4
  cglobal shuffle_bytes_%1%2%3%4, 3, 5, 2, src, dst, w, tmp, x
      VBROADCASTI128    m0, [pb_shuffle%1%2%3%4]
      movsxdifnidn      wq, wd
      mov               xq, wq

      ; Point both pointers at the end and index backwards with a negative
      ; offset, so the loops can terminate on the sign/zero of wq alone.
      add             srcq, wq
      add             dstq, wq
      neg               wq
      ; neg leaves SF/OF describing -src_size; wq >= 0 means src_size <= 0.
      ; Without this guard an empty input fell into .loop_simd and read and
      ; wrote one full vector beyond the buffers.
      jge .end

      ; Scalar head: peel off the 4-byte groups that do not fill a whole
      ; vector, so that what remains is a multiple of mmsize.
      and               xq, mmsize-4
      je .loop_simd

  .loop_scalar:
      mov             tmpb, [srcq + wq + %1]
      mov  [dstq + wq + 0], tmpb
      mov             tmpb, [srcq + wq + %2]
      mov  [dstq + wq + 1], tmpb
      mov             tmpb, [srcq + wq + %3]
      mov  [dstq + wq + 2], tmpb
      mov             tmpb, [srcq + wq + %4]
      mov  [dstq + wq + 3], tmpb
      add               wq, 4
      sub               xq, 4
      jg .loop_scalar

      ; src_size < mmsize: the scalar head already covered everything.
      test              wq, wq
      jge .end

  .loop_simd:
      movu              m1, [srcq + wq]
      pshufb            m1, m0
      movu     [dstq + wq], m1
      add               wq, mmsize
      jl .loop_simd

  .end:
      RET
  %endmacro
  ; Instantiate one function per byte order, built for XMM registers with SSSE3
  ; (pshufb is an SSSE3 instruction). Each expansion declares
  ; shuffle_bytes_<order> through cglobal and uses the matching pb_shuffle<order>
  ; table.
  INIT_XMM ssse3
  SHUFFLE_BYTES   2, 1, 0, 3      ; shuffle_bytes_2103
  SHUFFLE_BYTES   0, 3, 2, 1      ; shuffle_bytes_0321
  SHUFFLE_BYTES   1, 2, 3, 0      ; shuffle_bytes_1230
  SHUFFLE_BYTES   3, 0, 1, 2      ; shuffle_bytes_3012
  SHUFFLE_BYTES   3, 2, 1, 0      ; shuffle_bytes_3210