vf_gradfun_init.c 4.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106
  1. /*
  2. * Copyright (C) 2009 Loren Merritt <lorenm@u.washington.edu>
  3. *
  4. * This file is part of FFmpeg.
  5. *
  6. * FFmpeg is free software; you can redistribute it and/or
  7. * modify it under the terms of the GNU Lesser General Public
  8. * License as published by the Free Software Foundation; either
  9. * version 2.1 of the License, or (at your option) any later version.
  10. *
  11. * FFmpeg is distributed in the hope that it will be useful,
  12. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  14. * Lesser General Public License for more details.
  15. *
  16. * You should have received a copy of the GNU Lesser General Public
  17. * License along with FFmpeg; if not, write to the Free Software
  18. * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  19. */
  20. #include "config.h"
  21. #include "libavutil/attributes.h"
  22. #include "libavutil/cpu.h"
  23. #include "libavutil/mem.h"
  24. #include "libavutil/x86/asm.h"
  25. #include "libavutil/x86/cpu.h"
  26. #include "libavfilter/gradfun.h"
  /*
   * Line-filter routines defined outside this file.
   *
   * The wrappers in this file call them with x set to the negated number of
   * pixels to process, dst/src advanced by that pixel count and dc advanced
   * by half of it.
   */
  void ff_gradfun_filter_line_mmxext(intptr_t x,
                                     uint8_t *dst, const uint8_t *src,
                                     const uint16_t *dc,
                                     int thresh,
                                     const uint16_t *dithers);

  void ff_gradfun_filter_line_ssse3(intptr_t x,
                                    uint8_t *dst, const uint8_t *src,
                                    const uint16_t *dc,
                                    int thresh,
                                    const uint16_t *dithers);

  /*
   * Line-blur routines defined outside this file.
   *
   * The wrapper in this file calls them with x set to -2 * width, buf/buf1/dc
   * advanced by width, and src1/src2 advanced by 2 * width (src2 additionally
   * offset by one source line). It picks the "movdqa" variant only when both
   * the source address and the source line size are multiples of 16, and the
   * "movdqu" variant otherwise.
   */
  void ff_gradfun_blur_line_movdqa_sse2(intptr_t x,
                                        uint16_t *buf, const uint16_t *buf1,
                                        uint16_t *dc,
                                        const uint8_t *src1,
                                        const uint8_t *src2);

  void ff_gradfun_blur_line_movdqu_sse2(intptr_t x,
                                        uint16_t *buf, const uint16_t *buf1,
                                        uint16_t *dc,
                                        const uint8_t *src1,
                                        const uint8_t *src2);
  39. #if HAVE_YASM
  /*
   * Filter one line using the MMXEXT routine.
   *
   * The line is split into a leading part whose length is a multiple of 4
   * pixels, handed to ff_gradfun_filter_line_mmxext(), and a remainder of up
   * to 3 pixels, handed to ff_gradfun_filter_line_c(). The remainder is
   * processed first.
   *
   * NOTE(review): when width < 4 the MMXEXT routine is still called with
   * x == 0; whether it tolerates that is not visible from this file.
   */
  static void gradfun_filter_line_mmxext(uint8_t *dst, const uint8_t *src,
                                         const uint16_t *dc,
                                         int width, int thresh,
                                         const uint16_t *dithers)
  {
      const int simd_width = width & ~3;          /* multiple of 4 */
      const int tail_width = width - simd_width;  /* leftover pixels */
      intptr_t x;

      if (tail_width) {
          ff_gradfun_filter_line_c(dst + simd_width, src + simd_width,
                                   dc + simd_width / 2,
                                   tail_width, thresh, dithers);
      }

      /* The SIMD routine receives end pointers plus a negative start offset. */
      x = -simd_width;
      ff_gradfun_filter_line_mmxext(x, dst + simd_width, src + simd_width,
                                    dc + simd_width / 2,
                                    thresh, dithers);
  }
  /*
   * Filter one line using the SSSE3 routine.
   *
   * The line is split into a leading part whose length is a multiple of 8
   * pixels, handed to ff_gradfun_filter_line_ssse3(), and a remainder of up
   * to 7 pixels, handed to ff_gradfun_filter_line_c(). The remainder is
   * processed first.
   *
   * NOTE(review): when width < 8 the SSSE3 routine is still called with
   * x == 0; whether it tolerates that is not visible from this file.
   */
  static void gradfun_filter_line_ssse3(uint8_t *dst, const uint8_t *src,
                                        const uint16_t *dc,
                                        int width, int thresh,
                                        const uint16_t *dithers)
  {
      const int simd_width = width & ~7;          /* multiple of 8 */
      const int tail_width = width - simd_width;  /* leftover pixels */
      intptr_t x;

      if (tail_width) {
          /* Per the original author: removing this fallback could make the
           * function about 10% faster. */
          ff_gradfun_filter_line_c(dst + simd_width, src + simd_width,
                                   dc + simd_width / 2,
                                   tail_width, thresh, dithers);
      }

      /* The SIMD routine receives end pointers plus a negative start offset. */
      x = -simd_width;
      ff_gradfun_filter_line_ssse3(x, dst + simd_width, src + simd_width,
                                   dc + simd_width / 2,
                                   thresh, dithers);
  }
  /*
   * Blur one line using an SSE2 routine.
   *
   * All buffers are passed to the SIMD routine advanced past the range to
   * process (buf/buf1/dc by width elements, the two source rows by 2 * width
   * bytes) together with the negative offset -2 * width. The second source
   * row is src_linesize bytes after the first.
   *
   * The "movdqa" variant is used only when both the source address and
   * src_linesize are multiples of 16; otherwise the "movdqu" variant is used.
   */
  static void gradfun_blur_line_sse2(uint16_t *dc, uint16_t *buf, const uint16_t *buf1,
                                     const uint8_t *src, int src_linesize, int width)
  {
      const intptr_t  x        = -2 * width;
      uint16_t       *buf_end  = buf  + width;
      const uint16_t *buf1_end = buf1 + width;
      uint16_t       *dc_end   = dc   + width;
      const uint8_t  *row0_end = src + width * 2;
      const uint8_t  *row1_end = row0_end + src_linesize;
      const int       aligned  = !(((intptr_t) src | src_linesize) & 15);

      if (aligned) {
          ff_gradfun_blur_line_movdqa_sse2(x, buf_end, buf1_end, dc_end,
                                           row0_end, row1_end);
      } else {
          ff_gradfun_blur_line_movdqu_sse2(x, buf_end, buf1_end, dc_end,
                                           row0_end, row1_end);
      }
  }
  85. #endif /* HAVE_YASM */
  86. av_cold void ff_gradfun_init_x86(GradFunContext *gf)
  87. {
  88. #if HAVE_YASM
  89. int cpu_flags = av_get_cpu_flags();
  90. if (EXTERNAL_MMXEXT(cpu_flags))
  91. gf->filter_line = gradfun_filter_line_mmxext;
  92. if (EXTERNAL_SSSE3(cpu_flags))
  93. gf->filter_line = gradfun_filter_line_ssse3;
  94. if (EXTERNAL_SSE2(cpu_flags))
  95. gf->blur_line = gradfun_blur_line_sse2;
  96. #endif /* HAVE_YASM */
  97. }