yuv2rgb.c 9.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242
  1. /*
  2. * software YUV to RGB converter
  3. *
  4. * Copyright (C) 2001-2007 Michael Niedermayer
  5. * Copyright (C) 2009-2010 Konstantin Shishkov
  6. *
  7. * MMX/MMXEXT template stuff (needed for fast movntq support),
  8. * 1,4,8bpp support and context / deglobalize stuff
  9. * by Michael Niedermayer (michaelni@gmx.at)
  10. *
  11. * This file is part of FFmpeg.
  12. *
  13. * FFmpeg is free software; you can redistribute it and/or
  14. * modify it under the terms of the GNU Lesser General Public
  15. * License as published by the Free Software Foundation; either
  16. * version 2.1 of the License, or (at your option) any later version.
  17. *
  18. * FFmpeg is distributed in the hope that it will be useful,
  19. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  20. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  21. * Lesser General Public License for more details.
  22. *
  23. * You should have received a copy of the GNU Lesser General Public
  24. * License along with FFmpeg; if not, write to the Free Software
  25. * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  26. */
  27. #include <stdio.h>
  28. #include <stdlib.h>
  29. #include <inttypes.h>
  30. #include "config.h"
  31. #include "libswscale/rgb2rgb.h"
  32. #include "libswscale/swscale.h"
  33. #include "libswscale/swscale_internal.h"
  34. #include "libavutil/attributes.h"
  35. #include "libavutil/x86/asm.h"
  36. #include "libavutil/x86/cpu.h"
  37. #include "libavutil/cpu.h"
  38. #if HAVE_X86ASM
  39. #define YUV2RGB_LOOP(depth) \
  40. h_size = (c->dstW + 7) & ~7; \
  41. if (h_size * depth > FFABS(dstStride[0])) \
  42. h_size -= 8; \
  43. \
  44. vshift = c->srcFormat != AV_PIX_FMT_YUV422P; \
  45. \
  46. for (y = 0; y < srcSliceH; y++) { \
  47. uint8_t *image = dst[0] + (y + srcSliceY) * dstStride[0]; \
  48. const uint8_t *py = src[0] + y * srcStride[0]; \
  49. const uint8_t *pu = src[1] + (y >> vshift) * srcStride[1]; \
  50. const uint8_t *pv = src[2] + (y >> vshift) * srcStride[2]; \
  51. x86_reg index = -h_size / 2; \
  52. extern void ff_yuv_420_rgb24_ssse3(x86_reg index, uint8_t *image, const uint8_t *pu_index,
  53. const uint8_t *pv_index, const uint64_t *pointer_c_dither,
  54. const uint8_t *py_2index);
  55. extern void ff_yuv_420_bgr24_ssse3(x86_reg index, uint8_t *image, const uint8_t *pu_index,
  56. const uint8_t *pv_index, const uint64_t *pointer_c_dither,
  57. const uint8_t *py_2index);
  58. extern void ff_yuv_420_rgb15_ssse3(x86_reg index, uint8_t *image, const uint8_t *pu_index,
  59. const uint8_t *pv_index, const uint64_t *pointer_c_dither,
  60. const uint8_t *py_2index);
  61. extern void ff_yuv_420_rgb16_ssse3(x86_reg index, uint8_t *image, const uint8_t *pu_index,
  62. const uint8_t *pv_index, const uint64_t *pointer_c_dither,
  63. const uint8_t *py_2index);
  64. extern void ff_yuv_420_rgb32_ssse3(x86_reg index, uint8_t *image, const uint8_t *pu_index,
  65. const uint8_t *pv_index, const uint64_t *pointer_c_dither,
  66. const uint8_t *py_2index);
  67. extern void ff_yuv_420_bgr32_ssse3(x86_reg index, uint8_t *image, const uint8_t *pu_index,
  68. const uint8_t *pv_index, const uint64_t *pointer_c_dither,
  69. const uint8_t *py_2index);
  70. extern void ff_yuva_420_rgb32_ssse3(x86_reg index, uint8_t *image, const uint8_t *pu_index,
  71. const uint8_t *pv_index, const uint64_t *pointer_c_dither,
  72. const uint8_t *py_2index, const uint8_t *pa_2index);
  73. extern void ff_yuva_420_bgr32_ssse3(x86_reg index, uint8_t *image, const uint8_t *pu_index,
  74. const uint8_t *pv_index, const uint64_t *pointer_c_dither,
  75. const uint8_t *py_2index, const uint8_t *pa_2index);
  76. static inline int yuv420_rgb15_ssse3(SwsContext *c, const uint8_t *src[],
  77. int srcStride[],
  78. int srcSliceY, int srcSliceH,
  79. uint8_t *dst[], int dstStride[])
  80. {
  81. int y, h_size, vshift;
  82. YUV2RGB_LOOP(2)
  83. c->blueDither = ff_dither8[y & 1];
  84. c->greenDither = ff_dither8[y & 1];
  85. c->redDither = ff_dither8[(y + 1) & 1];
  86. ff_yuv_420_rgb15_ssse3(index, image, pu - index, pv - index, &(c->redDither), py - 2 * index);
  87. }
  88. return srcSliceH;
  89. }
  90. static inline int yuv420_rgb16_ssse3(SwsContext *c, const uint8_t *src[],
  91. int srcStride[],
  92. int srcSliceY, int srcSliceH,
  93. uint8_t *dst[], int dstStride[])
  94. {
  95. int y, h_size, vshift;
  96. YUV2RGB_LOOP(2)
  97. c->blueDither = ff_dither8[y & 1];
  98. c->greenDither = ff_dither4[y & 1];
  99. c->redDither = ff_dither8[(y + 1) & 1];
  100. ff_yuv_420_rgb16_ssse3(index, image, pu - index, pv - index, &(c->redDither), py - 2 * index);
  101. }
  102. return srcSliceH;
  103. }
  104. static inline int yuv420_rgb32_ssse3(SwsContext *c, const uint8_t *src[],
  105. int srcStride[],
  106. int srcSliceY, int srcSliceH,
  107. uint8_t *dst[], int dstStride[])
  108. {
  109. int y, h_size, vshift;
  110. YUV2RGB_LOOP(4)
  111. ff_yuv_420_rgb32_ssse3(index, image, pu - index, pv - index, &(c->redDither), py - 2 * index);
  112. }
  113. return srcSliceH;
  114. }
  115. static inline int yuv420_bgr32_ssse3(SwsContext *c, const uint8_t *src[],
  116. int srcStride[],
  117. int srcSliceY, int srcSliceH,
  118. uint8_t *dst[], int dstStride[])
  119. {
  120. int y, h_size, vshift;
  121. YUV2RGB_LOOP(4)
  122. ff_yuv_420_bgr32_ssse3(index, image, pu - index, pv - index, &(c->redDither), py - 2 * index);
  123. }
  124. return srcSliceH;
  125. }
  126. static inline int yuva420_rgb32_ssse3(SwsContext *c, const uint8_t *src[],
  127. int srcStride[],
  128. int srcSliceY, int srcSliceH,
  129. uint8_t *dst[], int dstStride[])
  130. {
  131. int y, h_size, vshift;
  132. YUV2RGB_LOOP(4)
  133. const uint8_t *pa = src[3] + y * srcStride[3];
  134. ff_yuva_420_rgb32_ssse3(index, image, pu - index, pv - index, &(c->redDither), py - 2 * index, pa - 2 * index);
  135. }
  136. return srcSliceH;
  137. }
  138. static inline int yuva420_bgr32_ssse3(SwsContext *c, const uint8_t *src[],
  139. int srcStride[],
  140. int srcSliceY, int srcSliceH,
  141. uint8_t *dst[], int dstStride[])
  142. {
  143. int y, h_size, vshift;
  144. YUV2RGB_LOOP(4)
  145. const uint8_t *pa = src[3] + y * srcStride[3];
  146. ff_yuva_420_bgr32_ssse3(index, image, pu - index, pv - index, &(c->redDither), py - 2 * index, pa - 2 * index);
  147. }
  148. return srcSliceH;
  149. }
  150. static inline int yuv420_rgb24_ssse3(SwsContext *c, const uint8_t *src[],
  151. int srcStride[],
  152. int srcSliceY, int srcSliceH,
  153. uint8_t *dst[], int dstStride[])
  154. {
  155. int y, h_size, vshift;
  156. YUV2RGB_LOOP(3)
  157. ff_yuv_420_rgb24_ssse3(index, image, pu - index, pv - index, &(c->redDither), py - 2 * index);
  158. }
  159. return srcSliceH;
  160. }
  161. static inline int yuv420_bgr24_ssse3(SwsContext *c, const uint8_t *src[],
  162. int srcStride[],
  163. int srcSliceY, int srcSliceH,
  164. uint8_t *dst[], int dstStride[])
  165. {
  166. int y, h_size, vshift;
  167. YUV2RGB_LOOP(3)
  168. ff_yuv_420_bgr24_ssse3(index, image, pu - index, pv - index, &(c->redDither), py - 2 * index);
  169. }
  170. return srcSliceH;
  171. }
  172. #endif /* HAVE_X86ASM */
  173. av_cold SwsFunc ff_yuv2rgb_init_x86(SwsContext *c)
  174. {
  175. #if HAVE_X86ASM
  176. int cpu_flags = av_get_cpu_flags();
  177. if (EXTERNAL_SSSE3(cpu_flags)) {
  178. switch (c->dstFormat) {
  179. case AV_PIX_FMT_RGB32:
  180. if (c->srcFormat == AV_PIX_FMT_YUVA420P) {
  181. #if CONFIG_SWSCALE_ALPHA
  182. return yuva420_rgb32_ssse3;
  183. #endif
  184. break;
  185. } else
  186. return yuv420_rgb32_ssse3;
  187. case AV_PIX_FMT_BGR32:
  188. if (c->srcFormat == AV_PIX_FMT_YUVA420P) {
  189. #if CONFIG_SWSCALE_ALPHA
  190. return yuva420_bgr32_ssse3;
  191. #endif
  192. break;
  193. } else
  194. return yuv420_bgr32_ssse3;
  195. case AV_PIX_FMT_RGB24:
  196. return yuv420_rgb24_ssse3;
  197. case AV_PIX_FMT_BGR24:
  198. return yuv420_bgr24_ssse3;
  199. case AV_PIX_FMT_RGB565:
  200. return yuv420_rgb16_ssse3;
  201. case AV_PIX_FMT_RGB555:
  202. return yuv420_rgb15_ssse3;
  203. }
  204. }
  205. #endif /* HAVE_X86ASM */
  206. return NULL;
  207. }