yuv2rgb.c 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281
  1. /*
  2. * software YUV to RGB converter
  3. *
  4. * Copyright (C) 2001-2007 Michael Niedermayer
  5. * Copyright (C) 2009-2010 Konstantin Shishkov
  6. *
  7. * MMX/MMXEXT template stuff (needed for fast movntq support),
  8. * 1,4,8bpp support and context / deglobalize stuff
  9. * by Michael Niedermayer (michaelni@gmx.at)
  10. *
  11. * This file is part of FFmpeg.
  12. *
  13. * FFmpeg is free software; you can redistribute it and/or
  14. * modify it under the terms of the GNU Lesser General Public
  15. * License as published by the Free Software Foundation; either
  16. * version 2.1 of the License, or (at your option) any later version.
  17. *
  18. * FFmpeg is distributed in the hope that it will be useful,
  19. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  20. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  21. * Lesser General Public License for more details.
  22. *
  23. * You should have received a copy of the GNU Lesser General Public
  24. * License along with FFmpeg; if not, write to the Free Software
  25. * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  26. */
  27. #include <stdio.h>
  28. #include <stdlib.h>
  29. #include <inttypes.h>
  30. #include "config.h"
  31. #include "libswscale/rgb2rgb.h"
  32. #include "libswscale/swscale.h"
  33. #include "libswscale/swscale_internal.h"
  34. #include "libavutil/attributes.h"
  35. #include "libavutil/x86/asm.h"
  36. #include "libavutil/x86/cpu.h"
  37. #include "libavutil/cpu.h"
  38. #if HAVE_X86ASM
  /*
   * Common prologue of the packed-RGB wrappers in this file.
   *
   * Expects these names in the expanding scope: c, src, srcStride, srcSliceY,
   * srcSliceH, dst, dstStride, plus the int locals y, h_size and vshift.
   *
   * depth is the number of destination bytes per pixel.  h_size becomes
   * c->dstW rounded up to a multiple of 8, reduced by 8 when that many pixels
   * at `depth` bytes each would exceed |dstStride[0]|.  vshift is 0 when the
   * source format is AV_PIX_FMT_YUV422P and 1 otherwise; it selects the chroma
   * row as (y >> vshift).
   *
   * The macro OPENS a for-loop over the srcSliceH rows and declares the
   * per-row locals image, py, pu, pv and index (== -h_size / 2).  The user
   * supplies the rest of the loop body and the closing brace.
   *
   * The last line deliberately carries no continuation backslash, so the
   * definition ends here regardless of what follows it, and `depth` is
   * parenthesised so that expression arguments expand correctly.
   */
  #define YUV2RGB_LOOP(depth)                                          \
      h_size = (c->dstW + 7) & ~7;                                     \
      if (h_size * (depth) > FFABS(dstStride[0]))                      \
          h_size -= 8;                                                 \
                                                                       \
      vshift = c->srcFormat != AV_PIX_FMT_YUV422P;                     \
                                                                       \
      for (y = 0; y < srcSliceH; y++) {                                \
          uint8_t *image    = dst[0] + (y + srcSliceY) * dstStride[0]; \
          const uint8_t *py = src[0] +               y * srcStride[0]; \
          const uint8_t *pu = src[1] +   (y >> vshift) * srcStride[1]; \
          const uint8_t *pv = src[2] +   (y >> vshift) * srcStride[2]; \
          x86_reg index = -h_size / 2;
  52. extern void ff_yuv_420_rgb24_ssse3(x86_reg index, uint8_t *image, const uint8_t *pu_index,
  53. const uint8_t *pv_index, const uint64_t *pointer_c_dither,
  54. const uint8_t *py_2index);
  55. extern void ff_yuv_420_bgr24_ssse3(x86_reg index, uint8_t *image, const uint8_t *pu_index,
  56. const uint8_t *pv_index, const uint64_t *pointer_c_dither,
  57. const uint8_t *py_2index);
  58. extern void ff_yuv_420_rgb15_ssse3(x86_reg index, uint8_t *image, const uint8_t *pu_index,
  59. const uint8_t *pv_index, const uint64_t *pointer_c_dither,
  60. const uint8_t *py_2index);
  61. extern void ff_yuv_420_rgb16_ssse3(x86_reg index, uint8_t *image, const uint8_t *pu_index,
  62. const uint8_t *pv_index, const uint64_t *pointer_c_dither,
  63. const uint8_t *py_2index);
  64. extern void ff_yuv_420_rgb32_ssse3(x86_reg index, uint8_t *image, const uint8_t *pu_index,
  65. const uint8_t *pv_index, const uint64_t *pointer_c_dither,
  66. const uint8_t *py_2index);
  67. extern void ff_yuv_420_bgr32_ssse3(x86_reg index, uint8_t *image, const uint8_t *pu_index,
  68. const uint8_t *pv_index, const uint64_t *pointer_c_dither,
  69. const uint8_t *py_2index);
  70. extern void ff_yuva_420_rgb32_ssse3(x86_reg index, uint8_t *image, const uint8_t *pu_index,
  71. const uint8_t *pv_index, const uint64_t *pointer_c_dither,
  72. const uint8_t *py_2index, const uint8_t *pa_2index);
  73. extern void ff_yuva_420_bgr32_ssse3(x86_reg index, uint8_t *image, const uint8_t *pu_index,
  74. const uint8_t *pv_index, const uint64_t *pointer_c_dither,
  75. const uint8_t *py_2index, const uint8_t *pa_2index);
  #if ARCH_X86_64
  /*
   * Planar-output row kernel, declared only for x86-64 builds.  It takes three
   * destination pointers (`image`, `dst_b`, `dst_r`) instead of one; the
   * caller in this file passes its G, B and R row pointers in that order.
   */
  void ff_yuv_420_gbrp24_ssse3(x86_reg index,
                               uint8_t *image, uint8_t *dst_b, uint8_t *dst_r,
                               const uint8_t *pu_index, const uint8_t *pv_index,
                               const uint64_t *pointer_c_dither,
                               const uint8_t *py_2index);
  #endif
  82. static inline int yuv420_rgb15_ssse3(SwsInternal *c, const uint8_t *const src[],
  83. const int srcStride[],
  84. int srcSliceY, int srcSliceH,
  85. uint8_t *const dst[], const int dstStride[])
  86. {
  87. int y, h_size, vshift;
  88. YUV2RGB_LOOP(2)
  89. c->blueDither = ff_dither8[y & 1];
  90. c->greenDither = ff_dither8[y & 1];
  91. c->redDither = ff_dither8[(y + 1) & 1];
  92. ff_yuv_420_rgb15_ssse3(index, image, pu - index, pv - index, &(c->redDither), py - 2 * index);
  93. }
  94. return srcSliceH;
  95. }
  96. static inline int yuv420_rgb16_ssse3(SwsInternal *c, const uint8_t *const src[],
  97. const int srcStride[],
  98. int srcSliceY, int srcSliceH,
  99. uint8_t *const dst[], const int dstStride[])
  100. {
  101. int y, h_size, vshift;
  102. YUV2RGB_LOOP(2)
  103. c->blueDither = ff_dither8[y & 1];
  104. c->greenDither = ff_dither4[y & 1];
  105. c->redDither = ff_dither8[(y + 1) & 1];
  106. ff_yuv_420_rgb16_ssse3(index, image, pu - index, pv - index, &(c->redDither), py - 2 * index);
  107. }
  108. return srcSliceH;
  109. }
  110. static inline int yuv420_rgb32_ssse3(SwsInternal *c, const uint8_t *const src[],
  111. const int srcStride[],
  112. int srcSliceY, int srcSliceH,
  113. uint8_t *const dst[], const int dstStride[])
  114. {
  115. int y, h_size, vshift;
  116. YUV2RGB_LOOP(4)
  117. ff_yuv_420_rgb32_ssse3(index, image, pu - index, pv - index, &(c->redDither), py - 2 * index);
  118. }
  119. return srcSliceH;
  120. }
  121. static inline int yuv420_bgr32_ssse3(SwsInternal *c, const uint8_t *const src[],
  122. const int srcStride[],
  123. int srcSliceY, int srcSliceH,
  124. uint8_t *const dst[], const int dstStride[])
  125. {
  126. int y, h_size, vshift;
  127. YUV2RGB_LOOP(4)
  128. ff_yuv_420_bgr32_ssse3(index, image, pu - index, pv - index, &(c->redDither), py - 2 * index);
  129. }
  130. return srcSliceH;
  131. }
  132. static inline int yuva420_rgb32_ssse3(SwsInternal *c, const uint8_t *const src[],
  133. const int srcStride[],
  134. int srcSliceY, int srcSliceH,
  135. uint8_t *const dst[], const int dstStride[])
  136. {
  137. int y, h_size, vshift;
  138. YUV2RGB_LOOP(4)
  139. const uint8_t *pa = src[3] + y * srcStride[3];
  140. ff_yuva_420_rgb32_ssse3(index, image, pu - index, pv - index, &(c->redDither), py - 2 * index, pa - 2 * index);
  141. }
  142. return srcSliceH;
  143. }
  144. static inline int yuva420_bgr32_ssse3(SwsInternal *c, const uint8_t *const src[],
  145. const int srcStride[],
  146. int srcSliceY, int srcSliceH,
  147. uint8_t *const dst[], const int dstStride[])
  148. {
  149. int y, h_size, vshift;
  150. YUV2RGB_LOOP(4)
  151. const uint8_t *pa = src[3] + y * srcStride[3];
  152. ff_yuva_420_bgr32_ssse3(index, image, pu - index, pv - index, &(c->redDither), py - 2 * index, pa - 2 * index);
  153. }
  154. return srcSliceH;
  155. }
  156. static inline int yuv420_rgb24_ssse3(SwsInternal *c, const uint8_t *const src[],
  157. const int srcStride[],
  158. int srcSliceY, int srcSliceH,
  159. uint8_t *const dst[], const int dstStride[])
  160. {
  161. int y, h_size, vshift;
  162. YUV2RGB_LOOP(3)
  163. ff_yuv_420_rgb24_ssse3(index, image, pu - index, pv - index, &(c->redDither), py - 2 * index);
  164. }
  165. return srcSliceH;
  166. }
  167. static inline int yuv420_bgr24_ssse3(SwsInternal *c, const uint8_t *const src[],
  168. const int srcStride[],
  169. int srcSliceY, int srcSliceH,
  170. uint8_t *const dst[], const int dstStride[])
  171. {
  172. int y, h_size, vshift;
  173. YUV2RGB_LOOP(3)
  174. ff_yuv_420_bgr24_ssse3(index, image, pu - index, pv - index, &(c->redDither), py - 2 * index);
  175. }
  176. return srcSliceH;
  177. }
  178. #if ARCH_X86_64
  179. static inline int yuv420_gbrp_ssse3(SwsInternal *c, const uint8_t *const src[],
  180. const int srcStride[],
  181. int srcSliceY, int srcSliceH,
  182. uint8_t *const dst[], const int dstStride[])
  183. {
  184. int y, h_size, vshift;
  185. h_size = (c->dstW + 7) & ~7;
  186. if (h_size * 3 > FFABS(dstStride[0]))
  187. h_size -= 8;
  188. vshift = c->srcFormat != AV_PIX_FMT_YUV422P;
  189. for (y = 0; y < srcSliceH; y++) {
  190. uint8_t *dst_g = dst[0] + (y + srcSliceY) * dstStride[0];
  191. uint8_t *dst_b = dst[1] + (y + srcSliceY) * dstStride[1];
  192. uint8_t *dst_r = dst[2] + (y + srcSliceY) * dstStride[2];
  193. const uint8_t *py = src[0] + y * srcStride[0];
  194. const uint8_t *pu = src[1] + (y >> vshift) * srcStride[1];
  195. const uint8_t *pv = src[2] + (y >> vshift) * srcStride[2];
  196. x86_reg index = -h_size / 2;
  197. ff_yuv_420_gbrp24_ssse3(index, dst_g, dst_b, dst_r, pu - index, pv - index, &(c->redDither), py - 2 * index);
  198. }
  199. return srcSliceH;
  200. }
  201. #endif
  202. #endif /* HAVE_X86ASM */
  203. av_cold SwsFunc ff_yuv2rgb_init_x86(SwsInternal *c)
  204. {
  205. #if HAVE_X86ASM
  206. int cpu_flags = av_get_cpu_flags();
  207. if (EXTERNAL_SSSE3(cpu_flags)) {
  208. switch (c->dstFormat) {
  209. case AV_PIX_FMT_RGB32:
  210. if (c->srcFormat == AV_PIX_FMT_YUVA420P) {
  211. #if CONFIG_SWSCALE_ALPHA
  212. return yuva420_rgb32_ssse3;
  213. #endif
  214. break;
  215. } else
  216. return yuv420_rgb32_ssse3;
  217. case AV_PIX_FMT_BGR32:
  218. if (c->srcFormat == AV_PIX_FMT_YUVA420P) {
  219. #if CONFIG_SWSCALE_ALPHA
  220. return yuva420_bgr32_ssse3;
  221. #endif
  222. break;
  223. } else
  224. return yuv420_bgr32_ssse3;
  225. case AV_PIX_FMT_RGB24:
  226. return yuv420_rgb24_ssse3;
  227. case AV_PIX_FMT_BGR24:
  228. return yuv420_bgr24_ssse3;
  229. case AV_PIX_FMT_RGB565:
  230. return yuv420_rgb16_ssse3;
  231. case AV_PIX_FMT_RGB555:
  232. return yuv420_rgb15_ssse3;
  233. #if ARCH_X86_64
  234. case AV_PIX_FMT_GBRP:
  235. return yuv420_gbrp_ssse3;
  236. #endif
  237. }
  238. }
  239. #endif /* HAVE_X86ASM */
  240. return NULL;
  241. }