swscale.c 116 KB


  1. /*
  2. * Copyright (C) 2001-2011 Michael Niedermayer <michaelni@gmx.at>
  3. *
  4. * This file is part of FFmpeg.
  5. *
  6. * FFmpeg is free software; you can redistribute it and/or
  7. * modify it under the terms of the GNU Lesser General Public
  8. * License as published by the Free Software Foundation; either
  9. * version 2.1 of the License, or (at your option) any later version.
  10. *
  11. * FFmpeg is distributed in the hope that it will be useful,
  12. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  14. * Lesser General Public License for more details.
  15. *
  16. * You should have received a copy of the GNU Lesser General Public
  17. * License along with FFmpeg; if not, write to the Free Software
  18. * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  19. */
  20. #include <inttypes.h>
  21. #include <string.h>
  22. #include <math.h>
  23. #include <stdio.h>
  24. #include "config.h"
  25. #include <assert.h>
  26. #include "swscale.h"
  27. #include "swscale_internal.h"
  28. #include "rgb2rgb.h"
  29. #include "libavutil/avassert.h"
  30. #include "libavutil/intreadwrite.h"
  31. #include "libavutil/cpu.h"
  32. #include "libavutil/avutil.h"
  33. #include "libavutil/mathematics.h"
  34. #include "libavutil/bswap.h"
  35. #include "libavutil/pixdesc.h"
  /*
   * Fixed-point colour-conversion weights.
   * Each macro takes a real-valued weight, scales it by 219/255 (the *Y
   * macros) or 224/255 (the *U and *V macros), multiplies it by
   * 2^RGB2YUV_SHIFT and rounds the magnitude to the nearest integer by adding
   * 0.5 before the (int) truncation. Values built from these macros therefore
   * carry RGB2YUV_SHIFT fractional bits.
   * NOTE(review): the names look like <input channel><output component>
   * (e.g. BY = blue weight for Y) -- confirm against the code that uses them,
   * which is outside this part of the file.
   */
  36. #define RGB2YUV_SHIFT 15
  37. #define BY ( (int)(0.114*219/255*(1<<RGB2YUV_SHIFT)+0.5))
  38. #define BV (-(int)(0.081*224/255*(1<<RGB2YUV_SHIFT)+0.5))
  39. #define BU ( (int)(0.500*224/255*(1<<RGB2YUV_SHIFT)+0.5))
  40. #define GY ( (int)(0.587*219/255*(1<<RGB2YUV_SHIFT)+0.5))
  41. #define GV (-(int)(0.419*224/255*(1<<RGB2YUV_SHIFT)+0.5))
  42. #define GU (-(int)(0.331*224/255*(1<<RGB2YUV_SHIFT)+0.5))
  43. #define RY ( (int)(0.299*219/255*(1<<RGB2YUV_SHIFT)+0.5))
  44. #define RV ( (int)(0.500*224/255*(1<<RGB2YUV_SHIFT)+0.5))
  45. #define RU (-(int)(0.169*224/255*(1<<RGB2YUV_SHIFT)+0.5))
  46. /*
  47. NOTES
  48. Special versions: fast Y 1:1 scaling (no interpolation in y direction)
  49. TODO
  50. more intelligent misalignment avoidance for the horizontal scaler
  51. write special vertical cubic upscale version
  52. optimize C code (YV12 / minmax)
  53. add support for packed pixel YUV input & output
  54. add support for Y8 output
  55. optimize BGR24 & BGR32
  56. add BGR4 output support
  57. write special BGR->BGR scaler
  58. */
  /*
   * File-local 2-row x 8-column table with values in 0..3. Each row repeats a
   * two-entry pattern four times ({1,3} and {2,0}).
   */
  59. DECLARE_ALIGNED(8, static const uint8_t, dither_2x2_4)[2][8]={
  60. { 1, 3, 1, 3, 1, 3, 1, 3, },
  61. { 2, 0, 2, 0, 2, 0, 2, 0, },
  62. };
  /*
   * File-local 2-row x 8-column table with values 0, 2, 4 and 6. Each row
   * repeats a two-entry pattern four times ({6,2} and {0,4}).
   */
  63. DECLARE_ALIGNED(8, static const uint8_t, dither_2x2_8)[2][8]={
  64. { 6, 2, 6, 2, 6, 2, 6, 2, },
  65. { 0, 4, 0, 4, 0, 4, 0, 4, },
  66. };
  /*
   * Non-static 4-row x 8-column table with values in 0..15. Each row holds a
   * four-entry pattern written twice.
   */
  67. DECLARE_ALIGNED(8, const uint8_t, dither_4x4_16)[4][8]={
  68. { 8, 4, 11, 7, 8, 4, 11, 7, },
  69. { 2, 14, 1, 13, 2, 14, 1, 13, },
  70. { 10, 6, 9, 5, 10, 6, 9, 5, },
  71. { 0, 12, 3, 15, 0, 12, 3, 15, },
  72. };
  /* Non-static 8x8 table with values in 0..31. */
  73. DECLARE_ALIGNED(8, const uint8_t, dither_8x8_32)[8][8]={
  74. { 17, 9, 23, 15, 16, 8, 22, 14, },
  75. { 5, 29, 3, 27, 4, 28, 2, 26, },
  76. { 21, 13, 19, 11, 20, 12, 18, 10, },
  77. { 0, 24, 6, 30, 1, 25, 7, 31, },
  78. { 16, 8, 22, 14, 17, 9, 23, 15, },
  79. { 4, 28, 2, 26, 5, 29, 3, 27, },
  80. { 20, 12, 18, 10, 21, 13, 19, 11, },
  81. { 1, 25, 7, 31, 0, 24, 6, 30, },
  82. };
  /* Non-static 8x8 table with values in 0..72. */
  83. DECLARE_ALIGNED(8, const uint8_t, dither_8x8_73)[8][8]={
  84. { 0, 55, 14, 68, 3, 58, 17, 72, },
  85. { 37, 18, 50, 32, 40, 22, 54, 35, },
  86. { 9, 64, 5, 59, 13, 67, 8, 63, },
  87. { 46, 27, 41, 23, 49, 31, 44, 26, },
  88. { 2, 57, 16, 71, 1, 56, 15, 70, },
  89. { 39, 21, 52, 34, 38, 19, 51, 33, },
  90. { 11, 66, 7, 62, 10, 65, 6, 60, },
  91. { 48, 30, 43, 25, 47, 29, 42, 24, },
  92. };
  /*
   * dither_8x8_220: 8x8 table indexed as dither_8x8_220[y & 7][...] by the
   * yuv2mono_* templates in this file.
   *
   * Four alternative tables are written out, but the chain starts with
   * `#if 1`, so only the first one (values 0..217) is ever compiled. The
   * `#elif 1` / `#else` branches, labelled as gamma 1.5 / 2.0 / 2.5 variants,
   * are dead unless the first condition is edited by hand.
   */
  93. #if 1
  94. DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220)[8][8]={
  95. {117, 62, 158, 103, 113, 58, 155, 100, },
  96. { 34, 199, 21, 186, 31, 196, 17, 182, },
  97. {144, 89, 131, 76, 141, 86, 127, 72, },
  98. { 0, 165, 41, 206, 10, 175, 52, 217, },
  99. {110, 55, 151, 96, 120, 65, 162, 107, },
  100. { 28, 193, 14, 179, 38, 203, 24, 189, },
  101. {138, 83, 124, 69, 148, 93, 134, 79, },
  102. { 7, 172, 48, 213, 3, 168, 45, 210, },
  103. };
  104. #elif 1
  105. // tries to correct a gamma of 1.5
  106. DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220)[8][8]={
  107. { 0, 143, 18, 200, 2, 156, 25, 215, },
  108. { 78, 28, 125, 64, 89, 36, 138, 74, },
  109. { 10, 180, 3, 161, 16, 195, 8, 175, },
  110. {109, 51, 93, 38, 121, 60, 105, 47, },
  111. { 1, 152, 23, 210, 0, 147, 20, 205, },
  112. { 85, 33, 134, 71, 81, 30, 130, 67, },
  113. { 14, 190, 6, 171, 12, 185, 5, 166, },
  114. {117, 57, 101, 44, 113, 54, 97, 41, },
  115. };
  116. #elif 1
  117. // tries to correct a gamma of 2.0
  118. DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220)[8][8]={
  119. { 0, 124, 8, 193, 0, 140, 12, 213, },
  120. { 55, 14, 104, 42, 66, 19, 119, 52, },
  121. { 3, 168, 1, 145, 6, 187, 3, 162, },
  122. { 86, 31, 70, 21, 99, 39, 82, 28, },
  123. { 0, 134, 11, 206, 0, 129, 9, 200, },
  124. { 62, 17, 114, 48, 58, 16, 109, 45, },
  125. { 5, 181, 2, 157, 4, 175, 1, 151, },
  126. { 95, 36, 78, 26, 90, 34, 74, 24, },
  127. };
  128. #else
  129. // tries to correct a gamma of 2.5
  130. DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220)[8][8]={
  131. { 0, 107, 3, 187, 0, 125, 6, 212, },
  132. { 39, 7, 86, 28, 49, 11, 102, 36, },
  133. { 1, 158, 0, 131, 3, 180, 1, 151, },
  134. { 68, 19, 52, 12, 81, 25, 64, 17, },
  135. { 0, 119, 5, 203, 0, 113, 4, 195, },
  136. { 45, 9, 96, 33, 42, 8, 91, 30, },
  137. { 2, 172, 1, 144, 2, 165, 0, 137, },
  138. { 77, 23, 60, 15, 72, 21, 56, 14, },
  139. };
  140. #endif
  /*
   * Non-static 8x8 table holding even values in 0..126. The same 64 values
   * also appear as the last matrix (index 7) of dithers[] in this file.
   */
  141. DECLARE_ALIGNED(8, const uint8_t, dither_8x8_128)[8][8] = {
  142. { 36, 68, 60, 92, 34, 66, 58, 90,},
  143. { 100, 4,124, 28, 98, 2,122, 26,},
  144. { 52, 84, 44, 76, 50, 82, 42, 74,},
  145. { 116, 20,108, 12,114, 18,106, 10,},
  146. { 32, 64, 56, 88, 38, 70, 62, 94,},
  147. { 96, 0,120, 24,102, 6,126, 30,},
  148. { 48, 80, 40, 72, 54, 86, 46, 78,},
  149. { 112, 16,104, 8,118, 22,110, 14,},
  150. };
  /*
   * Non-static row of eight bytes, all equal to 64. The file-local flat64[]
   * defined further down holds the same contents.
   */
  151. DECLARE_ALIGNED(8, const uint8_t, ff_sws_pb_64)[8] =
  152. { 64, 64, 64, 64, 64, 64, 64, 64 };
  /*
   * dithers[k][row][col]: eight 8x8 matrices.
   * The largest entries of matrices 0..5 are 1, 3, 7, 15, 31 and 63.
   * Matrix 6 is a verbatim copy of matrix 5, and matrix 7 holds the same
   * values as dither_8x8_128 (even numbers 0..126).
   * NOTE(review): whether matrix 6 duplicating matrix 5 is intentional cannot
   * be told from this file section -- confirm against the code selecting k.
   */
  153. DECLARE_ALIGNED(8, const uint8_t, dithers)[8][8][8]={
  154. {
  155. { 0, 1, 0, 1, 0, 1, 0, 1,},
  156. { 1, 0, 1, 0, 1, 0, 1, 0,},
  157. { 0, 1, 0, 1, 0, 1, 0, 1,},
  158. { 1, 0, 1, 0, 1, 0, 1, 0,},
  159. { 0, 1, 0, 1, 0, 1, 0, 1,},
  160. { 1, 0, 1, 0, 1, 0, 1, 0,},
  161. { 0, 1, 0, 1, 0, 1, 0, 1,},
  162. { 1, 0, 1, 0, 1, 0, 1, 0,},
  163. },{
  164. { 1, 2, 1, 2, 1, 2, 1, 2,},
  165. { 3, 0, 3, 0, 3, 0, 3, 0,},
  166. { 1, 2, 1, 2, 1, 2, 1, 2,},
  167. { 3, 0, 3, 0, 3, 0, 3, 0,},
  168. { 1, 2, 1, 2, 1, 2, 1, 2,},
  169. { 3, 0, 3, 0, 3, 0, 3, 0,},
  170. { 1, 2, 1, 2, 1, 2, 1, 2,},
  171. { 3, 0, 3, 0, 3, 0, 3, 0,},
  172. },{
  173. { 2, 4, 3, 5, 2, 4, 3, 5,},
  174. { 6, 0, 7, 1, 6, 0, 7, 1,},
  175. { 3, 5, 2, 4, 3, 5, 2, 4,},
  176. { 7, 1, 6, 0, 7, 1, 6, 0,},
  177. { 2, 4, 3, 5, 2, 4, 3, 5,},
  178. { 6, 0, 7, 1, 6, 0, 7, 1,},
  179. { 3, 5, 2, 4, 3, 5, 2, 4,},
  180. { 7, 1, 6, 0, 7, 1, 6, 0,},
  181. },{
  182. { 4, 8, 7, 11, 4, 8, 7, 11,},
  183. { 12, 0, 15, 3, 12, 0, 15, 3,},
  184. { 6, 10, 5, 9, 6, 10, 5, 9,},
  185. { 14, 2, 13, 1, 14, 2, 13, 1,},
  186. { 4, 8, 7, 11, 4, 8, 7, 11,},
  187. { 12, 0, 15, 3, 12, 0, 15, 3,},
  188. { 6, 10, 5, 9, 6, 10, 5, 9,},
  189. { 14, 2, 13, 1, 14, 2, 13, 1,},
  190. },{
  191. { 9, 17, 15, 23, 8, 16, 14, 22,},
  192. { 25, 1, 31, 7, 24, 0, 30, 6,},
  193. { 13, 21, 11, 19, 12, 20, 10, 18,},
  194. { 29, 5, 27, 3, 28, 4, 26, 2,},
  195. { 8, 16, 14, 22, 9, 17, 15, 23,},
  196. { 24, 0, 30, 6, 25, 1, 31, 7,},
  197. { 12, 20, 10, 18, 13, 21, 11, 19,},
  198. { 28, 4, 26, 2, 29, 5, 27, 3,},
  199. },{
  200. { 18, 34, 30, 46, 17, 33, 29, 45,},
  201. { 50, 2, 62, 14, 49, 1, 61, 13,},
  202. { 26, 42, 22, 38, 25, 41, 21, 37,},
  203. { 58, 10, 54, 6, 57, 9, 53, 5,},
  204. { 16, 32, 28, 44, 19, 35, 31, 47,},
  205. { 48, 0, 60, 12, 51, 3, 63, 15,},
  206. { 24, 40, 20, 36, 27, 43, 23, 39,},
  207. { 56, 8, 52, 4, 59, 11, 55, 7,},
  208. },{
  209. { 18, 34, 30, 46, 17, 33, 29, 45,},
  210. { 50, 2, 62, 14, 49, 1, 61, 13,},
  211. { 26, 42, 22, 38, 25, 41, 21, 37,},
  212. { 58, 10, 54, 6, 57, 9, 53, 5,},
  213. { 16, 32, 28, 44, 19, 35, 31, 47,},
  214. { 48, 0, 60, 12, 51, 3, 63, 15,},
  215. { 24, 40, 20, 36, 27, 43, 23, 39,},
  216. { 56, 8, 52, 4, 59, 11, 55, 7,},
  217. },{
  218. { 36, 68, 60, 92, 34, 66, 58, 90,},
  219. { 100, 4,124, 28, 98, 2,122, 26,},
  220. { 52, 84, 44, 76, 50, 82, 42, 74,},
  221. { 116, 20,108, 12,114, 18,106, 10,},
  222. { 32, 64, 56, 88, 38, 70, 62, 94,},
  223. { 96, 0,120, 24,102, 6,126, 30,},
  224. { 48, 80, 40, 72, 54, 86, 46, 78,},
  225. { 112, 16,104, 8,118, 22,110, 14,},
  226. }};
  /* File-local row of eight bytes, all 64 (same contents as ff_sws_pb_64). */
  227. static const uint8_t flat64[8]={64,64,64,64,64,64,64,64};
  /*
   * Non-static 15-row x 16-column table of 16-bit values.
   * NOTE(review): the meaning of the two indices is not visible in this part
   * of the file; presumably both relate to bit depths -- confirm against the
   * code that reads dither_scale[][].
   */
  228. const uint16_t dither_scale[15][16]={
  229. { 2, 3, 3, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,},
  230. { 2, 3, 7, 7, 13, 13, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25,},
  231. { 3, 3, 4, 15, 15, 29, 57, 57, 57, 113, 113, 113, 113, 113, 113, 113,},
  232. { 3, 4, 4, 5, 31, 31, 61, 121, 241, 241, 241, 241, 481, 481, 481, 481,},
  233. { 3, 4, 5, 5, 6, 63, 63, 125, 249, 497, 993, 993, 993, 993, 993, 1985,},
  234. { 3, 5, 6, 6, 6, 7, 127, 127, 253, 505, 1009, 2017, 4033, 4033, 4033, 4033,},
  235. { 3, 5, 6, 7, 7, 7, 8, 255, 255, 509, 1017, 2033, 4065, 8129,16257,16257,},
  236. { 3, 5, 6, 8, 8, 8, 8, 9, 511, 511, 1021, 2041, 4081, 8161,16321,32641,},
  237. { 3, 5, 7, 8, 9, 9, 9, 9, 10, 1023, 1023, 2045, 4089, 8177,16353,32705,},
  238. { 3, 5, 7, 8, 10, 10, 10, 10, 10, 11, 2047, 2047, 4093, 8185,16369,32737,},
  239. { 3, 5, 7, 8, 10, 11, 11, 11, 11, 11, 12, 4095, 4095, 8189,16377,32753,},
  240. { 3, 5, 7, 9, 10, 12, 12, 12, 12, 12, 12, 13, 8191, 8191,16381,32761,},
  241. { 3, 5, 7, 9, 10, 12, 13, 13, 13, 13, 13, 13, 14,16383,16383,32765,},
  242. { 3, 5, 7, 9, 10, 12, 14, 14, 14, 14, 14, 14, 14, 15,32767,32767,},
  243. { 3, 5, 7, 9, 11, 12, 14, 15, 15, 15, 15, 15, 15, 15, 16,65535,},
  244. };
  /*
   * Store one 16-bit sample at `pos`.
   *
   * The value written is bias + av_clip_<signedness>16(val >> shift), stored
   * with AV_WB16 when `big_endian` is non-zero and with AV_WL16 otherwise.
   * `shift` and `big_endian` are NOT macro parameters: they are picked up
   * from the scope in which the macro is expanded.
   *
   * The body is wrapped in do { } while (0) so that the macro behaves as a
   * single statement (safe inside an unbraced if/else); every use must be
   * followed by a semicolon.
   */
  #define output_pixel(pos, val, bias, signedness) \
      do { \
          if (big_endian) { \
              AV_WB16(pos, (bias) + av_clip_ ## signedness ## 16((val) >> shift)); \
          } else { \
              AV_WL16(pos, (bias) + av_clip_ ## signedness ## 16((val) >> shift)); \
          } \
      } while (0)
  251. static av_always_inline void
  252. yuv2plane1_16_c_template(const int32_t *src, uint16_t *dest, int dstW,
  253. int big_endian, int output_bits)
  254. {
  255. int i;
  256. int shift = 3;
  257. av_assert0(output_bits == 16);
  258. for (i = 0; i < dstW; i++) {
  259. int val = src[i] + (1 << (shift - 1));
  260. output_pixel(&dest[i], val, 0, uint);
  261. }
  262. }
  263. static av_always_inline void
  264. yuv2planeX_16_c_template(const int16_t *filter, int filterSize,
  265. const int32_t **src, uint16_t *dest, int dstW,
  266. int big_endian, int output_bits)
  267. {
  268. int i;
  269. int shift = 15;
  270. av_assert0(output_bits == 16);
  271. for (i = 0; i < dstW; i++) {
  272. int val = 1 << (shift - 1);
  273. int j;
  274. /* range of val is [0,0x7FFFFFFF], so 31 bits, but with lanczos/spline
  275. * filters (or anything with negative coeffs, the range can be slightly
  276. * wider in both directions. To account for this overflow, we subtract
  277. * a constant so it always fits in the signed range (assuming a
  278. * reasonable filterSize), and re-add that at the end. */
  279. val -= 0x40000000;
  280. for (j = 0; j < filterSize; j++)
  281. val += src[j][i] * filter[j];
  282. output_pixel(&dest[i], val, 0x8000, int);
  283. }
  284. }
  285. #undef output_pixel
  /*
   * Store one sample of `output_bits` significant bits at `pos`.
   *
   * The value written is av_clip_uintp2(val >> shift, output_bits), stored
   * with AV_WB16 when `big_endian` is non-zero and with AV_WL16 otherwise.
   * `shift`, `output_bits` and `big_endian` come from the expanding scope.
   *
   * Wrapped in do { } while (0) so the macro is a single statement and can be
   * used safely in an unbraced if/else; every use needs a trailing semicolon.
   */
  #define output_pixel(pos, val) \
      do { \
          if (big_endian) { \
              AV_WB16(pos, av_clip_uintp2((val) >> shift, output_bits)); \
          } else { \
              AV_WL16(pos, av_clip_uintp2((val) >> shift, output_bits)); \
          } \
      } while (0)
  292. static av_always_inline void
  293. yuv2plane1_10_c_template(const int16_t *src, uint16_t *dest, int dstW,
  294. int big_endian, int output_bits)
  295. {
  296. int i;
  297. int shift = 15 - output_bits;
  298. for (i = 0; i < dstW; i++) {
  299. int val = src[i] + (1 << (shift - 1));
  300. output_pixel(&dest[i], val);
  301. }
  302. }
  303. static av_always_inline void
  304. yuv2planeX_10_c_template(const int16_t *filter, int filterSize,
  305. const int16_t **src, uint16_t *dest, int dstW,
  306. int big_endian, int output_bits)
  307. {
  308. int i;
  309. int shift = 11 + 16 - output_bits;
  310. for (i = 0; i < dstW; i++) {
  311. int val = 1 << (shift - 1);
  312. int j;
  313. for (j = 0; j < filterSize; j++)
  314. val += src[j][i] * filter[j];
  315. output_pixel(&dest[i], val);
  316. }
  317. }
  318. #undef output_pixel
  /*
   * yuv2NBPS(bits, BE_LE, is_be, template_size, typeX_t) defines two static
   * functions:
   *   yuv2plane1_<bits><BE_LE>_c  and  yuv2planeX_<bits><BE_LE>_c
   * Each casts its source pointer(s) to typeX_t and `dest` to uint16_t *, then
   * forwards to yuv2plane1_<template_size>_c_template /
   * yuv2planeX_<template_size>_c_template with is_be as the endianness flag
   * and `bits` as output_bits. The `dither` and `offset` parameters are
   * accepted but not passed on.
   *
   * Instantiations below: 9- and 10-bit BE/LE use the "10" templates with
   * int16_t input; 16-bit BE/LE use the "16" templates with int32_t input.
   */
  319. #define yuv2NBPS(bits, BE_LE, is_be, template_size, typeX_t) \
  320. static void yuv2plane1_ ## bits ## BE_LE ## _c(const int16_t *src, \
  321. uint8_t *dest, int dstW, \
  322. const uint8_t *dither, int offset)\
  323. { \
  324. yuv2plane1_ ## template_size ## _c_template((const typeX_t *) src, \
  325. (uint16_t *) dest, dstW, is_be, bits); \
  326. }\
  327. static void yuv2planeX_ ## bits ## BE_LE ## _c(const int16_t *filter, int filterSize, \
  328. const int16_t **src, uint8_t *dest, int dstW, \
  329. const uint8_t *dither, int offset)\
  330. { \
  331. yuv2planeX_## template_size ## _c_template(filter, \
  332. filterSize, (const typeX_t **) src, \
  333. (uint16_t *) dest, dstW, is_be, bits); \
  334. }
  335. yuv2NBPS( 9, BE, 1, 10, int16_t)
  336. yuv2NBPS( 9, LE, 0, 10, int16_t)
  337. yuv2NBPS(10, BE, 1, 10, int16_t)
  338. yuv2NBPS(10, LE, 0, 10, int16_t)
  339. yuv2NBPS(16, BE, 1, 16, int32_t)
  340. yuv2NBPS(16, LE, 0, 16, int32_t)
  /**
   * Vertically filter filterSize lines of 16-bit intermediate samples into one
   * line of 8-bit output, with dithering.
   *
   * For every output position the accumulator is seeded with
   * dither[(x + offset) & 7] << 12, the filter taps are summed, and the result
   * is shifted right by 19 and clipped to 0..255.
   *
   * @param filter      filter coefficients, filterSize entries
   * @param filterSize  number of taps / input lines
   * @param src         filterSize pointers to input lines of dstW samples
   * @param dest        output bytes, dstW entries
   * @param dstW        number of samples to produce
   * @param dither      8-entry dither row
   * @param offset      added to the column index before selecting the dither
   *                    entry
   */
  static void yuv2planeX_8_c(const int16_t *filter, int filterSize,
                             const int16_t **src, uint8_t *dest, int dstW,
                             const uint8_t *dither, int offset)
  {
      int x;

      for (x = 0; x < dstW; x++) {
          int acc = dither[(x + offset) & 7] << 12;
          int tap = 0;

          while (tap < filterSize) {
              acc += src[tap][x] * filter[tap];
              tap++;
          }

          dest[x] = av_clip_uint8(acc >> 19);
      }
  }
  /**
   * Convert one line of 16-bit intermediate samples to 8-bit output with
   * dithering and no vertical filtering.
   *
   * Each output byte is (src[x] + dither[(x + offset) & 7]) >> 7, clipped to
   * 0..255.
   *
   * @param src    input samples, dstW entries
   * @param dest   output bytes, dstW entries
   * @param dstW   number of samples to convert
   * @param dither 8-entry dither row
   * @param offset added to the column index before selecting the dither entry
   */
  static void yuv2plane1_8_c(const int16_t *src, uint8_t *dest, int dstW,
                             const uint8_t *dither, int offset)
  {
      int x = 0;

      while (x < dstW) {
          const int dithered = src[x] + dither[(x + offset) & 7];

          dest[x] = av_clip_uint8(dithered >> 7);
          x++;
      }
  }
  363. static void yuv2nv12cX_c(SwsContext *c, const int16_t *chrFilter, int chrFilterSize,
  364. const int16_t **chrUSrc, const int16_t **chrVSrc,
  365. uint8_t *dest, int chrDstW)
  366. {
  367. enum PixelFormat dstFormat = c->dstFormat;
  368. const uint8_t *chrDither = c->chrDither8;
  369. int i;
  370. if (dstFormat == PIX_FMT_NV12)
  371. for (i=0; i<chrDstW; i++) {
  372. int u = chrDither[i & 7] << 12;
  373. int v = chrDither[(i + 3) & 7] << 12;
  374. int j;
  375. for (j=0; j<chrFilterSize; j++) {
  376. u += chrUSrc[j][i] * chrFilter[j];
  377. v += chrVSrc[j][i] * chrFilter[j];
  378. }
  379. dest[2*i]= av_clip_uint8(u>>19);
  380. dest[2*i+1]= av_clip_uint8(v>>19);
  381. }
  382. else
  383. for (i=0; i<chrDstW; i++) {
  384. int u = chrDither[i & 7] << 12;
  385. int v = chrDither[(i + 3) & 7] << 12;
  386. int j;
  387. for (j=0; j<chrFilterSize; j++) {
  388. u += chrUSrc[j][i] * chrFilter[j];
  389. v += chrVSrc[j][i] * chrFilter[j];
  390. }
  391. dest[2*i]= av_clip_uint8(v>>19);
  392. dest[2*i+1]= av_clip_uint8(u>>19);
  393. }
  394. }
  /*
   * Store the 16-bit value `val` at `pos`: big-endian (AV_WB16) when `target`
   * is PIX_FMT_GRAY16BE, little-endian (AV_WL16) otherwise. `target` is taken
   * from the expanding scope.
   *
   * Wrapped in do { } while (0) so the macro is a single statement and can be
   * used safely in an unbraced if/else; every use needs a trailing semicolon.
   */
  #define output_pixel(pos, val) \
      do { \
          if (target == PIX_FMT_GRAY16BE) { \
              AV_WB16(pos, val); \
          } else { \
              AV_WL16(pos, val); \
          } \
      } while (0)
  401. static av_always_inline void
  402. yuv2gray16_X_c_template(SwsContext *c, const int16_t *lumFilter,
  403. const int32_t **lumSrc, int lumFilterSize,
  404. const int16_t *chrFilter, const int32_t **chrUSrc,
  405. const int32_t **chrVSrc, int chrFilterSize,
  406. const int32_t **alpSrc, uint16_t *dest, int dstW,
  407. int y, enum PixelFormat target)
  408. {
  409. int i;
  410. for (i = 0; i < (dstW >> 1); i++) {
  411. int j;
  412. int Y1 = (1 << 14) - 0x40000000;
  413. int Y2 = (1 << 14) - 0x40000000;
  414. for (j = 0; j < lumFilterSize; j++) {
  415. Y1 += lumSrc[j][i * 2] * lumFilter[j];
  416. Y2 += lumSrc[j][i * 2 + 1] * lumFilter[j];
  417. }
  418. Y1 >>= 15;
  419. Y2 >>= 15;
  420. Y1 = av_clip_int16(Y1);
  421. Y2 = av_clip_int16(Y2);
  422. output_pixel(&dest[i * 2 + 0], 0x8000 + Y1);
  423. output_pixel(&dest[i * 2 + 1], 0x8000 + Y2);
  424. }
  425. }
  426. static av_always_inline void
  427. yuv2gray16_2_c_template(SwsContext *c, const int32_t *buf[2],
  428. const int32_t *ubuf[2], const int32_t *vbuf[2],
  429. const int32_t *abuf[2], uint16_t *dest, int dstW,
  430. int yalpha, int uvalpha, int y,
  431. enum PixelFormat target)
  432. {
  433. int yalpha1 = 4095 - yalpha;
  434. int i;
  435. const int32_t *buf0 = buf[0], *buf1 = buf[1];
  436. for (i = 0; i < (dstW >> 1); i++) {
  437. int Y1 = (buf0[i * 2 ] * yalpha1 + buf1[i * 2 ] * yalpha) >> 15;
  438. int Y2 = (buf0[i * 2 + 1] * yalpha1 + buf1[i * 2 + 1] * yalpha) >> 15;
  439. output_pixel(&dest[i * 2 + 0], Y1);
  440. output_pixel(&dest[i * 2 + 1], Y2);
  441. }
  442. }
  443. static av_always_inline void
  444. yuv2gray16_1_c_template(SwsContext *c, const int32_t *buf0,
  445. const int32_t *ubuf[2], const int32_t *vbuf[2],
  446. const int32_t *abuf0, uint16_t *dest, int dstW,
  447. int uvalpha, int y, enum PixelFormat target)
  448. {
  449. int i;
  450. for (i = 0; i < (dstW >> 1); i++) {
  451. int Y1 = (buf0[i * 2 ]+4)>>3;
  452. int Y2 = (buf0[i * 2 + 1]+4)>>3;
  453. output_pixel(&dest[i * 2 + 0], Y1);
  454. output_pixel(&dest[i * 2 + 1], Y2);
  455. }
  456. }
  457. #undef output_pixel
  /*
   * YUV2PACKED16WRAPPER(name, base, ext, fmt) defines three static functions
   *   <name><ext>_X_c, <name><ext>_2_c and <name><ext>_1_c
   * whose parameters use int16_t / uint8_t pointer types. Each one casts its
   * sample pointers to the corresponding const int32_t pointer types and its
   * destination to uint16_t *, then forwards every argument, plus `fmt` as the
   * target pixel format, to <name><base>_X_c_template, <name><base>_2_c_template
   * or <name><base>_1_c_template respectively.
   *
   * The two instantiations below pass an empty `base` and create the
   * yuv2gray16LE_* and yuv2gray16BE_* functions on top of the
   * yuv2gray16_*_c_template functions.
   */
  458. #define YUV2PACKED16WRAPPER(name, base, ext, fmt) \
  459. static void name ## ext ## _X_c(SwsContext *c, const int16_t *lumFilter, \
  460. const int16_t **_lumSrc, int lumFilterSize, \
  461. const int16_t *chrFilter, const int16_t **_chrUSrc, \
  462. const int16_t **_chrVSrc, int chrFilterSize, \
  463. const int16_t **_alpSrc, uint8_t *_dest, int dstW, \
  464. int y) \
  465. { \
  466. const int32_t **lumSrc = (const int32_t **) _lumSrc, \
  467. **chrUSrc = (const int32_t **) _chrUSrc, \
  468. **chrVSrc = (const int32_t **) _chrVSrc, \
  469. **alpSrc = (const int32_t **) _alpSrc; \
  470. uint16_t *dest = (uint16_t *) _dest; \
  471. name ## base ## _X_c_template(c, lumFilter, lumSrc, lumFilterSize, \
  472. chrFilter, chrUSrc, chrVSrc, chrFilterSize, \
  473. alpSrc, dest, dstW, y, fmt); \
  474. } \
  475. \
  476. static void name ## ext ## _2_c(SwsContext *c, const int16_t *_buf[2], \
  477. const int16_t *_ubuf[2], const int16_t *_vbuf[2], \
  478. const int16_t *_abuf[2], uint8_t *_dest, int dstW, \
  479. int yalpha, int uvalpha, int y) \
  480. { \
  481. const int32_t **buf = (const int32_t **) _buf, \
  482. **ubuf = (const int32_t **) _ubuf, \
  483. **vbuf = (const int32_t **) _vbuf, \
  484. **abuf = (const int32_t **) _abuf; \
  485. uint16_t *dest = (uint16_t *) _dest; \
  486. name ## base ## _2_c_template(c, buf, ubuf, vbuf, abuf, \
  487. dest, dstW, yalpha, uvalpha, y, fmt); \
  488. } \
  489. \
  490. static void name ## ext ## _1_c(SwsContext *c, const int16_t *_buf0, \
  491. const int16_t *_ubuf[2], const int16_t *_vbuf[2], \
  492. const int16_t *_abuf0, uint8_t *_dest, int dstW, \
  493. int uvalpha, int y) \
  494. { \
  495. const int32_t *buf0 = (const int32_t *) _buf0, \
  496. **ubuf = (const int32_t **) _ubuf, \
  497. **vbuf = (const int32_t **) _vbuf, \
  498. *abuf0 = (const int32_t *) _abuf0; \
  499. uint16_t *dest = (uint16_t *) _dest; \
  500. name ## base ## _1_c_template(c, buf0, ubuf, vbuf, abuf0, dest, \
  501. dstW, uvalpha, y, fmt); \
  502. }
  503. YUV2PACKED16WRAPPER(yuv2gray16,, LE, PIX_FMT_GRAY16LE)
  504. YUV2PACKED16WRAPPER(yuv2gray16,, BE, PIX_FMT_GRAY16BE)
  /*
   * Store one packed byte of 1-bit pixels into the lvalue `pos`: `acc` as-is
   * when `target` is PIX_FMT_MONOBLACK, its bitwise complement otherwise.
   * `target` is taken from the expanding scope. `pos` is evaluated exactly
   * once, so an argument such as *dest++ advances the pointer by one.
   *
   * Wrapped in do { } while (0) so the macro is a single statement and can be
   * used safely in an unbraced if/else; every use needs a trailing semicolon.
   */
  #define output_pixel(pos, acc) \
      do { \
          if (target == PIX_FMT_MONOBLACK) { \
              (pos) = (acc); \
          } else { \
              (pos) = ~(acc); \
          } \
      } while (0)
  511. static av_always_inline void
  512. yuv2mono_X_c_template(SwsContext *c, const int16_t *lumFilter,
  513. const int16_t **lumSrc, int lumFilterSize,
  514. const int16_t *chrFilter, const int16_t **chrUSrc,
  515. const int16_t **chrVSrc, int chrFilterSize,
  516. const int16_t **alpSrc, uint8_t *dest, int dstW,
  517. int y, enum PixelFormat target)
  518. {
  519. const uint8_t * const d128=dither_8x8_220[y&7];
  520. uint8_t *g = c->table_gU[128 + YUVRGB_TABLE_HEADROOM] + c->table_gV[128 + YUVRGB_TABLE_HEADROOM];
  521. int i;
  522. unsigned acc = 0;
  523. for (i = 0; i < dstW - 1; i += 2) {
  524. int j;
  525. int Y1 = 1 << 18;
  526. int Y2 = 1 << 18;
  527. for (j = 0; j < lumFilterSize; j++) {
  528. Y1 += lumSrc[j][i] * lumFilter[j];
  529. Y2 += lumSrc[j][i+1] * lumFilter[j];
  530. }
  531. Y1 >>= 19;
  532. Y2 >>= 19;
  533. if ((Y1 | Y2) & 0x100) {
  534. Y1 = av_clip_uint8(Y1);
  535. Y2 = av_clip_uint8(Y2);
  536. }
  537. acc += acc + g[Y1 + d128[(i + 0) & 7]];
  538. acc += acc + g[Y2 + d128[(i + 1) & 7]];
  539. if ((i & 7) == 6) {
  540. output_pixel(*dest++, acc);
  541. }
  542. }
  543. }
  544. static av_always_inline void
  545. yuv2mono_2_c_template(SwsContext *c, const int16_t *buf[2],
  546. const int16_t *ubuf[2], const int16_t *vbuf[2],
  547. const int16_t *abuf[2], uint8_t *dest, int dstW,
  548. int yalpha, int uvalpha, int y,
  549. enum PixelFormat target)
  550. {
  551. const int16_t *buf0 = buf[0], *buf1 = buf[1];
  552. const uint8_t * const d128 = dither_8x8_220[y & 7];
  553. uint8_t *g = c->table_gU[128 + YUVRGB_TABLE_HEADROOM] + c->table_gV[128 + YUVRGB_TABLE_HEADROOM];
  554. int yalpha1 = 4095 - yalpha;
  555. int i;
  556. for (i = 0; i < dstW - 7; i += 8) {
  557. int acc = g[((buf0[i ] * yalpha1 + buf1[i ] * yalpha) >> 19) + d128[0]];
  558. acc += acc + g[((buf0[i + 1] * yalpha1 + buf1[i + 1] * yalpha) >> 19) + d128[1]];
  559. acc += acc + g[((buf0[i + 2] * yalpha1 + buf1[i + 2] * yalpha) >> 19) + d128[2]];
  560. acc += acc + g[((buf0[i + 3] * yalpha1 + buf1[i + 3] * yalpha) >> 19) + d128[3]];
  561. acc += acc + g[((buf0[i + 4] * yalpha1 + buf1[i + 4] * yalpha) >> 19) + d128[4]];
  562. acc += acc + g[((buf0[i + 5] * yalpha1 + buf1[i + 5] * yalpha) >> 19) + d128[5]];
  563. acc += acc + g[((buf0[i + 6] * yalpha1 + buf1[i + 6] * yalpha) >> 19) + d128[6]];
  564. acc += acc + g[((buf0[i + 7] * yalpha1 + buf1[i + 7] * yalpha) >> 19) + d128[7]];
  565. output_pixel(*dest++, acc);
  566. }
  567. }
  568. static av_always_inline void
  569. yuv2mono_1_c_template(SwsContext *c, const int16_t *buf0,
  570. const int16_t *ubuf[2], const int16_t *vbuf[2],
  571. const int16_t *abuf0, uint8_t *dest, int dstW,
  572. int uvalpha, int y, enum PixelFormat target)
  573. {
  574. const uint8_t * const d128 = dither_8x8_220[y & 7];
  575. uint8_t *g = c->table_gU[128 + YUVRGB_TABLE_HEADROOM] + c->table_gV[128 + YUVRGB_TABLE_HEADROOM];
  576. int i;
  577. for (i = 0; i < dstW - 7; i += 8) {
  578. int acc = g[(buf0[i ] >> 7) + d128[0]];
  579. acc += acc + g[(buf0[i + 1] >> 7) + d128[1]];
  580. acc += acc + g[(buf0[i + 2] >> 7) + d128[2]];
  581. acc += acc + g[(buf0[i + 3] >> 7) + d128[3]];
  582. acc += acc + g[(buf0[i + 4] >> 7) + d128[4]];
  583. acc += acc + g[(buf0[i + 5] >> 7) + d128[5]];
  584. acc += acc + g[(buf0[i + 6] >> 7) + d128[6]];
  585. acc += acc + g[(buf0[i + 7] >> 7) + d128[7]];
  586. output_pixel(*dest++, acc);
  587. }
  588. }
  589. #undef output_pixel
  /*
   * YUV2PACKEDWRAPPER(name, base, ext, fmt) defines three static functions
   *   <name><ext>_X_c, <name><ext>_2_c and <name><ext>_1_c
   * that forward all of their arguments unchanged, plus `fmt` as the target
   * pixel format, to <name><base>_X_c_template, <name><base>_2_c_template and
   * <name><base>_1_c_template respectively.
   *
   * The two instantiations below pass an empty `base` and create the
   * yuv2monowhite_* and yuv2monoblack_* functions on top of the
   * yuv2mono_*_c_template functions.
   */
  590. #define YUV2PACKEDWRAPPER(name, base, ext, fmt) \
  591. static void name ## ext ## _X_c(SwsContext *c, const int16_t *lumFilter, \
  592. const int16_t **lumSrc, int lumFilterSize, \
  593. const int16_t *chrFilter, const int16_t **chrUSrc, \
  594. const int16_t **chrVSrc, int chrFilterSize, \
  595. const int16_t **alpSrc, uint8_t *dest, int dstW, \
  596. int y) \
  597. { \
  598. name ## base ## _X_c_template(c, lumFilter, lumSrc, lumFilterSize, \
  599. chrFilter, chrUSrc, chrVSrc, chrFilterSize, \
  600. alpSrc, dest, dstW, y, fmt); \
  601. } \
  602. \
  603. static void name ## ext ## _2_c(SwsContext *c, const int16_t *buf[2], \
  604. const int16_t *ubuf[2], const int16_t *vbuf[2], \
  605. const int16_t *abuf[2], uint8_t *dest, int dstW, \
  606. int yalpha, int uvalpha, int y) \
  607. { \
  608. name ## base ## _2_c_template(c, buf, ubuf, vbuf, abuf, \
  609. dest, dstW, yalpha, uvalpha, y, fmt); \
  610. } \
  611. \
  612. static void name ## ext ## _1_c(SwsContext *c, const int16_t *buf0, \
  613. const int16_t *ubuf[2], const int16_t *vbuf[2], \
  614. const int16_t *abuf0, uint8_t *dest, int dstW, \
  615. int uvalpha, int y) \
  616. { \
  617. name ## base ## _1_c_template(c, buf0, ubuf, vbuf, \
  618. abuf0, dest, dstW, uvalpha, \
  619. y, fmt); \
  620. }
  621. YUV2PACKEDWRAPPER(yuv2mono,, white, PIX_FMT_MONOWHITE)
  622. YUV2PACKEDWRAPPER(yuv2mono,, black, PIX_FMT_MONOBLACK)
  /*
   * Store two packed 4:2:2 pixels (four bytes) at dest[pos .. pos + 3].
   * Byte order is Y1 U Y2 V when `target` is PIX_FMT_YUYV422 and U Y1 V Y2
   * otherwise. `dest` and `target` are taken from the expanding scope.
   *
   * Wrapped in do { } while (0) so the macro is a single statement and can be
   * used safely in an unbraced if/else; every use needs a trailing semicolon.
   */
  #define output_pixels(pos, Y1, U, Y2, V) \
      do { \
          if (target == PIX_FMT_YUYV422) { \
              dest[(pos) + 0] = (Y1); \
              dest[(pos) + 1] = (U);  \
              dest[(pos) + 2] = (Y2); \
              dest[(pos) + 3] = (V);  \
          } else { \
              dest[(pos) + 0] = (U);  \
              dest[(pos) + 1] = (Y1); \
              dest[(pos) + 2] = (V);  \
              dest[(pos) + 3] = (Y2); \
          } \
      } while (0)
  635. static av_always_inline void
  636. yuv2422_X_c_template(SwsContext *c, const int16_t *lumFilter,
  637. const int16_t **lumSrc, int lumFilterSize,
  638. const int16_t *chrFilter, const int16_t **chrUSrc,
  639. const int16_t **chrVSrc, int chrFilterSize,
  640. const int16_t **alpSrc, uint8_t *dest, int dstW,
  641. int y, enum PixelFormat target)
  642. {
  643. int i;
  644. for (i = 0; i < (dstW >> 1); i++) {
  645. int j;
  646. int Y1 = 1 << 18;
  647. int Y2 = 1 << 18;
  648. int U = 1 << 18;
  649. int V = 1 << 18;
  650. for (j = 0; j < lumFilterSize; j++) {
  651. Y1 += lumSrc[j][i * 2] * lumFilter[j];
  652. Y2 += lumSrc[j][i * 2 + 1] * lumFilter[j];
  653. }
  654. for (j = 0; j < chrFilterSize; j++) {
  655. U += chrUSrc[j][i] * chrFilter[j];
  656. V += chrVSrc[j][i] * chrFilter[j];
  657. }
  658. Y1 >>= 19;
  659. Y2 >>= 19;
  660. U >>= 19;
  661. V >>= 19;
  662. if ((Y1 | Y2 | U | V) & 0x100) {
  663. Y1 = av_clip_uint8(Y1);
  664. Y2 = av_clip_uint8(Y2);
  665. U = av_clip_uint8(U);
  666. V = av_clip_uint8(V);
  667. }
  668. output_pixels(4*i, Y1, U, Y2, V);
  669. }
  670. }
  671. static av_always_inline void
  672. yuv2422_2_c_template(SwsContext *c, const int16_t *buf[2],
  673. const int16_t *ubuf[2], const int16_t *vbuf[2],
  674. const int16_t *abuf[2], uint8_t *dest, int dstW,
  675. int yalpha, int uvalpha, int y,
  676. enum PixelFormat target)
  677. {
  678. const int16_t *buf0 = buf[0], *buf1 = buf[1],
  679. *ubuf0 = ubuf[0], *ubuf1 = ubuf[1],
  680. *vbuf0 = vbuf[0], *vbuf1 = vbuf[1];
  681. int yalpha1 = 4095 - yalpha;
  682. int uvalpha1 = 4095 - uvalpha;
  683. int i;
  684. for (i = 0; i < (dstW >> 1); i++) {
  685. int Y1 = (buf0[i * 2] * yalpha1 + buf1[i * 2] * yalpha) >> 19;
  686. int Y2 = (buf0[i * 2 + 1] * yalpha1 + buf1[i * 2 + 1] * yalpha) >> 19;
  687. int U = (ubuf0[i] * uvalpha1 + ubuf1[i] * uvalpha) >> 19;
  688. int V = (vbuf0[i] * uvalpha1 + vbuf1[i] * uvalpha) >> 19;
  689. output_pixels(i * 4, Y1, U, Y2, V);
  690. }
  691. }
  692. static av_always_inline void
  693. yuv2422_1_c_template(SwsContext *c, const int16_t *buf0,
  694. const int16_t *ubuf[2], const int16_t *vbuf[2],
  695. const int16_t *abuf0, uint8_t *dest, int dstW,
  696. int uvalpha, int y, enum PixelFormat target)
  697. {
  698. const int16_t *ubuf0 = ubuf[0], *ubuf1 = ubuf[1],
  699. *vbuf0 = vbuf[0], *vbuf1 = vbuf[1];
  700. int i;
  701. if (uvalpha < 2048) {
  702. for (i = 0; i < (dstW >> 1); i++) {
  703. int Y1 = buf0[i * 2] >> 7;
  704. int Y2 = buf0[i * 2 + 1] >> 7;
  705. int U = ubuf1[i] >> 7;
  706. int V = vbuf1[i] >> 7;
  707. output_pixels(i * 4, Y1, U, Y2, V);
  708. }
  709. } else {
  710. for (i = 0; i < (dstW >> 1); i++) {
  711. int Y1 = buf0[i * 2] >> 7;
  712. int Y2 = buf0[i * 2 + 1] >> 7;
  713. int U = (ubuf0[i] + ubuf1[i]) >> 8;
  714. int V = (vbuf0[i] + vbuf1[i]) >> 8;
  715. output_pixels(i * 4, Y1, U, Y2, V);
  716. }
  717. }
  718. }
  719. #undef output_pixels
  /*
   * Instantiate the packed 4:2:2 writers: these two lines create
   * yuv2yuyv422_{X,2,1}_c and yuv2uyvy422_{X,2,1}_c, each forwarding to the
   * matching yuv2422_{X,2,1}_c_template with the given pixel format.
   */
  720. YUV2PACKEDWRAPPER(yuv2, 422, yuyv422, PIX_FMT_YUYV422)
  721. YUV2PACKEDWRAPPER(yuv2, 422, uyvy422, PIX_FMT_UYVY422)
  /*
   * Helpers for the 48-bit RGB/BGR writers. All three read `target` (and R_B /
   * B_R also read the variables R and B) from the expanding scope.
   *
   * R_B evaluates to R when target is PIX_FMT_RGB48LE or PIX_FMT_RGB48BE and
   * to B otherwise; B_R is the opposite selection.
   *
   * output_pixel(pos, val) stores the 16-bit `val` at `pos`, big-endian when
   * isBE(target) is true and little-endian otherwise. It is wrapped in
   * do { } while (0) so it is a single statement and can be used safely in an
   * unbraced if/else; every use needs a trailing semicolon.
   */
  #define R_B ((target == PIX_FMT_RGB48LE || target == PIX_FMT_RGB48BE) ? R : B)
  #define B_R ((target == PIX_FMT_RGB48LE || target == PIX_FMT_RGB48BE) ? B : R)
  #define output_pixel(pos, val) \
      do { \
          if (isBE(target)) { \
              AV_WB16(pos, val); \
          } else { \
              AV_WL16(pos, val); \
          } \
      } while (0)
  730. static av_always_inline void
  731. yuv2rgb48_X_c_template(SwsContext *c, const int16_t *lumFilter,
  732. const int32_t **lumSrc, int lumFilterSize,
  733. const int16_t *chrFilter, const int32_t **chrUSrc,
  734. const int32_t **chrVSrc, int chrFilterSize,
  735. const int32_t **alpSrc, uint16_t *dest, int dstW,
  736. int y, enum PixelFormat target)
  737. {
  738. int i;
  739. for (i = 0; i < (dstW >> 1); i++) {
  740. int j;
  741. int Y1 = -0x40000000;
  742. int Y2 = -0x40000000;
  743. int U = -128 << 23; // 19
  744. int V = -128 << 23;
  745. int R, G, B;
  746. for (j = 0; j < lumFilterSize; j++) {
  747. Y1 += lumSrc[j][i * 2] * lumFilter[j];
  748. Y2 += lumSrc[j][i * 2 + 1] * lumFilter[j];
  749. }
  750. for (j = 0; j < chrFilterSize; j++) {
  751. U += chrUSrc[j][i] * chrFilter[j];
  752. V += chrVSrc[j][i] * chrFilter[j];
  753. }
  754. // 8bit: 12+15=27; 16-bit: 12+19=31
  755. Y1 >>= 14; // 10
  756. Y1 += 0x10000;
  757. Y2 >>= 14;
  758. Y2 += 0x10000;
  759. U >>= 14;
  760. V >>= 14;
  761. // 8bit: 27 -> 17bit, 16bit: 31 - 14 = 17bit
  762. Y1 -= c->yuv2rgb_y_offset;
  763. Y2 -= c->yuv2rgb_y_offset;
  764. Y1 *= c->yuv2rgb_y_coeff;
  765. Y2 *= c->yuv2rgb_y_coeff;
  766. Y1 += 1 << 13; // 21
  767. Y2 += 1 << 13;
  768. // 8bit: 17 + 13bit = 30bit, 16bit: 17 + 13bit = 30bit
  769. R = V * c->yuv2rgb_v2r_coeff;
  770. G = V * c->yuv2rgb_v2g_coeff + U * c->yuv2rgb_u2g_coeff;
  771. B = U * c->yuv2rgb_u2b_coeff;
  772. // 8bit: 30 - 22 = 8bit, 16bit: 30bit - 14 = 16bit
  773. output_pixel(&dest[0], av_clip_uintp2(R_B + Y1, 30) >> 14);
  774. output_pixel(&dest[1], av_clip_uintp2( G + Y1, 30) >> 14);
  775. output_pixel(&dest[2], av_clip_uintp2(B_R + Y1, 30) >> 14);
  776. output_pixel(&dest[3], av_clip_uintp2(R_B + Y2, 30) >> 14);
  777. output_pixel(&dest[4], av_clip_uintp2( G + Y2, 30) >> 14);
  778. output_pixel(&dest[5], av_clip_uintp2(B_R + Y2, 30) >> 14);
  779. dest += 6;
  780. }
  781. }
  782. static av_always_inline void
  783. yuv2rgb48_2_c_template(SwsContext *c, const int32_t *buf[2],
  784. const int32_t *ubuf[2], const int32_t *vbuf[2],
  785. const int32_t *abuf[2], uint16_t *dest, int dstW,
  786. int yalpha, int uvalpha, int y,
  787. enum PixelFormat target)
  788. {
  789. const int32_t *buf0 = buf[0], *buf1 = buf[1],
  790. *ubuf0 = ubuf[0], *ubuf1 = ubuf[1],
  791. *vbuf0 = vbuf[0], *vbuf1 = vbuf[1];
  792. int yalpha1 = 4095 - yalpha;
  793. int uvalpha1 = 4095 - uvalpha;
  794. int i;
  795. for (i = 0; i < (dstW >> 1); i++) {
  796. int Y1 = (buf0[i * 2] * yalpha1 + buf1[i * 2] * yalpha) >> 14;
  797. int Y2 = (buf0[i * 2 + 1] * yalpha1 + buf1[i * 2 + 1] * yalpha) >> 14;
  798. int U = (ubuf0[i] * uvalpha1 + ubuf1[i] * uvalpha + (-128 << 23)) >> 14;
  799. int V = (vbuf0[i] * uvalpha1 + vbuf1[i] * uvalpha + (-128 << 23)) >> 14;
  800. int R, G, B;
  801. Y1 -= c->yuv2rgb_y_offset;
  802. Y2 -= c->yuv2rgb_y_offset;
  803. Y1 *= c->yuv2rgb_y_coeff;
  804. Y2 *= c->yuv2rgb_y_coeff;
  805. Y1 += 1 << 13;
  806. Y2 += 1 << 13;
  807. R = V * c->yuv2rgb_v2r_coeff;
  808. G = V * c->yuv2rgb_v2g_coeff + U * c->yuv2rgb_u2g_coeff;
  809. B = U * c->yuv2rgb_u2b_coeff;
  810. output_pixel(&dest[0], av_clip_uintp2(R_B + Y1, 30) >> 14);
  811. output_pixel(&dest[1], av_clip_uintp2( G + Y1, 30) >> 14);
  812. output_pixel(&dest[2], av_clip_uintp2(B_R + Y1, 30) >> 14);
  813. output_pixel(&dest[3], av_clip_uintp2(R_B + Y2, 30) >> 14);
  814. output_pixel(&dest[4], av_clip_uintp2( G + Y2, 30) >> 14);
  815. output_pixel(&dest[5], av_clip_uintp2(B_R + Y2, 30) >> 14);
  816. dest += 6;
  817. }
  818. }
  819. static av_always_inline void
  820. yuv2rgb48_1_c_template(SwsContext *c, const int32_t *buf0,
  821. const int32_t *ubuf[2], const int32_t *vbuf[2],
  822. const int32_t *abuf0, uint16_t *dest, int dstW,
  823. int uvalpha, int y, enum PixelFormat target)
  824. {
  825. const int32_t *ubuf0 = ubuf[0], *ubuf1 = ubuf[1],
  826. *vbuf0 = vbuf[0], *vbuf1 = vbuf[1];
  827. int i;
  828. if (uvalpha < 2048) {
  829. for (i = 0; i < (dstW >> 1); i++) {
  830. int Y1 = (buf0[i * 2] ) >> 2;
  831. int Y2 = (buf0[i * 2 + 1]) >> 2;
  832. int U = (ubuf0[i] + (-128 << 11)) >> 2;
  833. int V = (vbuf0[i] + (-128 << 11)) >> 2;
  834. int R, G, B;
  835. Y1 -= c->yuv2rgb_y_offset;
  836. Y2 -= c->yuv2rgb_y_offset;
  837. Y1 *= c->yuv2rgb_y_coeff;
  838. Y2 *= c->yuv2rgb_y_coeff;
  839. Y1 += 1 << 13;
  840. Y2 += 1 << 13;
  841. R = V * c->yuv2rgb_v2r_coeff;
  842. G = V * c->yuv2rgb_v2g_coeff + U * c->yuv2rgb_u2g_coeff;
  843. B = U * c->yuv2rgb_u2b_coeff;
  844. output_pixel(&dest[0], av_clip_uintp2(R_B + Y1, 30) >> 14);
  845. output_pixel(&dest[1], av_clip_uintp2( G + Y1, 30) >> 14);
  846. output_pixel(&dest[2], av_clip_uintp2(B_R + Y1, 30) >> 14);
  847. output_pixel(&dest[3], av_clip_uintp2(R_B + Y2, 30) >> 14);
  848. output_pixel(&dest[4], av_clip_uintp2( G + Y2, 30) >> 14);
  849. output_pixel(&dest[5], av_clip_uintp2(B_R + Y2, 30) >> 14);
  850. dest += 6;
  851. }
  852. } else {
  853. for (i = 0; i < (dstW >> 1); i++) {
  854. int Y1 = (buf0[i * 2] ) >> 2;
  855. int Y2 = (buf0[i * 2 + 1]) >> 2;
  856. int U = (ubuf0[i] + ubuf1[i] + (-128 << 12)) >> 3;
  857. int V = (vbuf0[i] + vbuf1[i] + (-128 << 12)) >> 3;
  858. int R, G, B;
  859. Y1 -= c->yuv2rgb_y_offset;
  860. Y2 -= c->yuv2rgb_y_offset;
  861. Y1 *= c->yuv2rgb_y_coeff;
  862. Y2 *= c->yuv2rgb_y_coeff;
  863. Y1 += 1 << 13;
  864. Y2 += 1 << 13;
  865. R = V * c->yuv2rgb_v2r_coeff;
  866. G = V * c->yuv2rgb_v2g_coeff + U * c->yuv2rgb_u2g_coeff;
  867. B = U * c->yuv2rgb_u2b_coeff;
  868. output_pixel(&dest[0], av_clip_uintp2(R_B + Y1, 30) >> 14);
  869. output_pixel(&dest[1], av_clip_uintp2( G + Y1, 30) >> 14);
  870. output_pixel(&dest[2], av_clip_uintp2(B_R + Y1, 30) >> 14);
  871. output_pixel(&dest[3], av_clip_uintp2(R_B + Y2, 30) >> 14);
  872. output_pixel(&dest[4], av_clip_uintp2( G + Y2, 30) >> 14);
  873. output_pixel(&dest[5], av_clip_uintp2(B_R + Y2, 30) >> 14);
  874. dest += 6;
  875. }
  876. }
  877. }
  878. #undef output_pixel
  879. #undef r_b
  880. #undef b_r
  881. YUV2PACKED16WRAPPER(yuv2, rgb48, rgb48be, PIX_FMT_RGB48BE)
  882. YUV2PACKED16WRAPPER(yuv2, rgb48, rgb48le, PIX_FMT_RGB48LE)
  883. YUV2PACKED16WRAPPER(yuv2, rgb48, bgr48be, PIX_FMT_BGR48BE)
  884. YUV2PACKED16WRAPPER(yuv2, rgb48, bgr48le, PIX_FMT_BGR48LE)
/*
 * Write out 2 RGB pixels in the target pixel format. This function takes a
 * R/G/B LUT as generated by ff_yuv2rgb_c_init_tables(), which takes care of
 * things like endianness conversion and shifting. The caller takes care of
 * setting the correct offset in these tables from the chroma (U/V) values.
 * This function then uses the luminance (Y1/Y2) values to write out the
 * correct RGB values into the destination buffer.
 *
 * _dest     start of the output line
 * i         index of the pixel pair; pixels 2*i and 2*i+1 are written
 * Y1, Y2    luma values used as indices into the tables
 * A1, A2    alpha values, only used by the 32-bit formats when hasAlpha is set
 * _r/_g/_b  lookup tables; reinterpreted as uint32_t, uint16_t or uint8_t
 *           arrays depending on target
 * y         output line number, selects the dither row for <= 16-bit formats
 * target    output pixel format; decides which branch is taken
 * hasAlpha  non-zero if A1/A2 are to be merged into 32-bit output
 */
static av_always_inline void
yuv2rgb_write(uint8_t *_dest, int i, int Y1, int Y2,
              unsigned A1, unsigned A2,
              const void *_r, const void *_g, const void *_b, int y,
              enum PixelFormat target, int hasAlpha)
{
    if (target == PIX_FMT_ARGB || target == PIX_FMT_RGBA ||
        target == PIX_FMT_ABGR || target == PIX_FMT_BGRA) {
        /* 32-bit formats: one uint32_t per pixel, the three table entries
         * are simply added together */
        uint32_t *dest = (uint32_t *) _dest;
        const uint32_t *r = (const uint32_t *) _r;
        const uint32_t *g = (const uint32_t *) _g;
        const uint32_t *b = (const uint32_t *) _b;

#if CONFIG_SMALL
        /* alpha goes into bits 0-7 for the *_1 formats, else into bits 24-31 */
        int sh = hasAlpha ? ((target == PIX_FMT_RGB32_1 || target == PIX_FMT_BGR32_1) ? 0 : 24) : 0;

        dest[i * 2 + 0] = r[Y1] + g[Y1] + b[Y1] + (hasAlpha ? A1 << sh : 0);
        dest[i * 2 + 1] = r[Y2] + g[Y2] + b[Y2] + (hasAlpha ? A2 << sh : 0);
#else
        if (hasAlpha) {
            /* alpha goes into bits 0-7 for the *_1 formats, else into bits 24-31 */
            int sh = (target == PIX_FMT_RGB32_1 || target == PIX_FMT_BGR32_1) ? 0 : 24;

            dest[i * 2 + 0] = r[Y1] + g[Y1] + b[Y1] + (A1 << sh);
            dest[i * 2 + 1] = r[Y2] + g[Y2] + b[Y2] + (A2 << sh);
        } else {
            dest[i * 2 + 0] = r[Y1] + g[Y1] + b[Y1];
            dest[i * 2 + 1] = r[Y2] + g[Y2] + b[Y2];
        }
#endif
    } else if (target == PIX_FMT_RGB24 || target == PIX_FMT_BGR24) {
        /* 24-bit formats: three bytes per pixel, six bytes per pair */
        uint8_t *dest = (uint8_t *) _dest;
        const uint8_t *r = (const uint8_t *) _r;
        const uint8_t *g = (const uint8_t *) _g;
        const uint8_t *b = (const uint8_t *) _b;

/* r_b is the table for the first byte, b_r the table for the third byte */
#define r_b ((target == PIX_FMT_RGB24) ? r : b)
#define b_r ((target == PIX_FMT_RGB24) ? b : r)
        dest[i * 6 + 0] = r_b[Y1];
        dest[i * 6 + 1] =   g[Y1];
        dest[i * 6 + 2] = b_r[Y1];
        dest[i * 6 + 3] = r_b[Y2];
        dest[i * 6 + 4] =   g[Y2];
        dest[i * 6 + 5] = b_r[Y2];
#undef r_b
#undef b_r
    } else if (target == PIX_FMT_RGB565 || target == PIX_FMT_BGR565 ||
               target == PIX_FMT_RGB555 || target == PIX_FMT_BGR555 ||
               target == PIX_FMT_RGB444 || target == PIX_FMT_BGR444) {
        /* 16-bit formats: one uint16_t per pixel; a per-component dither
         * value is added to the luma index before the table lookup */
        uint16_t *dest = (uint16_t *) _dest;
        const uint16_t *r = (const uint16_t *) _r;
        const uint16_t *g = (const uint16_t *) _g;
        const uint16_t *b = (const uint16_t *) _b;
        int dr1, dg1, db1, dr2, dg2, db2;

        if (target == PIX_FMT_RGB565 || target == PIX_FMT_BGR565) {
            /* 2x2 pattern; green uses dither_2x2_4, red/blue dither_2x2_8,
             * blue takes the opposite row from red */
            dr1 = dither_2x2_8[ y & 1     ][0];
            dg1 = dither_2x2_4[ y & 1     ][0];
            db1 = dither_2x2_8[(y & 1) ^ 1][0];
            dr2 = dither_2x2_8[ y & 1     ][1];
            dg2 = dither_2x2_4[ y & 1     ][1];
            db2 = dither_2x2_8[(y & 1) ^ 1][1];
        } else if (target == PIX_FMT_RGB555 || target == PIX_FMT_BGR555) {
            /* 2x2 pattern from dither_2x2_8 for all components; green uses
             * the other column than red */
            dr1 = dither_2x2_8[ y & 1     ][0];
            dg1 = dither_2x2_8[ y & 1     ][1];
            db1 = dither_2x2_8[(y & 1) ^ 1][0];
            dr2 = dither_2x2_8[ y & 1     ][1];
            dg2 = dither_2x2_8[ y & 1     ][0];
            db2 = dither_2x2_8[(y & 1) ^ 1][1];
        } else {
            /* RGB444/BGR444: rows taken from dither_4x4_16, selected by y & 3 */
            dr1 = dither_4x4_16[ y & 3     ][0];
            dg1 = dither_4x4_16[ y & 3     ][1];
            db1 = dither_4x4_16[(y & 3) ^ 3][0];
            dr2 = dither_4x4_16[ y & 3     ][1];
            dg2 = dither_4x4_16[ y & 3     ][0];
            db2 = dither_4x4_16[(y & 3) ^ 3][1];
        }

        dest[i * 2 + 0] = r[Y1 + dr1] + g[Y1 + dg1] + b[Y1 + db1];
        dest[i * 2 + 1] = r[Y2 + dr2] + g[Y2 + dg2] + b[Y2 + db2];
    } else /* 8/4-bit */ {
        uint8_t *dest = (uint8_t *) _dest;
        const uint8_t *r = (const uint8_t *) _r;
        const uint8_t *g = (const uint8_t *) _g;
        const uint8_t *b = (const uint8_t *) _b;
        int dr1, dg1, db1, dr2, dg2, db2;

        /* 8x8 dither: row selected by y & 7, column by the pixel's x & 7 */
        if (target == PIX_FMT_RGB8 || target == PIX_FMT_BGR8) {
            const uint8_t * const d64 = dither_8x8_73[y & 7];
            const uint8_t * const d32 = dither_8x8_32[y & 7];
            dr1 = dg1 = d32[(i * 2 + 0) & 7];
            db1 =       d64[(i * 2 + 0) & 7];
            dr2 = dg2 = d32[(i * 2 + 1) & 7];
            db2 =       d64[(i * 2 + 1) & 7];
        } else {
            const uint8_t * const d64  = dither_8x8_73 [y & 7];
            const uint8_t * const d128 = dither_8x8_220[y & 7];
            dr1 = db1 = d128[(i * 2 + 0) & 7];
            dg1 =        d64[(i * 2 + 0) & 7];
            dr2 = db2 = d128[(i * 2 + 1) & 7];
            dg2 =        d64[(i * 2 + 1) & 7];
        }

        if (target == PIX_FMT_RGB4 || target == PIX_FMT_BGR4) {
            /* two pixels share one byte; the second pixel occupies the
             * upper four bits */
            dest[i] = r[Y1 + dr1] + g[Y1 + dg1] + b[Y1 + db1] +
                    ((r[Y2 + dr2] + g[Y2 + dg2] + b[Y2 + db2]) << 4);
        } else {
            dest[i * 2 + 0] = r[Y1 + dr1] + g[Y1 + dg1] + b[Y1 + db1];
            dest[i * 2 + 1] = r[Y2 + dr2] + g[Y2 + dg2] + b[Y2 + db2];
        }
    }
}
  996. static av_always_inline void
  997. yuv2rgb_X_c_template(SwsContext *c, const int16_t *lumFilter,
  998. const int16_t **lumSrc, int lumFilterSize,
  999. const int16_t *chrFilter, const int16_t **chrUSrc,
  1000. const int16_t **chrVSrc, int chrFilterSize,
  1001. const int16_t **alpSrc, uint8_t *dest, int dstW,
  1002. int y, enum PixelFormat target, int hasAlpha)
  1003. {
  1004. int i;
  1005. for (i = 0; i < (dstW >> 1); i++) {
  1006. int j;
  1007. int Y1 = 1 << 18;
  1008. int Y2 = 1 << 18;
  1009. int U = 1 << 18;
  1010. int V = 1 << 18;
  1011. int av_unused A1, A2;
  1012. const void *r, *g, *b;
  1013. for (j = 0; j < lumFilterSize; j++) {
  1014. Y1 += lumSrc[j][i * 2] * lumFilter[j];
  1015. Y2 += lumSrc[j][i * 2 + 1] * lumFilter[j];
  1016. }
  1017. for (j = 0; j < chrFilterSize; j++) {
  1018. U += chrUSrc[j][i] * chrFilter[j];
  1019. V += chrVSrc[j][i] * chrFilter[j];
  1020. }
  1021. Y1 >>= 19;
  1022. Y2 >>= 19;
  1023. U >>= 19;
  1024. V >>= 19;
  1025. if (hasAlpha) {
  1026. A1 = 1 << 18;
  1027. A2 = 1 << 18;
  1028. for (j = 0; j < lumFilterSize; j++) {
  1029. A1 += alpSrc[j][i * 2 ] * lumFilter[j];
  1030. A2 += alpSrc[j][i * 2 + 1] * lumFilter[j];
  1031. }
  1032. A1 >>= 19;
  1033. A2 >>= 19;
  1034. if ((A1 | A2) & 0x100) {
  1035. A1 = av_clip_uint8(A1);
  1036. A2 = av_clip_uint8(A2);
  1037. }
  1038. }
  1039. r = c->table_rV[V + YUVRGB_TABLE_HEADROOM];
  1040. g = (c->table_gU[U + YUVRGB_TABLE_HEADROOM] + c->table_gV[V + YUVRGB_TABLE_HEADROOM]);
  1041. b = c->table_bU[U + YUVRGB_TABLE_HEADROOM];
  1042. yuv2rgb_write(dest, i, Y1, Y2, hasAlpha ? A1 : 0, hasAlpha ? A2 : 0,
  1043. r, g, b, y, target, hasAlpha);
  1044. }
  1045. }
  1046. static av_always_inline void
  1047. yuv2rgb_2_c_template(SwsContext *c, const int16_t *buf[2],
  1048. const int16_t *ubuf[2], const int16_t *vbuf[2],
  1049. const int16_t *abuf[2], uint8_t *dest, int dstW,
  1050. int yalpha, int uvalpha, int y,
  1051. enum PixelFormat target, int hasAlpha)
  1052. {
  1053. const int16_t *buf0 = buf[0], *buf1 = buf[1],
  1054. *ubuf0 = ubuf[0], *ubuf1 = ubuf[1],
  1055. *vbuf0 = vbuf[0], *vbuf1 = vbuf[1],
  1056. *abuf0 = hasAlpha ? abuf[0] : NULL,
  1057. *abuf1 = hasAlpha ? abuf[1] : NULL;
  1058. int yalpha1 = 4095 - yalpha;
  1059. int uvalpha1 = 4095 - uvalpha;
  1060. int i;
  1061. for (i = 0; i < (dstW >> 1); i++) {
  1062. int Y1 = (buf0[i * 2] * yalpha1 + buf1[i * 2] * yalpha) >> 19;
  1063. int Y2 = (buf0[i * 2 + 1] * yalpha1 + buf1[i * 2 + 1] * yalpha) >> 19;
  1064. int U = (ubuf0[i] * uvalpha1 + ubuf1[i] * uvalpha) >> 19;
  1065. int V = (vbuf0[i] * uvalpha1 + vbuf1[i] * uvalpha) >> 19;
  1066. int A1, A2;
  1067. const void *r = c->table_rV[V + YUVRGB_TABLE_HEADROOM],
  1068. *g = (c->table_gU[U + YUVRGB_TABLE_HEADROOM] + c->table_gV[V + YUVRGB_TABLE_HEADROOM]),
  1069. *b = c->table_bU[U + YUVRGB_TABLE_HEADROOM];
  1070. if (hasAlpha) {
  1071. A1 = (abuf0[i * 2 ] * yalpha1 + abuf1[i * 2 ] * yalpha) >> 19;
  1072. A2 = (abuf0[i * 2 + 1] * yalpha1 + abuf1[i * 2 + 1] * yalpha) >> 19;
  1073. }
  1074. yuv2rgb_write(dest, i, Y1, Y2, hasAlpha ? A1 : 0, hasAlpha ? A2 : 0,
  1075. r, g, b, y, target, hasAlpha);
  1076. }
  1077. }
  1078. static av_always_inline void
  1079. yuv2rgb_1_c_template(SwsContext *c, const int16_t *buf0,
  1080. const int16_t *ubuf[2], const int16_t *vbuf[2],
  1081. const int16_t *abuf0, uint8_t *dest, int dstW,
  1082. int uvalpha, int y, enum PixelFormat target,
  1083. int hasAlpha)
  1084. {
  1085. const int16_t *ubuf0 = ubuf[0], *ubuf1 = ubuf[1],
  1086. *vbuf0 = vbuf[0], *vbuf1 = vbuf[1];
  1087. int i;
  1088. if (uvalpha < 2048) {
  1089. for (i = 0; i < (dstW >> 1); i++) {
  1090. int Y1 = buf0[i * 2] >> 7;
  1091. int Y2 = buf0[i * 2 + 1] >> 7;
  1092. int U = ubuf1[i] >> 7;
  1093. int V = vbuf1[i] >> 7;
  1094. int A1, A2;
  1095. const void *r = c->table_rV[V + YUVRGB_TABLE_HEADROOM],
  1096. *g = (c->table_gU[U + YUVRGB_TABLE_HEADROOM] + c->table_gV[V + YUVRGB_TABLE_HEADROOM]),
  1097. *b = c->table_bU[U + YUVRGB_TABLE_HEADROOM];
  1098. if (hasAlpha) {
  1099. A1 = abuf0[i * 2 ] >> 7;
  1100. A2 = abuf0[i * 2 + 1] >> 7;
  1101. }
  1102. yuv2rgb_write(dest, i, Y1, Y2, hasAlpha ? A1 : 0, hasAlpha ? A2 : 0,
  1103. r, g, b, y, target, hasAlpha);
  1104. }
  1105. } else {
  1106. for (i = 0; i < (dstW >> 1); i++) {
  1107. int Y1 = buf0[i * 2] >> 7;
  1108. int Y2 = buf0[i * 2 + 1] >> 7;
  1109. int U = (ubuf0[i] + ubuf1[i]) >> 8;
  1110. int V = (vbuf0[i] + vbuf1[i]) >> 8;
  1111. int A1, A2;
  1112. const void *r = c->table_rV[V + YUVRGB_TABLE_HEADROOM],
  1113. *g = (c->table_gU[U + YUVRGB_TABLE_HEADROOM] + c->table_gV[V + YUVRGB_TABLE_HEADROOM]),
  1114. *b = c->table_bU[U + YUVRGB_TABLE_HEADROOM];
  1115. if (hasAlpha) {
  1116. A1 = abuf0[i * 2 ] >> 7;
  1117. A2 = abuf0[i * 2 + 1] >> 7;
  1118. }
  1119. yuv2rgb_write(dest, i, Y1, Y2, hasAlpha ? A1 : 0, hasAlpha ? A2 : 0,
  1120. r, g, b, y, target, hasAlpha);
  1121. }
  1122. }
  1123. }
/*
 * Define the function <name><ext>_X_c(), a thin wrapper that forwards all of
 * its arguments to <name><base>_X_c_template() and appends the constant
 * arguments fmt (pixel format) and hasAlpha.
 */
#define YUV2RGBWRAPPERX(name, base, ext, fmt, hasAlpha) \
static void name ## ext ## _X_c(SwsContext *c, const int16_t *lumFilter, \
                                const int16_t **lumSrc, int lumFilterSize, \
                                const int16_t *chrFilter, const int16_t **chrUSrc, \
                                const int16_t **chrVSrc, int chrFilterSize, \
                                const int16_t **alpSrc, uint8_t *dest, int dstW, \
                                int y) \
{ \
    name ## base ## _X_c_template(c, lumFilter, lumSrc, lumFilterSize, \
                                  chrFilter, chrUSrc, chrVSrc, chrFilterSize, \
                                  alpSrc, dest, dstW, y, fmt, hasAlpha); \
}
/*
 * Define the full set of wrappers for one output format: the _X_c() function
 * from YUV2RGBWRAPPERX plus <name><ext>_2_c() and <name><ext>_1_c(), which
 * forward to <name><base>_2_c_template() and <name><base>_1_c_template()
 * with fmt and hasAlpha appended.
 */
#define YUV2RGBWRAPPER(name, base, ext, fmt, hasAlpha) \
YUV2RGBWRAPPERX(name, base, ext, fmt, hasAlpha) \
static void name ## ext ## _2_c(SwsContext *c, const int16_t *buf[2], \
                                const int16_t *ubuf[2], const int16_t *vbuf[2], \
                                const int16_t *abuf[2], uint8_t *dest, int dstW, \
                                int yalpha, int uvalpha, int y) \
{ \
    name ## base ## _2_c_template(c, buf, ubuf, vbuf, abuf, \
                                  dest, dstW, yalpha, uvalpha, y, fmt, hasAlpha); \
} \
 \
static void name ## ext ## _1_c(SwsContext *c, const int16_t *buf0, \
                                const int16_t *ubuf[2], const int16_t *vbuf[2], \
                                const int16_t *abuf0, uint8_t *dest, int dstW, \
                                int uvalpha, int y) \
{ \
    name ## base ## _1_c_template(c, buf0, ubuf, vbuf, abuf0, dest, \
                                  dstW, uvalpha, y, fmt, hasAlpha); \
}
/*
 * Instantiate the table based packed RGB writers.
 *
 * With CONFIG_SMALL a single pair of 32-bit functions is generated and the
 * presence of alpha is evaluated at run time
 * (CONFIG_SWSCALE_ALPHA && c->alpPixBuf). Otherwise separate variants with
 * hasAlpha fixed to 1 (a32*, only if CONFIG_SWSCALE_ALPHA) and to 0 (x32*)
 * are generated.
 */
#if CONFIG_SMALL
YUV2RGBWRAPPER(yuv2rgb,,  32_1,  PIX_FMT_RGB32_1,   CONFIG_SWSCALE_ALPHA && c->alpPixBuf)
YUV2RGBWRAPPER(yuv2rgb,,  32,    PIX_FMT_RGB32,     CONFIG_SWSCALE_ALPHA && c->alpPixBuf)
#else
#if CONFIG_SWSCALE_ALPHA
YUV2RGBWRAPPER(yuv2rgb,, a32_1,  PIX_FMT_RGB32_1,   1)
YUV2RGBWRAPPER(yuv2rgb,, a32,    PIX_FMT_RGB32,     1)
#endif
YUV2RGBWRAPPER(yuv2rgb,, x32_1,  PIX_FMT_RGB32_1,   0)
YUV2RGBWRAPPER(yuv2rgb,, x32,    PIX_FMT_RGB32,     0)
#endif
/* formats without an alpha channel: hasAlpha is always 0 */
YUV2RGBWRAPPER(yuv2, rgb, rgb24, PIX_FMT_RGB24,   0)
YUV2RGBWRAPPER(yuv2, rgb, bgr24, PIX_FMT_BGR24,   0)
YUV2RGBWRAPPER(yuv2rgb,,  16,    PIX_FMT_RGB565,    0)
YUV2RGBWRAPPER(yuv2rgb,,  15,    PIX_FMT_RGB555,    0)
YUV2RGBWRAPPER(yuv2rgb,,  12,    PIX_FMT_RGB444,    0)
YUV2RGBWRAPPER(yuv2rgb,,   8,    PIX_FMT_RGB8,      0)
YUV2RGBWRAPPER(yuv2rgb,,   4,    PIX_FMT_RGB4,      0)
YUV2RGBWRAPPER(yuv2rgb,,   4b,   PIX_FMT_RGB4_BYTE, 0)
  1174. static av_always_inline void
  1175. yuv2rgb_full_X_c_template(SwsContext *c, const int16_t *lumFilter,
  1176. const int16_t **lumSrc, int lumFilterSize,
  1177. const int16_t *chrFilter, const int16_t **chrUSrc,
  1178. const int16_t **chrVSrc, int chrFilterSize,
  1179. const int16_t **alpSrc, uint8_t *dest,
  1180. int dstW, int y, enum PixelFormat target, int hasAlpha)
  1181. {
  1182. int i;
  1183. int step = (target == PIX_FMT_RGB24 || target == PIX_FMT_BGR24) ? 3 : 4;
  1184. for (i = 0; i < dstW; i++) {
  1185. int j;
  1186. int Y = 1<<9;
  1187. int U = (1<<9)-(128 << 19);
  1188. int V = (1<<9)-(128 << 19);
  1189. int av_unused A;
  1190. int R, G, B;
  1191. for (j = 0; j < lumFilterSize; j++) {
  1192. Y += lumSrc[j][i] * lumFilter[j];
  1193. }
  1194. for (j = 0; j < chrFilterSize; j++) {
  1195. U += chrUSrc[j][i] * chrFilter[j];
  1196. V += chrVSrc[j][i] * chrFilter[j];
  1197. }
  1198. Y >>= 10;
  1199. U >>= 10;
  1200. V >>= 10;
  1201. if (hasAlpha) {
  1202. A = 1 << 18;
  1203. for (j = 0; j < lumFilterSize; j++) {
  1204. A += alpSrc[j][i] * lumFilter[j];
  1205. }
  1206. A >>= 19;
  1207. if (A & 0x100)
  1208. A = av_clip_uint8(A);
  1209. }
  1210. Y -= c->yuv2rgb_y_offset;
  1211. Y *= c->yuv2rgb_y_coeff;
  1212. Y += 1 << 21;
  1213. R = Y + V*c->yuv2rgb_v2r_coeff;
  1214. G = Y + V*c->yuv2rgb_v2g_coeff + U*c->yuv2rgb_u2g_coeff;
  1215. B = Y + U*c->yuv2rgb_u2b_coeff;
  1216. if ((R | G | B) & 0xC0000000) {
  1217. R = av_clip_uintp2(R, 30);
  1218. G = av_clip_uintp2(G, 30);
  1219. B = av_clip_uintp2(B, 30);
  1220. }
  1221. switch(target) {
  1222. case PIX_FMT_ARGB:
  1223. dest[0] = hasAlpha ? A : 255;
  1224. dest[1] = R >> 22;
  1225. dest[2] = G >> 22;
  1226. dest[3] = B >> 22;
  1227. break;
  1228. case PIX_FMT_RGB24:
  1229. dest[0] = R >> 22;
  1230. dest[1] = G >> 22;
  1231. dest[2] = B >> 22;
  1232. break;
  1233. case PIX_FMT_RGBA:
  1234. dest[0] = R >> 22;
  1235. dest[1] = G >> 22;
  1236. dest[2] = B >> 22;
  1237. dest[3] = hasAlpha ? A : 255;
  1238. break;
  1239. case PIX_FMT_ABGR:
  1240. dest[0] = hasAlpha ? A : 255;
  1241. dest[1] = B >> 22;
  1242. dest[2] = G >> 22;
  1243. dest[3] = R >> 22;
  1244. break;
  1245. case PIX_FMT_BGR24:
  1246. dest[0] = B >> 22;
  1247. dest[1] = G >> 22;
  1248. dest[2] = R >> 22;
  1249. break;
  1250. case PIX_FMT_BGRA:
  1251. dest[0] = B >> 22;
  1252. dest[1] = G >> 22;
  1253. dest[2] = R >> 22;
  1254. dest[3] = hasAlpha ? A : 255;
  1255. break;
  1256. }
  1257. dest += step;
  1258. }
  1259. }
/*
 * Instantiate the full-chroma packed RGB writers (only _X_c variants).
 *
 * With CONFIG_SMALL one function per 32-bit format is generated and the
 * presence of alpha is evaluated at run time
 * (CONFIG_SWSCALE_ALPHA && c->alpPixBuf). Otherwise the alpha variants
 * (hasAlpha = 1, only if CONFIG_SWSCALE_ALPHA) and the "x" variants
 * (hasAlpha = 0) are generated separately.
 */
#if CONFIG_SMALL
YUV2RGBWRAPPERX(yuv2, rgb_full, bgra32_full, PIX_FMT_BGRA,  CONFIG_SWSCALE_ALPHA && c->alpPixBuf)
YUV2RGBWRAPPERX(yuv2, rgb_full, abgr32_full, PIX_FMT_ABGR,  CONFIG_SWSCALE_ALPHA && c->alpPixBuf)
YUV2RGBWRAPPERX(yuv2, rgb_full, rgba32_full, PIX_FMT_RGBA,  CONFIG_SWSCALE_ALPHA && c->alpPixBuf)
YUV2RGBWRAPPERX(yuv2, rgb_full, argb32_full, PIX_FMT_ARGB,  CONFIG_SWSCALE_ALPHA && c->alpPixBuf)
#else
#if CONFIG_SWSCALE_ALPHA
YUV2RGBWRAPPERX(yuv2, rgb_full, bgra32_full, PIX_FMT_BGRA,  1)
YUV2RGBWRAPPERX(yuv2, rgb_full, abgr32_full, PIX_FMT_ABGR,  1)
YUV2RGBWRAPPERX(yuv2, rgb_full, rgba32_full, PIX_FMT_RGBA,  1)
YUV2RGBWRAPPERX(yuv2, rgb_full, argb32_full, PIX_FMT_ARGB,  1)
#endif
YUV2RGBWRAPPERX(yuv2, rgb_full, bgrx32_full, PIX_FMT_BGRA,  0)
YUV2RGBWRAPPERX(yuv2, rgb_full, xbgr32_full, PIX_FMT_ABGR,  0)
YUV2RGBWRAPPERX(yuv2, rgb_full, rgbx32_full, PIX_FMT_RGBA,  0)
YUV2RGBWRAPPERX(yuv2, rgb_full, xrgb32_full, PIX_FMT_ARGB,  0)
#endif
/* 24-bit formats carry no alpha byte */
YUV2RGBWRAPPERX(yuv2, rgb_full, bgr24_full,  PIX_FMT_BGR24, 0)
YUV2RGBWRAPPERX(yuv2, rgb_full, rgb24_full,  PIX_FMT_RGB24, 0)
  1279. static av_always_inline void fillPlane(uint8_t* plane, int stride,
  1280. int width, int height,
  1281. int y, uint8_t val)
  1282. {
  1283. int i;
  1284. uint8_t *ptr = plane + stride*y;
  1285. for (i=0; i<height; i++) {
  1286. memset(ptr, val, width);
  1287. ptr += stride;
  1288. }
  1289. }
/*
 * Helper macros for the rgb48 input templates below; they rely on a variable
 * named "origin" (the input pixel format) being in scope.
 *
 * input_pixel(pos): read one 16-bit component at pos with AV_RB16 if
 *                   isBE(origin) is true, otherwise with AV_RL16.
 * r, b:             map the locals r_b (first component of a pixel) and b_r
 *                   (third component) to red and blue; they are swapped when
 *                   origin is one of the BGR48 formats.
 */
#define input_pixel(pos) (isBE(origin) ? AV_RB16(pos) : AV_RL16(pos))
#define r ((origin == PIX_FMT_BGR48BE || origin == PIX_FMT_BGR48LE) ? b_r : r_b)
#define b ((origin == PIX_FMT_BGR48BE || origin == PIX_FMT_BGR48LE) ? r_b : b_r)
  1293. static av_always_inline void
  1294. rgb48ToY_c_template(uint16_t *dst, const uint16_t *src, int width,
  1295. enum PixelFormat origin)
  1296. {
  1297. int i;
  1298. for (i = 0; i < width; i++) {
  1299. unsigned int r_b = input_pixel(&src[i*3+0]);
  1300. unsigned int g = input_pixel(&src[i*3+1]);
  1301. unsigned int b_r = input_pixel(&src[i*3+2]);
  1302. dst[i] = (RY*r + GY*g + BY*b + (0x2001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
  1303. }
  1304. }
  1305. static av_always_inline void
  1306. rgb48ToUV_c_template(uint16_t *dstU, uint16_t *dstV,
  1307. const uint16_t *src1, const uint16_t *src2,
  1308. int width, enum PixelFormat origin)
  1309. {
  1310. int i;
  1311. assert(src1==src2);
  1312. for (i = 0; i < width; i++) {
  1313. int r_b = input_pixel(&src1[i*3+0]);
  1314. int g = input_pixel(&src1[i*3+1]);
  1315. int b_r = input_pixel(&src1[i*3+2]);
  1316. dstU[i] = (RU*r + GU*g + BU*b + (0x10001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
  1317. dstV[i] = (RV*r + GV*g + BV*b + (0x10001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
  1318. }
  1319. }
  1320. static av_always_inline void
  1321. rgb48ToUV_half_c_template(uint16_t *dstU, uint16_t *dstV,
  1322. const uint16_t *src1, const uint16_t *src2,
  1323. int width, enum PixelFormat origin)
  1324. {
  1325. int i;
  1326. assert(src1==src2);
  1327. for (i = 0; i < width; i++) {
  1328. int r_b = (input_pixel(&src1[6 * i + 0]) + input_pixel(&src1[6 * i + 3]) + 1) >> 1;
  1329. int g = (input_pixel(&src1[6 * i + 1]) + input_pixel(&src1[6 * i + 4]) + 1) >> 1;
  1330. int b_r = (input_pixel(&src1[6 * i + 2]) + input_pixel(&src1[6 * i + 5]) + 1) >> 1;
  1331. dstU[i]= (RU*r + GU*g + BU*b + (0x10001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
  1332. dstV[i]= (RV*r + GV*g + BV*b + (0x10001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
  1333. }
  1334. }
/* The rgb48 templates are complete: drop their helper macros so that the
 * short names r and b can be used as ordinary identifiers again. */
#undef r
#undef b
#undef input_pixel
/*
 * Define the three input converters for one 48-bit packed format:
 *   <pattern>48<BE_LE>ToY_c(), <pattern>48<BE_LE>ToUV_c() and
 *   <pattern>48<BE_LE>ToUV_half_c().
 * Each one casts its byte pointers to uint16_t pointers and forwards to the
 * matching rgb48To*_c_template() with origin as the pixel format. The
 * "unused*" parameters are not read.
 */
#define rgb48funcs(pattern, BE_LE, origin) \
static void pattern ## 48 ## BE_LE ## ToY_c(uint8_t *_dst, const uint8_t *_src, const uint8_t *unused0, const uint8_t *unused1,\
                                    int width, uint32_t *unused) \
{ \
    const uint16_t *src = (const uint16_t *) _src; \
    uint16_t *dst = (uint16_t *) _dst; \
    rgb48ToY_c_template(dst, src, width, origin); \
} \
 \
static void pattern ## 48 ## BE_LE ## ToUV_c(uint8_t *_dstU, uint8_t *_dstV, \
                                    const uint8_t *unused0, const uint8_t *_src1, const uint8_t *_src2, \
                                    int width, uint32_t *unused) \
{ \
    const uint16_t *src1 = (const uint16_t *) _src1, \
                   *src2 = (const uint16_t *) _src2; \
    uint16_t *dstU = (uint16_t *) _dstU, *dstV = (uint16_t *) _dstV; \
    rgb48ToUV_c_template(dstU, dstV, src1, src2, width, origin); \
} \
 \
static void pattern ## 48 ## BE_LE ## ToUV_half_c(uint8_t *_dstU, uint8_t *_dstV, \
                                    const uint8_t *unused0, const uint8_t *_src1, const uint8_t *_src2, \
                                    int width, uint32_t *unused) \
{ \
    const uint16_t *src1 = (const uint16_t *) _src1, \
                   *src2 = (const uint16_t *) _src2; \
    uint16_t *dstU = (uint16_t *) _dstU, *dstV = (uint16_t *) _dstV; \
    rgb48ToUV_half_c_template(dstU, dstV, src1, src2, width, origin); \
}

/* one set of converters per component order and byte order */
rgb48funcs(rgb, LE, PIX_FMT_RGB48LE)
rgb48funcs(rgb, BE, PIX_FMT_RGB48BE)
rgb48funcs(bgr, LE, PIX_FMT_BGR48LE)
rgb48funcs(bgr, BE, PIX_FMT_BGR48BE)
/*
 * Fetch pixel number i from "src" for the 16/32-bit packed RGB templates
 * below; relies on "src" and "origin" being in scope. The four 32-bit
 * formats are read as one 4-byte unit with AV_RN32A, everything else as a
 * 2-byte unit with AV_RB16 or AV_RL16 depending on isBE(origin).
 */
#define input_pixel(i) ((origin == PIX_FMT_RGBA || origin == PIX_FMT_BGRA || \
                         origin == PIX_FMT_ARGB || origin == PIX_FMT_ABGR) ? AV_RN32A(&src[(i)*4]) : \
                        (isBE(origin) ? AV_RB16(&src[(i)*2]) : AV_RL16(&src[(i)*2])))
  1373. static av_always_inline void
  1374. rgb16_32ToY_c_template(int16_t *dst, const uint8_t *src,
  1375. int width, enum PixelFormat origin,
  1376. int shr, int shg, int shb, int shp,
  1377. int maskr, int maskg, int maskb,
  1378. int rsh, int gsh, int bsh, int S)
  1379. {
  1380. const int ry = RY << rsh, gy = GY << gsh, by = BY << bsh;
  1381. const unsigned rnd = (32<<((S)-1)) + (1<<(S-7));
  1382. int i;
  1383. for (i = 0; i < width; i++) {
  1384. int px = input_pixel(i) >> shp;
  1385. int b = (px & maskb) >> shb;
  1386. int g = (px & maskg) >> shg;
  1387. int r = (px & maskr) >> shr;
  1388. dst[i] = (ry * r + gy * g + by * b + rnd) >> ((S)-6);
  1389. }
  1390. }
  1391. static av_always_inline void
  1392. rgb16_32ToUV_c_template(int16_t *dstU, int16_t *dstV,
  1393. const uint8_t *src, int width,
  1394. enum PixelFormat origin,
  1395. int shr, int shg, int shb, int shp,
  1396. int maskr, int maskg, int maskb,
  1397. int rsh, int gsh, int bsh, int S)
  1398. {
  1399. const int ru = RU << rsh, gu = GU << gsh, bu = BU << bsh,
  1400. rv = RV << rsh, gv = GV << gsh, bv = BV << bsh;
  1401. const unsigned rnd = (256u<<((S)-1)) + (1<<(S-7));
  1402. int i;
  1403. for (i = 0; i < width; i++) {
  1404. int px = input_pixel(i) >> shp;
  1405. int b = (px & maskb) >> shb;
  1406. int g = (px & maskg) >> shg;
  1407. int r = (px & maskr) >> shr;
  1408. dstU[i] = (ru * r + gu * g + bu * b + rnd) >> ((S)-6);
  1409. dstV[i] = (rv * r + gv * g + bv * b + rnd) >> ((S)-6);
  1410. }
  1411. }
/**
 * Convert 16/32-bit packed RGB input to chroma at half horizontal
 * resolution: every output sample is derived from the sum of two adjacent
 * input pixels, and the extra factor of two is removed by the final shift.
 *
 * @param dstU, dstV       outputs, one int16_t per pair of input pixels
 * @param src              packed input; read through input_pixel()
 * @param width            number of output samples; 2 * width pixels are read
 * @param origin           input pixel format; consumed by input_pixel() and
 *                         by the green handling below
 * @param shr, shg, shb    right shift applied to each component after masking
 * @param shp              right shift applied to the whole pixel before masking
 * @param maskr, maskg, maskb  bit masks selecting each component of a single
 *                         pixel; widened locally to hold two-pixel sums
 * @param rsh, gsh, bsh    left shift applied to the RGB -> U/V coefficients
 * @param S                scale of the weighted sum; the result is shifted
 *                         right by S - 6 + 1
 */
static av_always_inline void
rgb16_32ToUV_half_c_template(int16_t *dstU, int16_t *dstV,
                             const uint8_t *src, int width,
                             enum PixelFormat origin,
                             int shr,   int shg,   int shb, int shp,
                             int maskr, int maskg, int maskb,
                             int rsh,   int gsh,   int bsh, int S)
{
    const int ru = RU << rsh, gu = GU << gsh, bu = BU << bsh,
              rv = RV << rsh, gv = GV << gsh, bv = BV << bsh,
              /* everything that is neither red nor blue */
              maskgx = ~(maskr | maskb);
    /* 256 << S equals 128 << (S+1), the chroma offset at the scale of the
     * two-pixel sum; 1 << (S-6) is half of the 1 << (S-6+1) divisor */
    const unsigned rnd = (256U<<(S)) + (1<<(S-6));
    int i;

    /* a sum of two components needs one more bit than a single component */
    maskr |= maskr << 1; maskb |= maskb << 1; maskg |= maskg << 1;
    for (i = 0; i < width; i++) {
        int px0 = input_pixel(2 * i + 0) >> shp;
        int px1 = input_pixel(2 * i + 1) >> shp;
        /* g: sum of the non-red/blue bits of both pixels;
         * rb: what remains of the plain sum, i.e. the red and blue sums */
        int b, r, g = (px0 & maskgx) + (px1 & maskgx);
        int rb = px0 + px1 - g;

        b = (rb & maskb) >> shb;
        /* for shifted pixels and the 565 formats the green sum is used
         * without masking; otherwise it is masked with the widened maskg */
        if (shp || origin == PIX_FMT_BGR565LE || origin == PIX_FMT_BGR565BE ||
            origin == PIX_FMT_RGB565LE || origin == PIX_FMT_RGB565BE) {
            g >>= shg;
        } else {
            g = (g & maskg) >> shg;
        }
        r = (rb & maskr) >> shr;

        dstU[i] = (ru * r + gu * g + bu * b + (unsigned)rnd) >> ((S)-6+1);
        dstV[i] = (rv * r + gv * g + bv * b + (unsigned)rnd) >> ((S)-6+1);
    }
}
/* the 16/32-bit templates above are complete, drop their pixel fetch macro */
#undef input_pixel

/*
 * Define <name>ToY_c(), <name>ToUV_c() and <name>ToUV_half_c() for one
 * packed 16/32-bit RGB input format. Each function forwards to the matching
 * rgb16_32To*_c_template() with the constants given here:
 *   fmt                  input pixel format (the templates' "origin")
 *   shr, shg, shb        per-component right shifts after masking
 *   shp                  right shift applied to the whole pixel first
 *   maskr, maskg, maskb  per-component bit masks
 *   rsh, gsh, bsh        left shifts applied to the conversion coefficients
 *   S                    scale of the weighted sum
 * The "unused*" and "dummy" parameters are not read.
 */
#define rgb16_32_wrapper(fmt, name, shr, shg, shb, shp, maskr, \
                         maskg, maskb, rsh, gsh, bsh, S) \
static void name ## ToY_c(uint8_t *dst, const uint8_t *src, const uint8_t *unused1, const uint8_t *unused2, \
                          int width, uint32_t *unused) \
{ \
    rgb16_32ToY_c_template((int16_t*)dst, src, width, fmt, \
                           shr, shg, shb, shp, \
                           maskr, maskg, maskb, rsh, gsh, bsh, S); \
} \
 \
static void name ## ToUV_c(uint8_t *dstU, uint8_t *dstV, \
                           const uint8_t *unused0, const uint8_t *src, const uint8_t *dummy, \
                           int width, uint32_t *unused) \
{ \
    rgb16_32ToUV_c_template((int16_t*)dstU, (int16_t*)dstV, src, width, fmt, \
                            shr, shg, shb, shp, \
                            maskr, maskg, maskb, rsh, gsh, bsh, S); \
} \
 \
static void name ## ToUV_half_c(uint8_t *dstU, uint8_t *dstV, \
                                const uint8_t *unused0, const uint8_t *src, const uint8_t *dummy, \
                                int width, uint32_t *unused) \
{ \
    rgb16_32ToUV_half_c_template((int16_t*)dstU, (int16_t*)dstV, src, width, fmt, \
                                 shr, shg, shb, shp, \
                                 maskr, maskg, maskb, rsh, gsh, bsh, S); \
}

/* 32-bit formats; the *_1 variants shift the pixel right by 8 bits first */
rgb16_32_wrapper(PIX_FMT_BGR32,    bgr32,  16, 0,  0, 0, 0xFF0000, 0xFF00,   0x00FF,  8, 0,  8, RGB2YUV_SHIFT+8)
rgb16_32_wrapper(PIX_FMT_BGR32_1,  bgr321, 16, 0,  0, 8, 0xFF0000, 0xFF00,   0x00FF,  8, 0,  8, RGB2YUV_SHIFT+8)
rgb16_32_wrapper(PIX_FMT_RGB32,    rgb32,   0, 0, 16, 0,   0x00FF, 0xFF00, 0xFF0000,  8, 0,  8, RGB2YUV_SHIFT+8)
rgb16_32_wrapper(PIX_FMT_RGB32_1,  rgb321,  0, 0, 16, 8,   0x00FF, 0xFF00, 0xFF0000,  8, 0,  8, RGB2YUV_SHIFT+8)
/* 16/15/12-bit little-endian formats */
rgb16_32_wrapper(PIX_FMT_BGR565LE, bgr16le, 0, 0,  0, 0,   0x001F, 0x07E0,   0xF800, 11, 5,  0, RGB2YUV_SHIFT+8)
rgb16_32_wrapper(PIX_FMT_BGR555LE, bgr15le, 0, 0,  0, 0,   0x001F, 0x03E0,   0x7C00, 10, 5,  0, RGB2YUV_SHIFT+7)
rgb16_32_wrapper(PIX_FMT_BGR444LE, bgr12le, 0, 0,  0, 0,   0x000F, 0x00F0,   0x0F00,  8, 4,  0, RGB2YUV_SHIFT+4)
rgb16_32_wrapper(PIX_FMT_RGB565LE, rgb16le, 0, 0,  0, 0,   0xF800, 0x07E0,   0x001F,  0, 5, 11, RGB2YUV_SHIFT+8)
rgb16_32_wrapper(PIX_FMT_RGB555LE, rgb15le, 0, 0,  0, 0,   0x7C00, 0x03E0,   0x001F,  0, 5, 10, RGB2YUV_SHIFT+7)
rgb16_32_wrapper(PIX_FMT_RGB444LE, rgb12le, 0, 0,  0, 0,   0x0F00, 0x00F0,   0x000F,  0, 4,  8, RGB2YUV_SHIFT+4)
/* 16/15/12-bit big-endian formats */
rgb16_32_wrapper(PIX_FMT_BGR565BE, bgr16be, 0, 0,  0, 0,   0x001F, 0x07E0,   0xF800, 11, 5,  0, RGB2YUV_SHIFT+8)
rgb16_32_wrapper(PIX_FMT_BGR555BE, bgr15be, 0, 0,  0, 0,   0x001F, 0x03E0,   0x7C00, 10, 5,  0, RGB2YUV_SHIFT+7)
rgb16_32_wrapper(PIX_FMT_BGR444BE, bgr12be, 0, 0,  0, 0,   0x000F, 0x00F0,   0x0F00,  8, 4,  0, RGB2YUV_SHIFT+4)
rgb16_32_wrapper(PIX_FMT_RGB565BE, rgb16be, 0, 0,  0, 0,   0xF800, 0x07E0,   0x001F,  0, 5, 11, RGB2YUV_SHIFT+8)
rgb16_32_wrapper(PIX_FMT_RGB555BE, rgb15be, 0, 0,  0, 0,   0x7C00, 0x03E0,   0x001F,  0, 5, 10, RGB2YUV_SHIFT+7)
rgb16_32_wrapper(PIX_FMT_RGB444BE, rgb12be, 0, 0,  0, 0,   0x0F00, 0x00F0,   0x000F,  0, 4,  8, RGB2YUV_SHIFT+4)
  1487. static void gbr24pToUV_half_c(uint16_t *dstU, uint16_t *dstV,
  1488. const uint8_t *gsrc, const uint8_t *bsrc, const uint8_t *rsrc,
  1489. int width, enum PixelFormat origin)
  1490. {
  1491. int i;
  1492. for (i = 0; i < width; i++) {
  1493. unsigned int g = gsrc[2*i] + gsrc[2*i+1];
  1494. unsigned int b = bsrc[2*i] + bsrc[2*i+1];
  1495. unsigned int r = rsrc[2*i] + rsrc[2*i+1];
  1496. dstU[i] = (RU*r + GU*g + BU*b + (0x4001<<(RGB2YUV_SHIFT-6))) >> (RGB2YUV_SHIFT-6+1);
  1497. dstV[i] = (RV*r + GV*g + BV*b + (0x4001<<(RGB2YUV_SHIFT-6))) >> (RGB2YUV_SHIFT-6+1);
  1498. }
  1499. }
  1500. static void abgrToA_c(int16_t *dst, const uint8_t *src, const uint8_t *unused1, const uint8_t *unused2, int width, uint32_t *unused)
  1501. {
  1502. int i;
  1503. for (i=0; i<width; i++) {
  1504. dst[i]= src[4*i]<<6;
  1505. }
  1506. }
  1507. static void rgbaToA_c(int16_t *dst, const uint8_t *src, const uint8_t *unused1, const uint8_t *unused2, int width, uint32_t *unused)
  1508. {
  1509. int i;
  1510. for (i=0; i<width; i++) {
  1511. dst[i]= src[4*i+3]<<6;
  1512. }
  1513. }
  1514. static void palToA_c(int16_t *dst, const uint8_t *src, const uint8_t *unused1, const uint8_t *unused2, int width, uint32_t *pal)
  1515. {
  1516. int i;
  1517. for (i=0; i<width; i++) {
  1518. int d= src[i];
  1519. dst[i]= (pal[d] >> 24)<<6;
  1520. }
  1521. }
  1522. static void palToY_c(int16_t *dst, const uint8_t *src, const uint8_t *unused1, const uint8_t *unused2, long width, uint32_t *pal)
  1523. {
  1524. int i;
  1525. for (i=0; i<width; i++) {
  1526. int d= src[i];
  1527. dst[i]= (pal[d] & 0xFF)<<6;
  1528. }
  1529. }
  1530. static void palToUV_c(uint16_t *dstU, int16_t *dstV,
  1531. const uint8_t *unused0, const uint8_t *src1, const uint8_t *src2,
  1532. int width, uint32_t *pal)
  1533. {
  1534. int i;
  1535. assert(src1 == src2);
  1536. for (i=0; i<width; i++) {
  1537. int p= pal[src1[i]];
  1538. dstU[i]= (uint8_t)(p>> 8)<<6;
  1539. dstV[i]= (uint8_t)(p>>16)<<6;
  1540. }
  1541. }
  1542. static void monowhite2Y_c(int16_t *dst, const uint8_t *src, const uint8_t *unused1, const uint8_t *unused2, int width, uint32_t *unused)
  1543. {
  1544. int i, j;
  1545. for (i=0; i<width/8; i++) {
  1546. int d= ~src[i];
  1547. for(j=0; j<8; j++)
  1548. dst[8*i+j]= ((d>>(7-j))&1)*16383;
  1549. }
  1550. if(width&7){
  1551. int d= ~src[i];
  1552. for(j=0; j<(width&7); j++)
  1553. dst[8*i+j]= ((d>>(7-j))&1)*16383;
  1554. }
  1555. }
  1556. static void monoblack2Y_c(int16_t *dst, const uint8_t *src, const uint8_t *unused1, const uint8_t *unused2, int width, uint32_t *unused)
  1557. {
  1558. int i, j;
  1559. for (i=0; i<width/8; i++) {
  1560. int d= src[i];
  1561. for(j=0; j<8; j++)
  1562. dst[8*i+j]= ((d>>(7-j))&1)*16383;
  1563. }
  1564. if(width&7){
  1565. int d= src[i];
  1566. for(j=0; j<(width&7); j++)
  1567. dst[8*i+j]= ((d>>(7-j))&1)*16383;
  1568. }
  1569. }
  1570. //FIXME yuy2* can read up to 7 samples too much
  1571. static void yuy2ToY_c(uint8_t *dst, const uint8_t *src, const uint8_t *unused1, const uint8_t *unused2, int width,
  1572. uint32_t *unused)
  1573. {
  1574. int i;
  1575. for (i=0; i<width; i++)
  1576. dst[i]= src[2*i];
  1577. }
  1578. static void yuy2ToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *unused0, const uint8_t *src1,
  1579. const uint8_t *src2, int width, uint32_t *unused)
  1580. {
  1581. int i;
  1582. for (i=0; i<width; i++) {
  1583. dstU[i]= src1[4*i + 1];
  1584. dstV[i]= src1[4*i + 3];
  1585. }
  1586. assert(src1 == src2);
  1587. }
  1588. static void bswap16Y_c(uint8_t *_dst, const uint8_t *_src, const uint8_t *unused1, const uint8_t *unused2, int width, uint32_t *unused)
  1589. {
  1590. int i;
  1591. const uint16_t *src = (const uint16_t *) _src;
  1592. uint16_t *dst = (uint16_t *) _dst;
  1593. for (i=0; i<width; i++) {
  1594. dst[i] = av_bswap16(src[i]);
  1595. }
  1596. }
  1597. static void bswap16UV_c(uint8_t *_dstU, uint8_t *_dstV, const uint8_t *unused0, const uint8_t *_src1,
  1598. const uint8_t *_src2, int width, uint32_t *unused)
  1599. {
  1600. int i;
  1601. const uint16_t *src1 = (const uint16_t *) _src1,
  1602. *src2 = (const uint16_t *) _src2;
  1603. uint16_t *dstU = (uint16_t *) _dstU, *dstV = (uint16_t *) _dstV;
  1604. for (i=0; i<width; i++) {
  1605. dstU[i] = av_bswap16(src1[i]);
  1606. dstV[i] = av_bswap16(src2[i]);
  1607. }
  1608. }
  1609. /* This is almost identical to the previous, end exists only because
  1610. * yuy2ToY/UV)(dst, src+1, ...) would have 100% unaligned accesses. */
  1611. static void uyvyToY_c(uint8_t *dst, const uint8_t *src, const uint8_t *unused1, const uint8_t *unused2, int width,
  1612. uint32_t *unused)
  1613. {
  1614. int i;
  1615. for (i=0; i<width; i++)
  1616. dst[i]= src[2*i+1];
  1617. }
  1618. static void uyvyToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *unused0, const uint8_t *src1,
  1619. const uint8_t *src2, int width, uint32_t *unused)
  1620. {
  1621. int i;
  1622. for (i=0; i<width; i++) {
  1623. dstU[i]= src1[4*i + 0];
  1624. dstV[i]= src1[4*i + 2];
  1625. }
  1626. assert(src1 == src2);
  1627. }
  1628. static av_always_inline void nvXXtoUV_c(uint8_t *dst1, uint8_t *dst2,
  1629. const uint8_t *src, int width)
  1630. {
  1631. int i;
  1632. for (i = 0; i < width; i++) {
  1633. dst1[i] = src[2*i+0];
  1634. dst2[i] = src[2*i+1];
  1635. }
  1636. }
  1637. static void nv12ToUV_c(uint8_t *dstU, uint8_t *dstV,
  1638. const uint8_t *unused0, const uint8_t *src1, const uint8_t *src2,
  1639. int width, uint32_t *unused)
  1640. {
  1641. nvXXtoUV_c(dstU, dstV, src1, width);
  1642. }
  1643. static void nv21ToUV_c(uint8_t *dstU, uint8_t *dstV,
  1644. const uint8_t *unused0, const uint8_t *src1, const uint8_t *src2,
  1645. int width, uint32_t *unused)
  1646. {
  1647. nvXXtoUV_c(dstV, dstU, src1, width);
  1648. }
  1649. #define input_pixel(pos) (isBE(origin) ? AV_RB16(pos) : AV_RL16(pos))
  1650. static void bgr24ToY_c(int16_t *dst, const uint8_t *src, const uint8_t *unused1, const uint8_t *unused2,
  1651. int width, uint32_t *unused)
  1652. {
  1653. int i;
  1654. for (i=0; i<width; i++) {
  1655. int b= src[i*3+0];
  1656. int g= src[i*3+1];
  1657. int r= src[i*3+2];
  1658. dst[i]= ((RY*r + GY*g + BY*b + (32<<(RGB2YUV_SHIFT-1)) + (1<<(RGB2YUV_SHIFT-7)))>>(RGB2YUV_SHIFT-6));
  1659. }
  1660. }
  1661. static void bgr24ToUV_c(int16_t *dstU, int16_t *dstV, const uint8_t *unused0, const uint8_t *src1,
  1662. const uint8_t *src2, int width, uint32_t *unused)
  1663. {
  1664. int i;
  1665. for (i=0; i<width; i++) {
  1666. int b= src1[3*i + 0];
  1667. int g= src1[3*i + 1];
  1668. int r= src1[3*i + 2];
  1669. dstU[i]= (RU*r + GU*g + BU*b + (256<<(RGB2YUV_SHIFT-1)) + (1<<(RGB2YUV_SHIFT-7)))>>(RGB2YUV_SHIFT-6);
  1670. dstV[i]= (RV*r + GV*g + BV*b + (256<<(RGB2YUV_SHIFT-1)) + (1<<(RGB2YUV_SHIFT-7)))>>(RGB2YUV_SHIFT-6);
  1671. }
  1672. assert(src1 == src2);
  1673. }
  1674. static void bgr24ToUV_half_c(int16_t *dstU, int16_t *dstV, const uint8_t *unused0, const uint8_t *src1,
  1675. const uint8_t *src2, int width, uint32_t *unused)
  1676. {
  1677. int i;
  1678. for (i=0; i<width; i++) {
  1679. int b= src1[6*i + 0] + src1[6*i + 3];
  1680. int g= src1[6*i + 1] + src1[6*i + 4];
  1681. int r= src1[6*i + 2] + src1[6*i + 5];
  1682. dstU[i]= (RU*r + GU*g + BU*b + (256<<RGB2YUV_SHIFT) + (1<<(RGB2YUV_SHIFT-6)))>>(RGB2YUV_SHIFT-5);
  1683. dstV[i]= (RV*r + GV*g + BV*b + (256<<RGB2YUV_SHIFT) + (1<<(RGB2YUV_SHIFT-6)))>>(RGB2YUV_SHIFT-5);
  1684. }
  1685. assert(src1 == src2);
  1686. }
  1687. static void rgb24ToY_c(int16_t *dst, const uint8_t *src, const uint8_t *unused1, const uint8_t *unused2, int width,
  1688. uint32_t *unused)
  1689. {
  1690. int i;
  1691. for (i=0; i<width; i++) {
  1692. int r= src[i*3+0];
  1693. int g= src[i*3+1];
  1694. int b= src[i*3+2];
  1695. dst[i]= ((RY*r + GY*g + BY*b + (32<<(RGB2YUV_SHIFT-1)) + (1<<(RGB2YUV_SHIFT-7)))>>(RGB2YUV_SHIFT-6));
  1696. }
  1697. }
  1698. static void rgb24ToUV_c(int16_t *dstU, int16_t *dstV, const uint8_t *unused0, const uint8_t *src1,
  1699. const uint8_t *src2, int width, uint32_t *unused)
  1700. {
  1701. int i;
  1702. assert(src1==src2);
  1703. for (i=0; i<width; i++) {
  1704. int r= src1[3*i + 0];
  1705. int g= src1[3*i + 1];
  1706. int b= src1[3*i + 2];
  1707. dstU[i]= (RU*r + GU*g + BU*b + (256<<(RGB2YUV_SHIFT-1)) + (1<<(RGB2YUV_SHIFT-7)))>>(RGB2YUV_SHIFT-6);
  1708. dstV[i]= (RV*r + GV*g + BV*b + (256<<(RGB2YUV_SHIFT-1)) + (1<<(RGB2YUV_SHIFT-7)))>>(RGB2YUV_SHIFT-6);
  1709. }
  1710. }
  1711. static void rgb24ToUV_half_c(int16_t *dstU, int16_t *dstV, const uint8_t *unused0, const uint8_t *src1,
  1712. const uint8_t *src2, int width, uint32_t *unused)
  1713. {
  1714. int i;
  1715. assert(src1==src2);
  1716. for (i=0; i<width; i++) {
  1717. int r= src1[6*i + 0] + src1[6*i + 3];
  1718. int g= src1[6*i + 1] + src1[6*i + 4];
  1719. int b= src1[6*i + 2] + src1[6*i + 5];
  1720. dstU[i]= (RU*r + GU*g + BU*b + (256<<RGB2YUV_SHIFT) + (1<<(RGB2YUV_SHIFT-6)))>>(RGB2YUV_SHIFT-5);
  1721. dstV[i]= (RV*r + GV*g + BV*b + (256<<RGB2YUV_SHIFT) + (1<<(RGB2YUV_SHIFT-6)))>>(RGB2YUV_SHIFT-5);
  1722. }
  1723. }
  1724. static void planar_rgb_to_y(uint16_t *dst, const uint8_t *src[4], int width)
  1725. {
  1726. int i;
  1727. for (i = 0; i < width; i++) {
  1728. int g = src[0][i];
  1729. int b = src[1][i];
  1730. int r = src[2][i];
  1731. dst[i] = (RY*r + GY*g + BY*b + (0x801<<(RGB2YUV_SHIFT-7))) >> (RGB2YUV_SHIFT-6);
  1732. }
  1733. }
  1734. static void planar_rgb16le_to_y(uint8_t *_dst, const uint8_t *_src[4], int width)
  1735. {
  1736. int i;
  1737. const uint16_t **src = (const uint16_t **) _src;
  1738. uint16_t *dst = (uint16_t *) _dst;
  1739. for (i = 0; i < width; i++) {
  1740. int g = AV_RL16(src[0] + i);
  1741. int b = AV_RL16(src[1] + i);
  1742. int r = AV_RL16(src[2] + i);
  1743. dst[i] = ((RY * r + GY * g + BY * b + (33 << (RGB2YUV_SHIFT - 1))) >> RGB2YUV_SHIFT);
  1744. }
  1745. }
  1746. static void planar_rgb16be_to_y(uint8_t *_dst, const uint8_t *_src[4], int width)
  1747. {
  1748. int i;
  1749. const uint16_t **src = (const uint16_t **) _src;
  1750. uint16_t *dst = (uint16_t *) _dst;
  1751. for (i = 0; i < width; i++) {
  1752. int g = AV_RB16(src[0] + i);
  1753. int b = AV_RB16(src[1] + i);
  1754. int r = AV_RB16(src[2] + i);
  1755. dst[i] = ((RY * r + GY * g + BY * b + (33 << (RGB2YUV_SHIFT - 1))) >> RGB2YUV_SHIFT);
  1756. }
  1757. }
  1758. static void planar_rgb_to_uv(uint16_t *dstU, uint16_t *dstV, const uint8_t *src[4], int width)
  1759. {
  1760. int i;
  1761. for (i = 0; i < width; i++) {
  1762. int g = src[0][i];
  1763. int b = src[1][i];
  1764. int r = src[2][i];
  1765. dstU[i] = (RU*r + GU*g + BU*b + (0x4001<<(RGB2YUV_SHIFT-7))) >> (RGB2YUV_SHIFT-6);
  1766. dstV[i] = (RV*r + GV*g + BV*b + (0x4001<<(RGB2YUV_SHIFT-7))) >> (RGB2YUV_SHIFT-6);
  1767. }
  1768. }
  1769. static void planar_rgb16le_to_uv(uint8_t *_dstU, uint8_t *_dstV, const uint8_t *_src[4], int width)
  1770. {
  1771. int i;
  1772. const uint16_t **src = (const uint16_t **) _src;
  1773. uint16_t *dstU = (uint16_t *) _dstU;
  1774. uint16_t *dstV = (uint16_t *) _dstV;
  1775. for (i = 0; i < width; i++) {
  1776. int g = AV_RL16(src[0] + i);
  1777. int b = AV_RL16(src[1] + i);
  1778. int r = AV_RL16(src[2] + i);
  1779. dstU[i] = (RU * r + GU * g + BU * b + (257 << RGB2YUV_SHIFT)) >> (RGB2YUV_SHIFT + 1);
  1780. dstV[i] = (RV * r + GV * g + BV * b + (257 << RGB2YUV_SHIFT)) >> (RGB2YUV_SHIFT + 1);
  1781. }
  1782. }
  1783. static void planar_rgb16be_to_uv(uint8_t *_dstU, uint8_t *_dstV, const uint8_t *_src[4], int width)
  1784. {
  1785. int i;
  1786. const uint16_t **src = (const uint16_t **) _src;
  1787. uint16_t *dstU = (uint16_t *) _dstU;
  1788. uint16_t *dstV = (uint16_t *) _dstV;
  1789. for (i = 0; i < width; i++) {
  1790. int g = AV_RB16(src[0] + i);
  1791. int b = AV_RB16(src[1] + i);
  1792. int r = AV_RB16(src[2] + i);
  1793. dstU[i] = (RU * r + GU * g + BU * b + (257 << RGB2YUV_SHIFT)) >> (RGB2YUV_SHIFT + 1);
  1794. dstV[i] = (RV * r + GV * g + BV * b + (257 << RGB2YUV_SHIFT)) >> (RGB2YUV_SHIFT + 1);
  1795. }
  1796. }
  1797. static void hScale16To19_c(SwsContext *c, int16_t *_dst, int dstW, const uint8_t *_src,
  1798. const int16_t *filter,
  1799. const int32_t *filterPos, int filterSize)
  1800. {
  1801. int i;
  1802. int32_t *dst = (int32_t *) _dst;
  1803. const uint16_t *src = (const uint16_t *) _src;
  1804. int bits = av_pix_fmt_descriptors[c->srcFormat].comp[0].depth_minus1;
  1805. int sh = bits - 4;
  1806. if((isAnyRGB(c->srcFormat) || c->srcFormat==PIX_FMT_PAL8) && av_pix_fmt_descriptors[c->srcFormat].comp[0].depth_minus1<15)
  1807. sh= 9;
  1808. for (i = 0; i < dstW; i++) {
  1809. int j;
  1810. int srcPos = filterPos[i];
  1811. int val = 0;
  1812. for (j = 0; j < filterSize; j++) {
  1813. val += src[srcPos + j] * filter[filterSize * i + j];
  1814. }
  1815. // filter=14 bit, input=16 bit, output=30 bit, >> 11 makes 19 bit
  1816. dst[i] = FFMIN(val >> sh, (1 << 19) - 1);
  1817. }
  1818. }
  1819. static void hScale16To15_c(SwsContext *c, int16_t *dst, int dstW, const uint8_t *_src,
  1820. const int16_t *filter,
  1821. const int32_t *filterPos, int filterSize)
  1822. {
  1823. int i;
  1824. const uint16_t *src = (const uint16_t *) _src;
  1825. int sh = av_pix_fmt_descriptors[c->srcFormat].comp[0].depth_minus1;
  1826. if(sh<15)
  1827. sh= isAnyRGB(c->srcFormat) || c->srcFormat==PIX_FMT_PAL8 ? 13 : av_pix_fmt_descriptors[c->srcFormat].comp[0].depth_minus1;
  1828. for (i = 0; i < dstW; i++) {
  1829. int j;
  1830. int srcPos = filterPos[i];
  1831. int val = 0;
  1832. for (j = 0; j < filterSize; j++) {
  1833. val += src[srcPos + j] * filter[filterSize * i + j];
  1834. }
  1835. // filter=14 bit, input=16 bit, output=30 bit, >> 15 makes 15 bit
  1836. dst[i] = FFMIN(val >> sh, (1 << 15) - 1);
  1837. }
  1838. }
  1839. // bilinear / bicubic scaling
  1840. static void hScale8To15_c(SwsContext *c, int16_t *dst, int dstW, const uint8_t *src,
  1841. const int16_t *filter, const int32_t *filterPos,
  1842. int filterSize)
  1843. {
  1844. int i;
  1845. for (i=0; i<dstW; i++) {
  1846. int j;
  1847. int srcPos= filterPos[i];
  1848. int val=0;
  1849. for (j=0; j<filterSize; j++) {
  1850. val += ((int)src[srcPos + j])*filter[filterSize*i + j];
  1851. }
  1852. //filter += hFilterSize;
  1853. dst[i] = FFMIN(val>>7, (1<<15)-1); // the cubic equation does overflow ...
  1854. //dst[i] = val>>7;
  1855. }
  1856. }
  1857. static void hScale8To19_c(SwsContext *c, int16_t *_dst, int dstW, const uint8_t *src,
  1858. const int16_t *filter, const int32_t *filterPos,
  1859. int filterSize)
  1860. {
  1861. int i;
  1862. int32_t *dst = (int32_t *) _dst;
  1863. for (i=0; i<dstW; i++) {
  1864. int j;
  1865. int srcPos= filterPos[i];
  1866. int val=0;
  1867. for (j=0; j<filterSize; j++) {
  1868. val += ((int)src[srcPos + j])*filter[filterSize*i + j];
  1869. }
  1870. //filter += hFilterSize;
  1871. dst[i] = FFMIN(val>>3, (1<<19)-1); // the cubic equation does overflow ...
  1872. //dst[i] = val>>7;
  1873. }
  1874. }
  1875. //FIXME all pal and rgb srcFormats could do this convertion as well
  1876. //FIXME all scalers more complex than bilinear could do half of this transform
  1877. static void chrRangeToJpeg_c(int16_t *dstU, int16_t *dstV, int width)
  1878. {
  1879. int i;
  1880. for (i = 0; i < width; i++) {
  1881. dstU[i] = (FFMIN(dstU[i],30775)*4663 - 9289992)>>12; //-264
  1882. dstV[i] = (FFMIN(dstV[i],30775)*4663 - 9289992)>>12; //-264
  1883. }
  1884. }
  1885. static void chrRangeFromJpeg_c(int16_t *dstU, int16_t *dstV, int width)
  1886. {
  1887. int i;
  1888. for (i = 0; i < width; i++) {
  1889. dstU[i] = (dstU[i]*1799 + 4081085)>>11; //1469
  1890. dstV[i] = (dstV[i]*1799 + 4081085)>>11; //1469
  1891. }
  1892. }
  1893. static void lumRangeToJpeg_c(int16_t *dst, int width)
  1894. {
  1895. int i;
  1896. for (i = 0; i < width; i++)
  1897. dst[i] = (FFMIN(dst[i],30189)*19077 - 39057361)>>14;
  1898. }
  1899. static void lumRangeFromJpeg_c(int16_t *dst, int width)
  1900. {
  1901. int i;
  1902. for (i = 0; i < width; i++)
  1903. dst[i] = (dst[i]*14071 + 33561947)>>14;
  1904. }
  1905. static void chrRangeToJpeg16_c(int16_t *_dstU, int16_t *_dstV, int width)
  1906. {
  1907. int i;
  1908. int32_t *dstU = (int32_t *) _dstU;
  1909. int32_t *dstV = (int32_t *) _dstV;
  1910. for (i = 0; i < width; i++) {
  1911. dstU[i] = (FFMIN(dstU[i],30775<<4)*4663 - (9289992<<4))>>12; //-264
  1912. dstV[i] = (FFMIN(dstV[i],30775<<4)*4663 - (9289992<<4))>>12; //-264
  1913. }
  1914. }
  1915. static void chrRangeFromJpeg16_c(int16_t *_dstU, int16_t *_dstV, int width)
  1916. {
  1917. int i;
  1918. int32_t *dstU = (int32_t *) _dstU;
  1919. int32_t *dstV = (int32_t *) _dstV;
  1920. for (i = 0; i < width; i++) {
  1921. dstU[i] = (dstU[i]*1799 + (4081085<<4))>>11; //1469
  1922. dstV[i] = (dstV[i]*1799 + (4081085<<4))>>11; //1469
  1923. }
  1924. }
  1925. static void lumRangeToJpeg16_c(int16_t *_dst, int width)
  1926. {
  1927. int i;
  1928. int32_t *dst = (int32_t *) _dst;
  1929. for (i = 0; i < width; i++)
  1930. dst[i] = (FFMIN(dst[i],30189<<4)*4769 - (39057361<<2))>>12;
  1931. }
  1932. static void lumRangeFromJpeg16_c(int16_t *_dst, int width)
  1933. {
  1934. int i;
  1935. int32_t *dst = (int32_t *) _dst;
  1936. for (i = 0; i < width; i++)
  1937. dst[i] = (dst[i]*(14071/4) + (33561947<<4)/4)>>12;
  1938. }
  1939. static void hyscale_fast_c(SwsContext *c, int16_t *dst, int dstWidth,
  1940. const uint8_t *src, int srcW, int xInc)
  1941. {
  1942. int i;
  1943. unsigned int xpos=0;
  1944. for (i=0;i<dstWidth;i++) {
  1945. register unsigned int xx=xpos>>16;
  1946. register unsigned int xalpha=(xpos&0xFFFF)>>9;
  1947. dst[i]= (src[xx]<<7) + (src[xx+1] - src[xx])*xalpha;
  1948. xpos+=xInc;
  1949. }
  1950. for (i=dstWidth-1; (i*xInc)>>16 >=srcW-1; i--)
  1951. dst[i] = src[srcW-1]*128;
  1952. }
  1953. // *** horizontal scale Y line to temp buffer
  1954. static av_always_inline void hyscale(SwsContext *c, int16_t *dst, int dstWidth,
  1955. const uint8_t *src_in[4], int srcW, int xInc,
  1956. const int16_t *hLumFilter,
  1957. const int32_t *hLumFilterPos, int hLumFilterSize,
  1958. uint8_t *formatConvBuffer,
  1959. uint32_t *pal, int isAlpha)
  1960. {
  1961. void (*toYV12)(uint8_t *, const uint8_t *, const uint8_t *, const uint8_t *, int, uint32_t *) = isAlpha ? c->alpToYV12 : c->lumToYV12;
  1962. void (*convertRange)(int16_t *, int) = isAlpha ? NULL : c->lumConvertRange;
  1963. const uint8_t *src = src_in[isAlpha ? 3 : 0];
  1964. if (toYV12) {
  1965. toYV12(formatConvBuffer, src, src_in[1], src_in[2], srcW, pal);
  1966. src= formatConvBuffer;
  1967. } else if (c->readLumPlanar && !isAlpha) {
  1968. c->readLumPlanar(formatConvBuffer, src_in, srcW);
  1969. src = formatConvBuffer;
  1970. }
  1971. if (!c->hyscale_fast) {
  1972. c->hyScale(c, dst, dstWidth, src, hLumFilter, hLumFilterPos, hLumFilterSize);
  1973. } else { // fast bilinear upscale / crap downscale
  1974. c->hyscale_fast(c, dst, dstWidth, src, srcW, xInc);
  1975. }
  1976. if (convertRange)
  1977. convertRange(dst, dstWidth);
  1978. }
  1979. static void hcscale_fast_c(SwsContext *c, int16_t *dst1, int16_t *dst2,
  1980. int dstWidth, const uint8_t *src1,
  1981. const uint8_t *src2, int srcW, int xInc)
  1982. {
  1983. int i;
  1984. unsigned int xpos=0;
  1985. for (i=0;i<dstWidth;i++) {
  1986. register unsigned int xx=xpos>>16;
  1987. register unsigned int xalpha=(xpos&0xFFFF)>>9;
  1988. dst1[i]=(src1[xx]*(xalpha^127)+src1[xx+1]*xalpha);
  1989. dst2[i]=(src2[xx]*(xalpha^127)+src2[xx+1]*xalpha);
  1990. xpos+=xInc;
  1991. }
  1992. for (i=dstWidth-1; (i*xInc)>>16 >=srcW-1; i--) {
  1993. dst1[i] = src1[srcW-1]*128;
  1994. dst2[i] = src2[srcW-1]*128;
  1995. }
  1996. }
  1997. static av_always_inline void hcscale(SwsContext *c, int16_t *dst1, int16_t *dst2, int dstWidth,
  1998. const uint8_t *src_in[4],
  1999. int srcW, int xInc, const int16_t *hChrFilter,
  2000. const int32_t *hChrFilterPos, int hChrFilterSize,
  2001. uint8_t *formatConvBuffer, uint32_t *pal)
  2002. {
  2003. const uint8_t *src1 = src_in[1], *src2 = src_in[2];
  2004. if (c->chrToYV12) {
  2005. uint8_t *buf2 = formatConvBuffer + FFALIGN(srcW*2+78, 16);
  2006. c->chrToYV12(formatConvBuffer, buf2, src_in[0], src1, src2, srcW, pal);
  2007. src1= formatConvBuffer;
  2008. src2= buf2;
  2009. } else if (c->readChrPlanar) {
  2010. uint8_t *buf2 = formatConvBuffer + FFALIGN(srcW*2+78, 16);
  2011. c->readChrPlanar(formatConvBuffer, buf2, src_in, srcW);
  2012. src1= formatConvBuffer;
  2013. src2= buf2;
  2014. }
  2015. if (!c->hcscale_fast) {
  2016. c->hcScale(c, dst1, dstWidth, src1, hChrFilter, hChrFilterPos, hChrFilterSize);
  2017. c->hcScale(c, dst2, dstWidth, src2, hChrFilter, hChrFilterPos, hChrFilterSize);
  2018. } else { // fast bilinear upscale / crap downscale
  2019. c->hcscale_fast(c, dst1, dst2, dstWidth, src1, src2, srcW, xInc);
  2020. }
  2021. if (c->chrConvertRange)
  2022. c->chrConvertRange(dst1, dst2, dstWidth);
  2023. }
  2024. static av_always_inline void
  2025. find_c_packed_planar_out_funcs(SwsContext *c,
  2026. yuv2planar1_fn *yuv2plane1, yuv2planarX_fn *yuv2planeX,
  2027. yuv2interleavedX_fn *yuv2nv12cX,
  2028. yuv2packed1_fn *yuv2packed1, yuv2packed2_fn *yuv2packed2,
  2029. yuv2packedX_fn *yuv2packedX)
  2030. {
  2031. enum PixelFormat dstFormat = c->dstFormat;
  2032. if (is16BPS(dstFormat)) {
  2033. *yuv2planeX = isBE(dstFormat) ? yuv2planeX_16BE_c : yuv2planeX_16LE_c;
  2034. *yuv2plane1 = isBE(dstFormat) ? yuv2plane1_16BE_c : yuv2plane1_16LE_c;
  2035. } else if (is9_OR_10BPS(dstFormat)) {
  2036. if (av_pix_fmt_descriptors[dstFormat].comp[0].depth_minus1 == 8) {
  2037. *yuv2planeX = isBE(dstFormat) ? yuv2planeX_9BE_c : yuv2planeX_9LE_c;
  2038. *yuv2plane1 = isBE(dstFormat) ? yuv2plane1_9BE_c : yuv2plane1_9LE_c;
  2039. } else {
  2040. *yuv2planeX = isBE(dstFormat) ? yuv2planeX_10BE_c : yuv2planeX_10LE_c;
  2041. *yuv2plane1 = isBE(dstFormat) ? yuv2plane1_10BE_c : yuv2plane1_10LE_c;
  2042. }
  2043. } else {
  2044. *yuv2plane1 = yuv2plane1_8_c;
  2045. *yuv2planeX = yuv2planeX_8_c;
  2046. if (dstFormat == PIX_FMT_NV12 || dstFormat == PIX_FMT_NV21)
  2047. *yuv2nv12cX = yuv2nv12cX_c;
  2048. }
  2049. if(c->flags & SWS_FULL_CHR_H_INT) {
  2050. switch (dstFormat) {
  2051. case PIX_FMT_RGBA:
  2052. #if CONFIG_SMALL
  2053. *yuv2packedX = yuv2rgba32_full_X_c;
  2054. #else
  2055. #if CONFIG_SWSCALE_ALPHA
  2056. if (c->alpPixBuf) {
  2057. *yuv2packedX = yuv2rgba32_full_X_c;
  2058. } else
  2059. #endif /* CONFIG_SWSCALE_ALPHA */
  2060. {
  2061. *yuv2packedX = yuv2rgbx32_full_X_c;
  2062. }
  2063. #endif /* !CONFIG_SMALL */
  2064. break;
  2065. case PIX_FMT_ARGB:
  2066. #if CONFIG_SMALL
  2067. *yuv2packedX = yuv2argb32_full_X_c;
  2068. #else
  2069. #if CONFIG_SWSCALE_ALPHA
  2070. if (c->alpPixBuf) {
  2071. *yuv2packedX = yuv2argb32_full_X_c;
  2072. } else
  2073. #endif /* CONFIG_SWSCALE_ALPHA */
  2074. {
  2075. *yuv2packedX = yuv2xrgb32_full_X_c;
  2076. }
  2077. #endif /* !CONFIG_SMALL */
  2078. break;
  2079. case PIX_FMT_BGRA:
  2080. #if CONFIG_SMALL
  2081. *yuv2packedX = yuv2bgra32_full_X_c;
  2082. #else
  2083. #if CONFIG_SWSCALE_ALPHA
  2084. if (c->alpPixBuf) {
  2085. *yuv2packedX = yuv2bgra32_full_X_c;
  2086. } else
  2087. #endif /* CONFIG_SWSCALE_ALPHA */
  2088. {
  2089. *yuv2packedX = yuv2bgrx32_full_X_c;
  2090. }
  2091. #endif /* !CONFIG_SMALL */
  2092. break;
  2093. case PIX_FMT_ABGR:
  2094. #if CONFIG_SMALL
  2095. *yuv2packedX = yuv2abgr32_full_X_c;
  2096. #else
  2097. #if CONFIG_SWSCALE_ALPHA
  2098. if (c->alpPixBuf) {
  2099. *yuv2packedX = yuv2abgr32_full_X_c;
  2100. } else
  2101. #endif /* CONFIG_SWSCALE_ALPHA */
  2102. {
  2103. *yuv2packedX = yuv2xbgr32_full_X_c;
  2104. }
  2105. #endif /* !CONFIG_SMALL */
  2106. break;
  2107. case PIX_FMT_RGB24:
  2108. *yuv2packedX = yuv2rgb24_full_X_c;
  2109. break;
  2110. case PIX_FMT_BGR24:
  2111. *yuv2packedX = yuv2bgr24_full_X_c;
  2112. break;
  2113. }
  2114. if(!*yuv2packedX)
  2115. goto YUV_PACKED;
  2116. } else {
  2117. YUV_PACKED:
  2118. switch (dstFormat) {
  2119. case PIX_FMT_RGB48LE:
  2120. *yuv2packed1 = yuv2rgb48le_1_c;
  2121. *yuv2packed2 = yuv2rgb48le_2_c;
  2122. *yuv2packedX = yuv2rgb48le_X_c;
  2123. break;
  2124. case PIX_FMT_RGB48BE:
  2125. *yuv2packed1 = yuv2rgb48be_1_c;
  2126. *yuv2packed2 = yuv2rgb48be_2_c;
  2127. *yuv2packedX = yuv2rgb48be_X_c;
  2128. break;
  2129. case PIX_FMT_BGR48LE:
  2130. *yuv2packed1 = yuv2bgr48le_1_c;
  2131. *yuv2packed2 = yuv2bgr48le_2_c;
  2132. *yuv2packedX = yuv2bgr48le_X_c;
  2133. break;
  2134. case PIX_FMT_BGR48BE:
  2135. *yuv2packed1 = yuv2bgr48be_1_c;
  2136. *yuv2packed2 = yuv2bgr48be_2_c;
  2137. *yuv2packedX = yuv2bgr48be_X_c;
  2138. break;
  2139. case PIX_FMT_RGB32:
  2140. case PIX_FMT_BGR32:
  2141. #if CONFIG_SMALL
  2142. *yuv2packed1 = yuv2rgb32_1_c;
  2143. *yuv2packed2 = yuv2rgb32_2_c;
  2144. *yuv2packedX = yuv2rgb32_X_c;
  2145. #else
  2146. #if CONFIG_SWSCALE_ALPHA
  2147. if (c->alpPixBuf) {
  2148. *yuv2packed1 = yuv2rgba32_1_c;
  2149. *yuv2packed2 = yuv2rgba32_2_c;
  2150. *yuv2packedX = yuv2rgba32_X_c;
  2151. } else
  2152. #endif /* CONFIG_SWSCALE_ALPHA */
  2153. {
  2154. *yuv2packed1 = yuv2rgbx32_1_c;
  2155. *yuv2packed2 = yuv2rgbx32_2_c;
  2156. *yuv2packedX = yuv2rgbx32_X_c;
  2157. }
  2158. #endif /* !CONFIG_SMALL */
  2159. break;
  2160. case PIX_FMT_RGB32_1:
  2161. case PIX_FMT_BGR32_1:
  2162. #if CONFIG_SMALL
  2163. *yuv2packed1 = yuv2rgb32_1_1_c;
  2164. *yuv2packed2 = yuv2rgb32_1_2_c;
  2165. *yuv2packedX = yuv2rgb32_1_X_c;
  2166. #else
  2167. #if CONFIG_SWSCALE_ALPHA
  2168. if (c->alpPixBuf) {
  2169. *yuv2packed1 = yuv2rgba32_1_1_c;
  2170. *yuv2packed2 = yuv2rgba32_1_2_c;
  2171. *yuv2packedX = yuv2rgba32_1_X_c;
  2172. } else
  2173. #endif /* CONFIG_SWSCALE_ALPHA */
  2174. {
  2175. *yuv2packed1 = yuv2rgbx32_1_1_c;
  2176. *yuv2packed2 = yuv2rgbx32_1_2_c;
  2177. *yuv2packedX = yuv2rgbx32_1_X_c;
  2178. }
  2179. #endif /* !CONFIG_SMALL */
  2180. break;
  2181. case PIX_FMT_RGB24:
  2182. *yuv2packed1 = yuv2rgb24_1_c;
  2183. *yuv2packed2 = yuv2rgb24_2_c;
  2184. *yuv2packedX = yuv2rgb24_X_c;
  2185. break;
  2186. case PIX_FMT_BGR24:
  2187. *yuv2packed1 = yuv2bgr24_1_c;
  2188. *yuv2packed2 = yuv2bgr24_2_c;
  2189. *yuv2packedX = yuv2bgr24_X_c;
  2190. break;
  2191. case PIX_FMT_RGB565LE:
  2192. case PIX_FMT_RGB565BE:
  2193. case PIX_FMT_BGR565LE:
  2194. case PIX_FMT_BGR565BE:
  2195. *yuv2packed1 = yuv2rgb16_1_c;
  2196. *yuv2packed2 = yuv2rgb16_2_c;
  2197. *yuv2packedX = yuv2rgb16_X_c;
  2198. break;
  2199. case PIX_FMT_RGB555LE:
  2200. case PIX_FMT_RGB555BE:
  2201. case PIX_FMT_BGR555LE:
  2202. case PIX_FMT_BGR555BE:
  2203. *yuv2packed1 = yuv2rgb15_1_c;
  2204. *yuv2packed2 = yuv2rgb15_2_c;
  2205. *yuv2packedX = yuv2rgb15_X_c;
  2206. break;
  2207. case PIX_FMT_RGB444LE:
  2208. case PIX_FMT_RGB444BE:
  2209. case PIX_FMT_BGR444LE:
  2210. case PIX_FMT_BGR444BE:
  2211. *yuv2packed1 = yuv2rgb12_1_c;
  2212. *yuv2packed2 = yuv2rgb12_2_c;
  2213. *yuv2packedX = yuv2rgb12_X_c;
  2214. break;
  2215. case PIX_FMT_RGB8:
  2216. case PIX_FMT_BGR8:
  2217. *yuv2packed1 = yuv2rgb8_1_c;
  2218. *yuv2packed2 = yuv2rgb8_2_c;
  2219. *yuv2packedX = yuv2rgb8_X_c;
  2220. break;
  2221. case PIX_FMT_RGB4:
  2222. case PIX_FMT_BGR4:
  2223. *yuv2packed1 = yuv2rgb4_1_c;
  2224. *yuv2packed2 = yuv2rgb4_2_c;
  2225. *yuv2packedX = yuv2rgb4_X_c;
  2226. break;
  2227. case PIX_FMT_RGB4_BYTE:
  2228. case PIX_FMT_BGR4_BYTE:
  2229. *yuv2packed1 = yuv2rgb4b_1_c;
  2230. *yuv2packed2 = yuv2rgb4b_2_c;
  2231. *yuv2packedX = yuv2rgb4b_X_c;
  2232. break;
  2233. }
  2234. }
  2235. switch (dstFormat) {
  2236. case PIX_FMT_GRAY16BE:
  2237. *yuv2packed1 = yuv2gray16BE_1_c;
  2238. *yuv2packed2 = yuv2gray16BE_2_c;
  2239. *yuv2packedX = yuv2gray16BE_X_c;
  2240. break;
  2241. case PIX_FMT_GRAY16LE:
  2242. *yuv2packed1 = yuv2gray16LE_1_c;
  2243. *yuv2packed2 = yuv2gray16LE_2_c;
  2244. *yuv2packedX = yuv2gray16LE_X_c;
  2245. break;
  2246. case PIX_FMT_MONOWHITE:
  2247. *yuv2packed1 = yuv2monowhite_1_c;
  2248. *yuv2packed2 = yuv2monowhite_2_c;
  2249. *yuv2packedX = yuv2monowhite_X_c;
  2250. break;
  2251. case PIX_FMT_MONOBLACK:
  2252. *yuv2packed1 = yuv2monoblack_1_c;
  2253. *yuv2packed2 = yuv2monoblack_2_c;
  2254. *yuv2packedX = yuv2monoblack_X_c;
  2255. break;
  2256. case PIX_FMT_YUYV422:
  2257. *yuv2packed1 = yuv2yuyv422_1_c;
  2258. *yuv2packed2 = yuv2yuyv422_2_c;
  2259. *yuv2packedX = yuv2yuyv422_X_c;
  2260. break;
  2261. case PIX_FMT_UYVY422:
  2262. *yuv2packed1 = yuv2uyvy422_1_c;
  2263. *yuv2packed2 = yuv2uyvy422_2_c;
  2264. *yuv2packedX = yuv2uyvy422_X_c;
  2265. break;
  2266. }
  2267. }
  2268. #define DEBUG_SWSCALE_BUFFERS 0
  2269. #define DEBUG_BUFFERS(...) if (DEBUG_SWSCALE_BUFFERS) av_log(c, AV_LOG_DEBUG, __VA_ARGS__)
  2270. static int swScale(SwsContext *c, const uint8_t* src[],
  2271. int srcStride[], int srcSliceY,
  2272. int srcSliceH, uint8_t* dst[], int dstStride[])
  2273. {
  2274. /* load a few things into local vars to make the code more readable? and faster */
  2275. const int srcW= c->srcW;
  2276. const int dstW= c->dstW;
  2277. const int dstH= c->dstH;
  2278. const int chrDstW= c->chrDstW;
  2279. const int chrSrcW= c->chrSrcW;
  2280. const int lumXInc= c->lumXInc;
  2281. const int chrXInc= c->chrXInc;
  2282. const enum PixelFormat dstFormat= c->dstFormat;
  2283. const int flags= c->flags;
  2284. int32_t *vLumFilterPos= c->vLumFilterPos;
  2285. int32_t *vChrFilterPos= c->vChrFilterPos;
  2286. int32_t *hLumFilterPos= c->hLumFilterPos;
  2287. int32_t *hChrFilterPos= c->hChrFilterPos;
  2288. int16_t *hLumFilter= c->hLumFilter;
  2289. int16_t *hChrFilter= c->hChrFilter;
  2290. int32_t *lumMmxFilter= c->lumMmxFilter;
  2291. int32_t *chrMmxFilter= c->chrMmxFilter;
  2292. int32_t av_unused *alpMmxFilter= c->alpMmxFilter;
  2293. const int vLumFilterSize= c->vLumFilterSize;
  2294. const int vChrFilterSize= c->vChrFilterSize;
  2295. const int hLumFilterSize= c->hLumFilterSize;
  2296. const int hChrFilterSize= c->hChrFilterSize;
  2297. int16_t **lumPixBuf= c->lumPixBuf;
  2298. int16_t **chrUPixBuf= c->chrUPixBuf;
  2299. int16_t **chrVPixBuf= c->chrVPixBuf;
  2300. int16_t **alpPixBuf= c->alpPixBuf;
  2301. const int vLumBufSize= c->vLumBufSize;
  2302. const int vChrBufSize= c->vChrBufSize;
  2303. uint8_t *formatConvBuffer= c->formatConvBuffer;
  2304. const int chrSrcSliceY= srcSliceY >> c->chrSrcVSubSample;
  2305. const int chrSrcSliceH= -((-srcSliceH) >> c->chrSrcVSubSample);
  2306. int lastDstY;
  2307. uint32_t *pal=c->pal_yuv;
  2308. int should_dither= isNBPS(c->srcFormat) || is16BPS(c->srcFormat);
  2309. yuv2planar1_fn yuv2plane1 = c->yuv2plane1;
  2310. yuv2planarX_fn yuv2planeX = c->yuv2planeX;
  2311. yuv2interleavedX_fn yuv2nv12cX = c->yuv2nv12cX;
  2312. yuv2packed1_fn yuv2packed1 = c->yuv2packed1;
  2313. yuv2packed2_fn yuv2packed2 = c->yuv2packed2;
  2314. yuv2packedX_fn yuv2packedX = c->yuv2packedX;
  2315. /* vars which will change and which we need to store back in the context */
  2316. int dstY= c->dstY;
  2317. int lumBufIndex= c->lumBufIndex;
  2318. int chrBufIndex= c->chrBufIndex;
  2319. int lastInLumBuf= c->lastInLumBuf;
  2320. int lastInChrBuf= c->lastInChrBuf;
  2321. if (isPacked(c->srcFormat)) {
  2322. src[0]=
  2323. src[1]=
  2324. src[2]=
  2325. src[3]= src[0];
  2326. srcStride[0]=
  2327. srcStride[1]=
  2328. srcStride[2]=
  2329. srcStride[3]= srcStride[0];
  2330. }
  2331. srcStride[1]<<= c->vChrDrop;
  2332. srcStride[2]<<= c->vChrDrop;
  2333. DEBUG_BUFFERS("swScale() %p[%d] %p[%d] %p[%d] %p[%d] -> %p[%d] %p[%d] %p[%d] %p[%d]\n",
  2334. src[0], srcStride[0], src[1], srcStride[1], src[2], srcStride[2], src[3], srcStride[3],
  2335. dst[0], dstStride[0], dst[1], dstStride[1], dst[2], dstStride[2], dst[3], dstStride[3]);
  2336. DEBUG_BUFFERS("srcSliceY: %d srcSliceH: %d dstY: %d dstH: %d\n",
  2337. srcSliceY, srcSliceH, dstY, dstH);
  2338. DEBUG_BUFFERS("vLumFilterSize: %d vLumBufSize: %d vChrFilterSize: %d vChrBufSize: %d\n",
  2339. vLumFilterSize, vLumBufSize, vChrFilterSize, vChrBufSize);
  2340. if (dstStride[0]%16 !=0 || dstStride[1]%16 !=0 || dstStride[2]%16 !=0 || dstStride[3]%16 != 0) {
  2341. static int warnedAlready=0; //FIXME move this into the context perhaps
  2342. if (flags & SWS_PRINT_INFO && !warnedAlready) {
  2343. av_log(c, AV_LOG_WARNING, "Warning: dstStride is not aligned!\n"
  2344. " ->cannot do aligned memory accesses anymore\n");
  2345. warnedAlready=1;
  2346. }
  2347. }
  2348. if ((int)dst[0]%16 || (int)dst[1]%16 || (int)dst[2]%16 || (int)src[0]%16 || (int)src[1]%16 || (int)src[2]%16
  2349. || dstStride[0]%16 || dstStride[1]%16 || dstStride[2]%16 || dstStride[3]%16
  2350. || srcStride[0]%16 || srcStride[1]%16 || srcStride[2]%16 || srcStride[3]%16
  2351. ) {
  2352. static int warnedAlready=0;
  2353. int cpu_flags = av_get_cpu_flags();
  2354. if (HAVE_MMX2 && (cpu_flags & AV_CPU_FLAG_SSE2) && !warnedAlready){
  2355. av_log(c, AV_LOG_WARNING, "Warning: data is not aligned! This can lead to a speedloss\n");
  2356. warnedAlready=1;
  2357. }
  2358. }
  2359. /* Note the user might start scaling the picture in the middle so this
  2360. will not get executed. This is not really intended but works
  2361. currently, so people might do it. */
  2362. if (srcSliceY ==0) {
  2363. lumBufIndex=-1;
  2364. chrBufIndex=-1;
  2365. dstY=0;
  2366. lastInLumBuf= -1;
  2367. lastInChrBuf= -1;
  2368. }
  2369. if (!should_dither) {
  2370. c->chrDither8 = c->lumDither8 = ff_sws_pb_64;
  2371. }
  2372. lastDstY= dstY;
  2373. for (;dstY < dstH; dstY++) {
  2374. const int chrDstY= dstY>>c->chrDstVSubSample;
  2375. uint8_t *dest[4] = {
  2376. dst[0] + dstStride[0] * dstY,
  2377. dst[1] + dstStride[1] * chrDstY,
  2378. dst[2] + dstStride[2] * chrDstY,
  2379. (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? dst[3] + dstStride[3] * dstY : NULL,
  2380. };
  2381. int use_mmx_vfilter= c->use_mmx_vfilter;
  2382. const int firstLumSrcY= vLumFilterPos[dstY]; //First line needed as input
  2383. const int firstLumSrcY2= vLumFilterPos[FFMIN(dstY | ((1<<c->chrDstVSubSample) - 1), dstH-1)];
  2384. const int firstChrSrcY= vChrFilterPos[chrDstY]; //First line needed as input
  2385. int lastLumSrcY= firstLumSrcY + vLumFilterSize -1; // Last line needed as input
  2386. int lastLumSrcY2=firstLumSrcY2+ vLumFilterSize -1; // Last line needed as input
  2387. int lastChrSrcY= firstChrSrcY + vChrFilterSize -1; // Last line needed as input
  2388. int enough_lines;
  2389. //handle holes (FAST_BILINEAR & weird filters)
  2390. if (firstLumSrcY > lastInLumBuf) lastInLumBuf= firstLumSrcY-1;
  2391. if (firstChrSrcY > lastInChrBuf) lastInChrBuf= firstChrSrcY-1;
  2392. assert(firstLumSrcY >= lastInLumBuf - vLumBufSize + 1);
  2393. assert(firstChrSrcY >= lastInChrBuf - vChrBufSize + 1);
  2394. DEBUG_BUFFERS("dstY: %d\n", dstY);
  2395. DEBUG_BUFFERS("\tfirstLumSrcY: %d lastLumSrcY: %d lastInLumBuf: %d\n",
  2396. firstLumSrcY, lastLumSrcY, lastInLumBuf);
  2397. DEBUG_BUFFERS("\tfirstChrSrcY: %d lastChrSrcY: %d lastInChrBuf: %d\n",
  2398. firstChrSrcY, lastChrSrcY, lastInChrBuf);
  2399. // Do we have enough lines in this slice to output the dstY line
  2400. enough_lines = lastLumSrcY2 < srcSliceY + srcSliceH && lastChrSrcY < -((-srcSliceY - srcSliceH)>>c->chrSrcVSubSample);
  2401. if (!enough_lines) {
  2402. lastLumSrcY = srcSliceY + srcSliceH - 1;
  2403. lastChrSrcY = chrSrcSliceY + chrSrcSliceH - 1;
  2404. DEBUG_BUFFERS("buffering slice: lastLumSrcY %d lastChrSrcY %d\n",
  2405. lastLumSrcY, lastChrSrcY);
  2406. }
  2407. //Do horizontal scaling
  2408. while(lastInLumBuf < lastLumSrcY) {
  2409. const uint8_t *src1[4] = {
  2410. src[0] + (lastInLumBuf + 1 - srcSliceY) * srcStride[0],
  2411. src[1] + (lastInLumBuf + 1 - srcSliceY) * srcStride[1],
  2412. src[2] + (lastInLumBuf + 1 - srcSliceY) * srcStride[2],
  2413. src[3] + (lastInLumBuf + 1 - srcSliceY) * srcStride[3],
  2414. };
  2415. lumBufIndex++;
  2416. assert(lumBufIndex < 2*vLumBufSize);
  2417. assert(lastInLumBuf + 1 - srcSliceY < srcSliceH);
  2418. assert(lastInLumBuf + 1 - srcSliceY >= 0);
  2419. hyscale(c, lumPixBuf[ lumBufIndex ], dstW, src1, srcW, lumXInc,
  2420. hLumFilter, hLumFilterPos, hLumFilterSize,
  2421. formatConvBuffer,
  2422. pal, 0);
  2423. if (CONFIG_SWSCALE_ALPHA && alpPixBuf)
  2424. hyscale(c, alpPixBuf[ lumBufIndex ], dstW, src1, srcW,
  2425. lumXInc, hLumFilter, hLumFilterPos, hLumFilterSize,
  2426. formatConvBuffer,
  2427. pal, 1);
  2428. lastInLumBuf++;
  2429. DEBUG_BUFFERS("\t\tlumBufIndex %d: lastInLumBuf: %d\n",
  2430. lumBufIndex, lastInLumBuf);
  2431. }
  2432. while(lastInChrBuf < lastChrSrcY) {
  2433. const uint8_t *src1[4] = {
  2434. src[0] + (lastInChrBuf + 1 - chrSrcSliceY) * srcStride[0],
  2435. src[1] + (lastInChrBuf + 1 - chrSrcSliceY) * srcStride[1],
  2436. src[2] + (lastInChrBuf + 1 - chrSrcSliceY) * srcStride[2],
  2437. src[3] + (lastInChrBuf + 1 - chrSrcSliceY) * srcStride[3],
  2438. };
  2439. chrBufIndex++;
  2440. assert(chrBufIndex < 2*vChrBufSize);
  2441. assert(lastInChrBuf + 1 - chrSrcSliceY < (chrSrcSliceH));
  2442. assert(lastInChrBuf + 1 - chrSrcSliceY >= 0);
  2443. //FIXME replace parameters through context struct (some at least)
  2444. if (c->needs_hcscale)
  2445. hcscale(c, chrUPixBuf[chrBufIndex], chrVPixBuf[chrBufIndex],
  2446. chrDstW, src1, chrSrcW, chrXInc,
  2447. hChrFilter, hChrFilterPos, hChrFilterSize,
  2448. formatConvBuffer, pal);
  2449. lastInChrBuf++;
  2450. DEBUG_BUFFERS("\t\tchrBufIndex %d: lastInChrBuf: %d\n",
  2451. chrBufIndex, lastInChrBuf);
  2452. }
  2453. //wrap buf index around to stay inside the ring buffer
  2454. if (lumBufIndex >= vLumBufSize) lumBufIndex-= vLumBufSize;
  2455. if (chrBufIndex >= vChrBufSize) chrBufIndex-= vChrBufSize;
  2456. if (!enough_lines)
  2457. break; //we can't output a dstY line so let's try with the next slice
  2458. #if HAVE_MMX
  2459. updateMMXDitherTables(c, dstY, lumBufIndex, chrBufIndex, lastInLumBuf, lastInChrBuf);
  2460. #endif
  2461. if (should_dither) {
  2462. c->chrDither8 = dither_8x8_128[chrDstY & 7];
  2463. c->lumDither8 = dither_8x8_128[dstY & 7];
  2464. }
  2465. if (dstY >= dstH-2) {
  2466. // hmm looks like we can't use MMX here without overwriting this array's tail
  2467. find_c_packed_planar_out_funcs(c, &yuv2plane1, &yuv2planeX, &yuv2nv12cX,
  2468. &yuv2packed1, &yuv2packed2, &yuv2packedX);
  2469. use_mmx_vfilter= 0;
  2470. }
  2471. {
  2472. const int16_t **lumSrcPtr= (const int16_t **)(void*) lumPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize;
  2473. const int16_t **chrUSrcPtr= (const int16_t **)(void*) chrUPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
  2474. const int16_t **chrVSrcPtr= (const int16_t **)(void*) chrVPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
  2475. const int16_t **alpSrcPtr= (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? (const int16_t **)(void*) alpPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize : NULL;
  2476. int16_t *vLumFilter= c->vLumFilter;
  2477. int16_t *vChrFilter= c->vChrFilter;
  2478. if (isPlanarYUV(dstFormat) || dstFormat==PIX_FMT_GRAY8) { //YV12 like
  2479. const int chrSkipMask= (1<<c->chrDstVSubSample)-1;
  2480. vLumFilter += dstY * vLumFilterSize;
  2481. vChrFilter += chrDstY * vChrFilterSize;
  2482. av_assert0(use_mmx_vfilter != (
  2483. yuv2planeX == yuv2planeX_10BE_c
  2484. || yuv2planeX == yuv2planeX_10LE_c
  2485. || yuv2planeX == yuv2planeX_9BE_c
  2486. || yuv2planeX == yuv2planeX_9LE_c
  2487. || yuv2planeX == yuv2planeX_16BE_c
  2488. || yuv2planeX == yuv2planeX_16LE_c
  2489. || yuv2planeX == yuv2planeX_8_c) || !ARCH_X86);
  2490. if(use_mmx_vfilter){
  2491. vLumFilter= c->lumMmxFilter;
  2492. vChrFilter= c->chrMmxFilter;
  2493. }
  2494. if (vLumFilterSize == 1) {
  2495. yuv2plane1(lumSrcPtr[0], dest[0], dstW, c->lumDither8, 0);
  2496. } else {
  2497. yuv2planeX(vLumFilter, vLumFilterSize,
  2498. lumSrcPtr, dest[0], dstW, c->lumDither8, 0);
  2499. }
  2500. if (!((dstY&chrSkipMask) || isGray(dstFormat))) {
  2501. if (yuv2nv12cX) {
  2502. yuv2nv12cX(c, vChrFilter, vChrFilterSize, chrUSrcPtr, chrVSrcPtr, dest[1], chrDstW);
  2503. } else if (vChrFilterSize == 1) {
  2504. yuv2plane1(chrUSrcPtr[0], dest[1], chrDstW, c->chrDither8, 0);
  2505. yuv2plane1(chrVSrcPtr[0], dest[2], chrDstW, c->chrDither8, 3);
  2506. } else {
  2507. yuv2planeX(vChrFilter, vChrFilterSize,
  2508. chrUSrcPtr, dest[1], chrDstW, c->chrDither8, 0);
  2509. yuv2planeX(vChrFilter, vChrFilterSize,
  2510. chrVSrcPtr, dest[2], chrDstW, c->chrDither8, use_mmx_vfilter ? (c->uv_offx2 >> 1) : 3);
  2511. }
  2512. }
  2513. if (CONFIG_SWSCALE_ALPHA && alpPixBuf){
  2514. if(use_mmx_vfilter){
  2515. vLumFilter= c->alpMmxFilter;
  2516. }
  2517. if (vLumFilterSize == 1) {
  2518. yuv2plane1(alpSrcPtr[0], dest[3], dstW, c->lumDither8, 0);
  2519. } else {
  2520. yuv2planeX(vLumFilter, vLumFilterSize,
  2521. alpSrcPtr, dest[3], dstW, c->lumDither8, 0);
  2522. }
  2523. }
  2524. } else {
  2525. assert(lumSrcPtr + vLumFilterSize - 1 < lumPixBuf + vLumBufSize*2);
  2526. assert(chrUSrcPtr + vChrFilterSize - 1 < chrUPixBuf + vChrBufSize*2);
  2527. if (c->yuv2packed1 && vLumFilterSize == 1 && vChrFilterSize == 2) { //unscaled RGB
  2528. int chrAlpha = vChrFilter[2 * dstY + 1];
  2529. yuv2packed1(c, *lumSrcPtr, chrUSrcPtr, chrVSrcPtr,
  2530. alpPixBuf ? *alpSrcPtr : NULL,
  2531. dest[0], dstW, chrAlpha, dstY);
  2532. } else if (c->yuv2packed2 && vLumFilterSize == 2 && vChrFilterSize == 2) { //bilinear upscale RGB
  2533. int lumAlpha = vLumFilter[2 * dstY + 1];
  2534. int chrAlpha = vChrFilter[2 * dstY + 1];
  2535. lumMmxFilter[2] =
  2536. lumMmxFilter[3] = vLumFilter[2 * dstY ] * 0x10001;
  2537. chrMmxFilter[2] =
  2538. chrMmxFilter[3] = vChrFilter[2 * chrDstY] * 0x10001;
  2539. yuv2packed2(c, lumSrcPtr, chrUSrcPtr, chrVSrcPtr,
  2540. alpPixBuf ? alpSrcPtr : NULL,
  2541. dest[0], dstW, lumAlpha, chrAlpha, dstY);
  2542. } else { //general RGB
  2543. yuv2packedX(c, vLumFilter + dstY * vLumFilterSize,
  2544. lumSrcPtr, vLumFilterSize,
  2545. vChrFilter + dstY * vChrFilterSize,
  2546. chrUSrcPtr, chrVSrcPtr, vChrFilterSize,
  2547. alpSrcPtr, dest[0], dstW, dstY);
  2548. }
  2549. }
  2550. }
  2551. }
  2552. if ((dstFormat == PIX_FMT_YUVA420P) && !alpPixBuf)
  2553. fillPlane(dst[3], dstStride[3], dstW, dstY-lastDstY, lastDstY, 255);
  2554. #if HAVE_MMX2
  2555. if (av_get_cpu_flags() & AV_CPU_FLAG_MMX2)
  2556. __asm__ volatile("sfence":::"memory");
  2557. #endif
  2558. emms_c();
  2559. /* store changed local vars back in the context */
  2560. c->dstY= dstY;
  2561. c->lumBufIndex= lumBufIndex;
  2562. c->chrBufIndex= chrBufIndex;
  2563. c->lastInLumBuf= lastInLumBuf;
  2564. c->lastInChrBuf= lastInChrBuf;
  2565. return dstY - lastDstY;
  2566. }
  2567. static av_cold void sws_init_swScale_c(SwsContext *c)
  2568. {
  2569. enum PixelFormat srcFormat = c->srcFormat;
  2570. find_c_packed_planar_out_funcs(c, &c->yuv2plane1, &c->yuv2planeX,
  2571. &c->yuv2nv12cX, &c->yuv2packed1, &c->yuv2packed2,
  2572. &c->yuv2packedX);
  2573. c->chrToYV12 = NULL;
  2574. switch(srcFormat) {
  2575. case PIX_FMT_YUYV422 : c->chrToYV12 = yuy2ToUV_c; break;
  2576. case PIX_FMT_UYVY422 : c->chrToYV12 = uyvyToUV_c; break;
  2577. case PIX_FMT_NV12 : c->chrToYV12 = nv12ToUV_c; break;
  2578. case PIX_FMT_NV21 : c->chrToYV12 = nv21ToUV_c; break;
  2579. case PIX_FMT_RGB8 :
  2580. case PIX_FMT_BGR8 :
  2581. case PIX_FMT_PAL8 :
  2582. case PIX_FMT_BGR4_BYTE:
  2583. case PIX_FMT_RGB4_BYTE: c->chrToYV12 = palToUV_c; break;
  2584. case PIX_FMT_GBRP9LE:
  2585. case PIX_FMT_GBRP10LE:
  2586. case PIX_FMT_GBRP16LE: c->readChrPlanar = planar_rgb16le_to_uv; break;
  2587. case PIX_FMT_GBRP9BE:
  2588. case PIX_FMT_GBRP10BE:
  2589. case PIX_FMT_GBRP16BE: c->readChrPlanar = planar_rgb16be_to_uv; break;
  2590. case PIX_FMT_GBRP: c->readChrPlanar = planar_rgb_to_uv; break;
  2591. #if HAVE_BIGENDIAN
  2592. case PIX_FMT_YUV444P9LE:
  2593. case PIX_FMT_YUV422P9LE:
  2594. case PIX_FMT_YUV420P9LE:
  2595. case PIX_FMT_YUV422P10LE:
  2596. case PIX_FMT_YUV420P10LE:
  2597. case PIX_FMT_YUV444P10LE:
  2598. case PIX_FMT_YUV420P16LE:
  2599. case PIX_FMT_YUV422P16LE:
  2600. case PIX_FMT_YUV444P16LE: c->chrToYV12 = bswap16UV_c; break;
  2601. #else
  2602. case PIX_FMT_YUV444P9BE:
  2603. case PIX_FMT_YUV422P9BE:
  2604. case PIX_FMT_YUV420P9BE:
  2605. case PIX_FMT_YUV444P10BE:
  2606. case PIX_FMT_YUV422P10BE:
  2607. case PIX_FMT_YUV420P10BE:
  2608. case PIX_FMT_YUV420P16BE:
  2609. case PIX_FMT_YUV422P16BE:
  2610. case PIX_FMT_YUV444P16BE: c->chrToYV12 = bswap16UV_c; break;
  2611. #endif
  2612. }
  2613. if (c->chrSrcHSubSample) {
  2614. switch(srcFormat) {
  2615. case PIX_FMT_RGB48BE : c->chrToYV12 = rgb48BEToUV_half_c; break;
  2616. case PIX_FMT_RGB48LE : c->chrToYV12 = rgb48LEToUV_half_c; break;
  2617. case PIX_FMT_BGR48BE : c->chrToYV12 = bgr48BEToUV_half_c; break;
  2618. case PIX_FMT_BGR48LE : c->chrToYV12 = bgr48LEToUV_half_c; break;
  2619. case PIX_FMT_RGB32 : c->chrToYV12 = bgr32ToUV_half_c; break;
  2620. case PIX_FMT_RGB32_1 : c->chrToYV12 = bgr321ToUV_half_c; break;
  2621. case PIX_FMT_BGR24 : c->chrToYV12 = bgr24ToUV_half_c; break;
  2622. case PIX_FMT_BGR565LE: c->chrToYV12 = bgr16leToUV_half_c; break;
  2623. case PIX_FMT_BGR565BE: c->chrToYV12 = bgr16beToUV_half_c; break;
  2624. case PIX_FMT_BGR555LE: c->chrToYV12 = bgr15leToUV_half_c; break;
  2625. case PIX_FMT_BGR555BE: c->chrToYV12 = bgr15beToUV_half_c; break;
  2626. case PIX_FMT_BGR444LE: c->chrToYV12 = bgr12leToUV_half_c; break;
  2627. case PIX_FMT_BGR444BE: c->chrToYV12 = bgr12beToUV_half_c; break;
  2628. case PIX_FMT_BGR32 : c->chrToYV12 = rgb32ToUV_half_c; break;
  2629. case PIX_FMT_BGR32_1 : c->chrToYV12 = rgb321ToUV_half_c; break;
  2630. case PIX_FMT_RGB24 : c->chrToYV12 = rgb24ToUV_half_c; break;
  2631. case PIX_FMT_RGB565LE: c->chrToYV12 = rgb16leToUV_half_c; break;
  2632. case PIX_FMT_RGB565BE: c->chrToYV12 = rgb16beToUV_half_c; break;
  2633. case PIX_FMT_RGB555LE: c->chrToYV12 = rgb15leToUV_half_c; break;
  2634. case PIX_FMT_RGB555BE: c->chrToYV12 = rgb15beToUV_half_c; break;
  2635. case PIX_FMT_GBR24P : c->chrToYV12 = gbr24pToUV_half_c; break;
  2636. case PIX_FMT_RGB444LE: c->chrToYV12 = rgb12leToUV_half_c; break;
  2637. case PIX_FMT_RGB444BE: c->chrToYV12 = rgb12beToUV_half_c; break;
  2638. }
  2639. } else {
  2640. switch(srcFormat) {
  2641. case PIX_FMT_RGB48BE : c->chrToYV12 = rgb48BEToUV_c; break;
  2642. case PIX_FMT_RGB48LE : c->chrToYV12 = rgb48LEToUV_c; break;
  2643. case PIX_FMT_BGR48BE : c->chrToYV12 = bgr48BEToUV_c; break;
  2644. case PIX_FMT_BGR48LE : c->chrToYV12 = bgr48LEToUV_c; break;
  2645. case PIX_FMT_RGB32 : c->chrToYV12 = bgr32ToUV_c; break;
  2646. case PIX_FMT_RGB32_1 : c->chrToYV12 = bgr321ToUV_c; break;
  2647. case PIX_FMT_BGR24 : c->chrToYV12 = bgr24ToUV_c; break;
  2648. case PIX_FMT_BGR565LE: c->chrToYV12 = bgr16leToUV_c; break;
  2649. case PIX_FMT_BGR565BE: c->chrToYV12 = bgr16beToUV_c; break;
  2650. case PIX_FMT_BGR555LE: c->chrToYV12 = bgr15leToUV_c; break;
  2651. case PIX_FMT_BGR555BE: c->chrToYV12 = bgr15beToUV_c; break;
  2652. case PIX_FMT_BGR444LE: c->chrToYV12 = bgr12leToUV_c; break;
  2653. case PIX_FMT_BGR444BE: c->chrToYV12 = bgr12beToUV_c; break;
  2654. case PIX_FMT_BGR32 : c->chrToYV12 = rgb32ToUV_c; break;
  2655. case PIX_FMT_BGR32_1 : c->chrToYV12 = rgb321ToUV_c; break;
  2656. case PIX_FMT_RGB24 : c->chrToYV12 = rgb24ToUV_c; break;
  2657. case PIX_FMT_RGB565LE: c->chrToYV12 = rgb16leToUV_c; break;
  2658. case PIX_FMT_RGB565BE: c->chrToYV12 = rgb16beToUV_c; break;
  2659. case PIX_FMT_RGB555LE: c->chrToYV12 = rgb15leToUV_c; break;
  2660. case PIX_FMT_RGB555BE: c->chrToYV12 = rgb15beToUV_c; break;
  2661. case PIX_FMT_RGB444LE: c->chrToYV12 = rgb12leToUV_c; break;
  2662. case PIX_FMT_RGB444BE: c->chrToYV12 = rgb12beToUV_c; break;
  2663. }
  2664. }
  2665. c->lumToYV12 = NULL;
  2666. c->alpToYV12 = NULL;
  2667. switch (srcFormat) {
  2668. case PIX_FMT_GBRP9LE:
  2669. case PIX_FMT_GBRP10LE:
  2670. case PIX_FMT_GBRP16LE: c->readLumPlanar = planar_rgb16le_to_y; break;
  2671. case PIX_FMT_GBRP9BE:
  2672. case PIX_FMT_GBRP10BE:
  2673. case PIX_FMT_GBRP16BE: c->readLumPlanar = planar_rgb16be_to_y; break;
  2674. case PIX_FMT_GBRP: c->readLumPlanar = planar_rgb_to_y; break;
  2675. #if HAVE_BIGENDIAN
  2676. case PIX_FMT_YUV444P9LE:
  2677. case PIX_FMT_YUV422P9LE:
  2678. case PIX_FMT_YUV420P9LE:
  2679. case PIX_FMT_YUV422P10LE:
  2680. case PIX_FMT_YUV420P10LE:
  2681. case PIX_FMT_YUV444P10LE:
  2682. case PIX_FMT_YUV420P16LE:
  2683. case PIX_FMT_YUV422P16LE:
  2684. case PIX_FMT_YUV444P16LE:
  2685. case PIX_FMT_GRAY16LE: c->lumToYV12 = bswap16Y_c; break;
  2686. #else
  2687. case PIX_FMT_YUV444P9BE:
  2688. case PIX_FMT_YUV422P9BE:
  2689. case PIX_FMT_YUV420P9BE:
  2690. case PIX_FMT_YUV444P10BE:
  2691. case PIX_FMT_YUV422P10BE:
  2692. case PIX_FMT_YUV420P10BE:
  2693. case PIX_FMT_YUV420P16BE:
  2694. case PIX_FMT_YUV422P16BE:
  2695. case PIX_FMT_YUV444P16BE:
  2696. case PIX_FMT_GRAY16BE: c->lumToYV12 = bswap16Y_c; break;
  2697. #endif
  2698. case PIX_FMT_YUYV422 :
  2699. case PIX_FMT_Y400A : c->lumToYV12 = yuy2ToY_c; break;
  2700. case PIX_FMT_UYVY422 : c->lumToYV12 = uyvyToY_c; break;
  2701. case PIX_FMT_BGR24 : c->lumToYV12 = bgr24ToY_c; break;
  2702. case PIX_FMT_BGR565LE : c->lumToYV12 = bgr16leToY_c; break;
  2703. case PIX_FMT_BGR565BE : c->lumToYV12 = bgr16beToY_c; break;
  2704. case PIX_FMT_BGR555LE : c->lumToYV12 = bgr15leToY_c; break;
  2705. case PIX_FMT_BGR555BE : c->lumToYV12 = bgr15beToY_c; break;
  2706. case PIX_FMT_BGR444LE : c->lumToYV12 = bgr12leToY_c; break;
  2707. case PIX_FMT_BGR444BE : c->lumToYV12 = bgr12beToY_c; break;
  2708. case PIX_FMT_RGB24 : c->lumToYV12 = rgb24ToY_c; break;
  2709. case PIX_FMT_RGB565LE : c->lumToYV12 = rgb16leToY_c; break;
  2710. case PIX_FMT_RGB565BE : c->lumToYV12 = rgb16beToY_c; break;
  2711. case PIX_FMT_RGB555LE : c->lumToYV12 = rgb15leToY_c; break;
  2712. case PIX_FMT_RGB555BE : c->lumToYV12 = rgb15beToY_c; break;
  2713. case PIX_FMT_RGB444LE : c->lumToYV12 = rgb12leToY_c; break;
  2714. case PIX_FMT_RGB444BE : c->lumToYV12 = rgb12beToY_c; break;
  2715. case PIX_FMT_RGB8 :
  2716. case PIX_FMT_BGR8 :
  2717. case PIX_FMT_PAL8 :
  2718. case PIX_FMT_BGR4_BYTE:
  2719. case PIX_FMT_RGB4_BYTE: c->lumToYV12 = palToY_c; break;
  2720. case PIX_FMT_MONOBLACK: c->lumToYV12 = monoblack2Y_c; break;
  2721. case PIX_FMT_MONOWHITE: c->lumToYV12 = monowhite2Y_c; break;
  2722. case PIX_FMT_RGB32 : c->lumToYV12 = bgr32ToY_c; break;
  2723. case PIX_FMT_RGB32_1: c->lumToYV12 = bgr321ToY_c; break;
  2724. case PIX_FMT_BGR32 : c->lumToYV12 = rgb32ToY_c; break;
  2725. case PIX_FMT_BGR32_1: c->lumToYV12 = rgb321ToY_c; break;
  2726. case PIX_FMT_RGB48BE: c->lumToYV12 = rgb48BEToY_c; break;
  2727. case PIX_FMT_RGB48LE: c->lumToYV12 = rgb48LEToY_c; break;
  2728. case PIX_FMT_BGR48BE: c->lumToYV12 = bgr48BEToY_c; break;
  2729. case PIX_FMT_BGR48LE: c->lumToYV12 = bgr48LEToY_c; break;
  2730. }
  2731. if (c->alpPixBuf) {
  2732. switch (srcFormat) {
  2733. case PIX_FMT_BGRA:
  2734. case PIX_FMT_RGBA: c->alpToYV12 = rgbaToA_c; break;
  2735. case PIX_FMT_ABGR:
  2736. case PIX_FMT_ARGB: c->alpToYV12 = abgrToA_c; break;
  2737. case PIX_FMT_Y400A: c->alpToYV12 = uyvyToY_c; break;
  2738. case PIX_FMT_PAL8 : c->alpToYV12 = palToA_c; break;
  2739. }
  2740. }
  2741. if (c->srcBpc == 8) {
  2742. if (c->dstBpc <= 10) {
  2743. c->hyScale = c->hcScale = hScale8To15_c;
  2744. if (c->flags & SWS_FAST_BILINEAR) {
  2745. c->hyscale_fast = hyscale_fast_c;
  2746. c->hcscale_fast = hcscale_fast_c;
  2747. }
  2748. } else {
  2749. c->hyScale = c->hcScale = hScale8To19_c;
  2750. }
  2751. } else {
  2752. c->hyScale = c->hcScale = c->dstBpc > 10 ? hScale16To19_c : hScale16To15_c;
  2753. }
  2754. if (c->srcRange != c->dstRange && !isAnyRGB(c->dstFormat)) {
  2755. if (c->dstBpc <= 10) {
  2756. if (c->srcRange) {
  2757. c->lumConvertRange = lumRangeFromJpeg_c;
  2758. c->chrConvertRange = chrRangeFromJpeg_c;
  2759. } else {
  2760. c->lumConvertRange = lumRangeToJpeg_c;
  2761. c->chrConvertRange = chrRangeToJpeg_c;
  2762. }
  2763. } else {
  2764. if (c->srcRange) {
  2765. c->lumConvertRange = lumRangeFromJpeg16_c;
  2766. c->chrConvertRange = chrRangeFromJpeg16_c;
  2767. } else {
  2768. c->lumConvertRange = lumRangeToJpeg16_c;
  2769. c->chrConvertRange = chrRangeToJpeg16_c;
  2770. }
  2771. }
  2772. }
  2773. if (!(isGray(srcFormat) || isGray(c->dstFormat) ||
  2774. srcFormat == PIX_FMT_MONOBLACK || srcFormat == PIX_FMT_MONOWHITE))
  2775. c->needs_hcscale = 1;
  2776. }
  2777. SwsFunc ff_getSwsFunc(SwsContext *c)
  2778. {
  2779. sws_init_swScale_c(c);
  2780. if (HAVE_MMX)
  2781. ff_sws_init_swScale_mmx(c);
  2782. if (HAVE_ALTIVEC)
  2783. ff_sws_init_swScale_altivec(c);
  2784. return swScale;
  2785. }