colorspacedsp_template.c 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342
  1. /*
  2. * Copyright (c) 2016 Ronald S. Bultje <rsbultje@gmail.com>
  3. *
  4. * This file is part of FFmpeg.
  5. *
  6. * FFmpeg is free software; you can redistribute it and/or
  7. * modify it under the terms of the GNU Lesser General Public
  8. * License as published by the Free Software Foundation; either
  9. * version 2.1 of the License, or (at your option) any later version.
  10. *
  11. * FFmpeg is distributed in the hope that it will be useful,
  12. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  14. * Lesser General Public License for more details.
  15. *
  16. * You should have received a copy of the GNU Lesser General Public
  17. * License along with FFmpeg; if not, write to the Free Software
  18. * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  19. */
  20. #include "libavutil/avassert.h"
  21. #undef avg
  22. #undef ss
  23. #if SS_W == 0
  24. #define ss 444
  25. #define avg(a,b,c,d) (a)
  26. #elif SS_H == 0
  27. #define ss 422
  28. #define avg(a,b,c,d) (((a) + (b) + 1) >> 1)
  29. #else
  30. #define ss 420
  31. #define avg(a,b,c,d) (((a) + (b) + (c) + (d) + 2) >> 2)
  32. #endif
  33. #undef fn
  34. #undef fn2
  35. #undef fn3
  36. #define fn3(a,b,c) a##_##c##p##b##_c
  37. #define fn2(a,b,c) fn3(a,b,c)
  38. #define fn(a) fn2(a, BIT_DEPTH, ss)
  39. #undef pixel
  40. #undef av_clip_pixel
  41. #if BIT_DEPTH == 8
  42. #define pixel uint8_t
  43. #define av_clip_pixel(x) av_clip_uint8(x)
  44. #else
  45. #define pixel uint16_t
  46. #define av_clip_pixel(x) av_clip_uintp2(x, BIT_DEPTH)
  47. #endif
  48. static void fn(yuv2rgb)(int16_t *rgb[3], ptrdiff_t rgb_stride,
  49. uint8_t *_yuv[3], const ptrdiff_t yuv_stride[3],
  50. int w, int h, const int16_t yuv2rgb_coeffs[3][3][8],
  51. const int16_t yuv_offset[8])
  52. {
  53. pixel **yuv = (pixel **) _yuv;
  54. const pixel *yuv0 = yuv[0], *yuv1 = yuv[1], *yuv2 = yuv[2];
  55. int16_t *rgb0 = rgb[0], *rgb1 = rgb[1], *rgb2 = rgb[2];
  56. int y, x;
  57. int cy = yuv2rgb_coeffs[0][0][0];
  58. int crv = yuv2rgb_coeffs[0][2][0];
  59. int cgu = yuv2rgb_coeffs[1][1][0];
  60. int cgv = yuv2rgb_coeffs[1][2][0];
  61. int cbu = yuv2rgb_coeffs[2][1][0];
  62. const int sh = BIT_DEPTH - 1, rnd = 1 << (sh - 1);
  63. const int uv_offset = 128 << (BIT_DEPTH - 8);
  64. av_assert2(yuv2rgb_coeffs[0][1][0] == 0);
  65. av_assert2(yuv2rgb_coeffs[2][2][0] == 0);
  66. av_assert2(yuv2rgb_coeffs[1][0][0] == cy && yuv2rgb_coeffs[2][0][0] == cy);
  67. w = AV_CEIL_RSHIFT(w, SS_W);
  68. h = AV_CEIL_RSHIFT(h, SS_H);
  69. for (y = 0; y < h; y++) {
  70. for (x = 0; x < w; x++) {
  71. int y00 = yuv0[x << SS_W] - yuv_offset[0];
  72. #if SS_W == 1
  73. int y01 = yuv0[2 * x + 1] - yuv_offset[0];
  74. #if SS_H == 1
  75. int y10 = yuv0[yuv_stride[0] / sizeof(pixel) + 2 * x] - yuv_offset[0];
  76. int y11 = yuv0[yuv_stride[0] / sizeof(pixel) + 2 * x + 1] - yuv_offset[0];
  77. #endif
  78. #endif
  79. int u = yuv1[x] - uv_offset, v = yuv2[x] - uv_offset;
  80. rgb0[x << SS_W] = av_clip_int16((y00 * cy + crv * v + rnd) >> sh);
  81. #if SS_W == 1
  82. rgb0[2 * x + 1] = av_clip_int16((y01 * cy + crv * v + rnd) >> sh);
  83. #if SS_H == 1
  84. rgb0[2 * x + rgb_stride] = av_clip_int16((y10 * cy + crv * v + rnd) >> sh);
  85. rgb0[2 * x + rgb_stride + 1] = av_clip_int16((y11 * cy + crv * v + rnd) >> sh);
  86. #endif
  87. #endif
  88. rgb1[x << SS_W] = av_clip_int16((y00 * cy + cgu * u +
  89. cgv * v + rnd) >> sh);
  90. #if SS_W == 1
  91. rgb1[2 * x + 1] = av_clip_int16((y01 * cy + cgu * u +
  92. cgv * v + rnd) >> sh);
  93. #if SS_H == 1
  94. rgb1[2 * x + rgb_stride] = av_clip_int16((y10 * cy + cgu * u +
  95. cgv * v + rnd) >> sh);
  96. rgb1[2 * x + rgb_stride + 1] = av_clip_int16((y11 * cy + cgu * u +
  97. cgv * v + rnd) >> sh);
  98. #endif
  99. #endif
  100. rgb2[x << SS_W] = av_clip_int16((y00 * cy + cbu * u + rnd) >> sh);
  101. #if SS_W == 1
  102. rgb2[2 * x + 1] = av_clip_int16((y01 * cy + cbu * u + rnd) >> sh);
  103. #if SS_H == 1
  104. rgb2[2 * x + rgb_stride] = av_clip_int16((y10 * cy + cbu * u + rnd) >> sh);
  105. rgb2[2 * x + rgb_stride + 1] = av_clip_int16((y11 * cy + cbu * u + rnd) >> sh);
  106. #endif
  107. #endif
  108. }
  109. yuv0 += (yuv_stride[0] * (1 << SS_H)) / sizeof(pixel);
  110. yuv1 += yuv_stride[1] / sizeof(pixel);
  111. yuv2 += yuv_stride[2] / sizeof(pixel);
  112. rgb0 += rgb_stride * (1 << SS_H);
  113. rgb1 += rgb_stride * (1 << SS_H);
  114. rgb2 += rgb_stride * (1 << SS_H);
  115. }
  116. }
  117. static void fn(rgb2yuv)(uint8_t *_yuv[3], const ptrdiff_t yuv_stride[3],
  118. int16_t *rgb[3], ptrdiff_t s,
  119. int w, int h, const int16_t rgb2yuv_coeffs[3][3][8],
  120. const int16_t yuv_offset[8])
  121. {
  122. pixel **yuv = (pixel **) _yuv;
  123. pixel *yuv0 = yuv[0], *yuv1 = yuv[1], *yuv2 = yuv[2];
  124. const int16_t *rgb0 = rgb[0], *rgb1 = rgb[1], *rgb2 = rgb[2];
  125. int y, x;
  126. const int sh = 29 - BIT_DEPTH;
  127. const int rnd = 1 << (sh - 1);
  128. int cry = rgb2yuv_coeffs[0][0][0];
  129. int cgy = rgb2yuv_coeffs[0][1][0];
  130. int cby = rgb2yuv_coeffs[0][2][0];
  131. int cru = rgb2yuv_coeffs[1][0][0];
  132. int cgu = rgb2yuv_coeffs[1][1][0];
  133. int cburv = rgb2yuv_coeffs[1][2][0];
  134. int cgv = rgb2yuv_coeffs[2][1][0];
  135. int cbv = rgb2yuv_coeffs[2][2][0];
  136. ptrdiff_t s0 = yuv_stride[0] / sizeof(pixel);
  137. const int uv_offset = 128 << (BIT_DEPTH - 8);
  138. av_assert2(rgb2yuv_coeffs[1][2][0] == rgb2yuv_coeffs[2][0][0]);
  139. w = AV_CEIL_RSHIFT(w, SS_W);
  140. h = AV_CEIL_RSHIFT(h, SS_H);
  141. for (y = 0; y < h; y++) {
  142. for (x = 0; x < w; x++) {
  143. int r00 = rgb0[x << SS_W], g00 = rgb1[x << SS_W], b00 = rgb2[x << SS_W];
  144. #if SS_W == 1
  145. int r01 = rgb0[x * 2 + 1], g01 = rgb1[x * 2 + 1], b01 = rgb2[x * 2 + 1];
  146. #if SS_H == 1
  147. int r10 = rgb0[x * 2 + 0 + s], g10 = rgb1[x * 2 + 0 + s], b10 = rgb2[x * 2 + 0 + s];
  148. int r11 = rgb0[x * 2 + 1 + s], g11 = rgb1[x * 2 + 1 + s], b11 = rgb2[x * 2 + 1 + s];
  149. #endif
  150. #endif
  151. yuv0[x << SS_W] = av_clip_pixel(yuv_offset[0] +
  152. ((r00 * cry + g00 * cgy +
  153. b00 * cby + rnd) >> sh));
  154. #if SS_W == 1
  155. yuv0[x * 2 + 1] = av_clip_pixel(yuv_offset[0] +
  156. ((r01 * cry + g01 * cgy +
  157. b01 * cby + rnd) >> sh));
  158. #if SS_H == 1
  159. yuv0[x * 2 + 0 + s0] = av_clip_pixel(yuv_offset[0] +
  160. ((r10 * cry + g10 * cgy +
  161. b10 * cby + rnd) >> sh));
  162. yuv0[x * 2 + 1 + s0] = av_clip_pixel(yuv_offset[0] +
  163. ((r11 * cry + g11 * cgy +
  164. b11 * cby + rnd) >> sh));
  165. #endif
  166. #endif
  167. yuv1[x] = av_clip_pixel(uv_offset +
  168. ((avg(r00, r01, r10, r11) * cru +
  169. avg(g00, g01, g10, g11) * cgu +
  170. avg(b00, b01, b10, b11) * cburv + rnd) >> sh));
  171. yuv2[x] = av_clip_pixel(uv_offset +
  172. ((avg(r00, r01, r10, r11) * cburv +
  173. avg(g00, g01, g10, g11) * cgv +
  174. avg(b00, b01, b10, b11) * cbv + rnd) >> sh));
  175. }
  176. yuv0 += s0 * (1 << SS_H);
  177. yuv1 += yuv_stride[1] / sizeof(pixel);
  178. yuv2 += yuv_stride[2] / sizeof(pixel);
  179. rgb0 += s * (1 << SS_H);
  180. rgb1 += s * (1 << SS_H);
  181. rgb2 += s * (1 << SS_H);
  182. }
  183. }
  184. /* floyd-steinberg dithering - for any mid-top pixel A in a 3x2 block of pixels:
  185. * 1 A 2
  186. * 3 4 5
  187. * the rounding error is distributed over the neighbouring pixels:
  188. * 2: 7/16th, 3: 3/16th, 4: 5/16th and 5: 1/16th
  189. */
  190. static void fn(rgb2yuv_fsb)(uint8_t *_yuv[3], const ptrdiff_t yuv_stride[3],
  191. int16_t *rgb[3], ptrdiff_t s,
  192. int w, int h, const int16_t rgb2yuv_coeffs[3][3][8],
  193. const int16_t yuv_offset[8],
  194. int *rnd_scratch[3][2])
  195. {
  196. pixel **yuv = (pixel **) _yuv;
  197. pixel *yuv0 = yuv[0], *yuv1 = yuv[1], *yuv2 = yuv[2];
  198. const int16_t *rgb0 = rgb[0], *rgb1 = rgb[1], *rgb2 = rgb[2];
  199. int y, x;
  200. const int sh = 29 - BIT_DEPTH;
  201. const int rnd = 1 << (sh - 1);
  202. int cry = rgb2yuv_coeffs[0][0][0];
  203. int cgy = rgb2yuv_coeffs[0][1][0];
  204. int cby = rgb2yuv_coeffs[0][2][0];
  205. int cru = rgb2yuv_coeffs[1][0][0];
  206. int cgu = rgb2yuv_coeffs[1][1][0];
  207. int cburv = rgb2yuv_coeffs[1][2][0];
  208. int cgv = rgb2yuv_coeffs[2][1][0];
  209. int cbv = rgb2yuv_coeffs[2][2][0];
  210. ptrdiff_t s0 = yuv_stride[0] / sizeof(pixel);
  211. const int uv_offset = 128 << (BIT_DEPTH - 8);
  212. unsigned mask = (1 << sh) - 1;
  213. for (x = 0; x < w; x++) {
  214. rnd_scratch[0][0][x] =
  215. rnd_scratch[0][1][x] = rnd;
  216. }
  217. av_assert2(rgb2yuv_coeffs[1][2][0] == rgb2yuv_coeffs[2][0][0]);
  218. w = AV_CEIL_RSHIFT(w, SS_W);
  219. h = AV_CEIL_RSHIFT(h, SS_H);
  220. for (x = 0; x < w; x++) {
  221. rnd_scratch[1][0][x] =
  222. rnd_scratch[1][1][x] =
  223. rnd_scratch[2][0][x] =
  224. rnd_scratch[2][1][x] = rnd;
  225. }
  226. for (y = 0; y < h; y++) {
  227. for (x = 0; x < w; x++) {
  228. int r00 = rgb0[x << SS_W], g00 = rgb1[x << SS_W], b00 = rgb2[x << SS_W];
  229. int y00;
  230. #if SS_W == 1
  231. int r01 = rgb0[x * 2 + 1], g01 = rgb1[x * 2 + 1], b01 = rgb2[x * 2 + 1];
  232. int y01;
  233. #if SS_H == 1
  234. int r10 = rgb0[x * 2 + 0 + s], g10 = rgb1[x * 2 + 0 + s], b10 = rgb2[x * 2 + 0 + s];
  235. int r11 = rgb0[x * 2 + 1 + s], g11 = rgb1[x * 2 + 1 + s], b11 = rgb2[x * 2 + 1 + s];
  236. int y10, y11;
  237. #endif
  238. #endif
  239. int u, v, diff;
  240. y00 = r00 * cry + g00 * cgy + b00 * cby + rnd_scratch[0][y & !SS_H][x << SS_W];
  241. diff = (y00 & mask) - rnd;
  242. yuv0[x << SS_W] = av_clip_pixel(yuv_offset[0] + (y00 >> sh));
  243. rnd_scratch[0][ (y & !SS_H)][(x << SS_W) + 1] += (diff * 7 + 8) >> 4;
  244. rnd_scratch[0][!(y & !SS_H)][(x << SS_W) - 1] += (diff * 3 + 8) >> 4;
  245. rnd_scratch[0][!(y & !SS_H)][(x << SS_W) + 0] += (diff * 5 + 8) >> 4;
  246. rnd_scratch[0][!(y & !SS_H)][(x << SS_W) + 1] += (diff * 1 + 8) >> 4;
  247. rnd_scratch[0][ (y & !SS_H)][(x << SS_W) + 0] = rnd;
  248. #if SS_W == 1
  249. y01 = r01 * cry + g01 * cgy + b01 * cby + rnd_scratch[0][y & !SS_H][x * 2 + 1];
  250. diff = (y01 & mask) - rnd;
  251. yuv0[x * 2 + 1] = av_clip_pixel(yuv_offset[0] + (y01 >> sh));
  252. rnd_scratch[0][ (y & !SS_H)][x * 2 + 2] += (diff * 7 + 8) >> 4;
  253. rnd_scratch[0][!(y & !SS_H)][x * 2 + 0] += (diff * 3 + 8) >> 4;
  254. rnd_scratch[0][!(y & !SS_H)][x * 2 + 1] += (diff * 5 + 8) >> 4;
  255. rnd_scratch[0][!(y & !SS_H)][x * 2 + 2] += (diff * 1 + 8) >> 4;
  256. rnd_scratch[0][ (y & !SS_H)][x * 2 + 1] = rnd;
  257. #if SS_H == 1
  258. y10 = r10 * cry + g10 * cgy + b10 * cby + rnd_scratch[0][1][x * 2 + 0];
  259. diff = (y10 & mask) - rnd;
  260. yuv0[x * 2 + 0 + s0] = av_clip_pixel(yuv_offset[0] + (y10 >> sh));
  261. rnd_scratch[0][1][x * 2 + 1] += (diff * 7 + 8) >> 4;
  262. rnd_scratch[0][0][x * 2 - 1] += (diff * 3 + 8) >> 4;
  263. rnd_scratch[0][0][x * 2 + 0] += (diff * 5 + 8) >> 4;
  264. rnd_scratch[0][0][x * 2 + 1] += (diff * 1 + 8) >> 4;
  265. rnd_scratch[0][1][x * 2 + 0] = rnd;
  266. y11 = r11 * cry + g11 * cgy + b11 * cby + rnd_scratch[0][1][x * 2 + 1];
  267. diff = (y11 & mask) - rnd;
  268. yuv0[x * 2 + 1 + s0] = av_clip_pixel(yuv_offset[0] + (y11 >> sh));
  269. rnd_scratch[0][1][x * 2 + 2] += (diff * 7 + 8) >> 4;
  270. rnd_scratch[0][0][x * 2 + 0] += (diff * 3 + 8) >> 4;
  271. rnd_scratch[0][0][x * 2 + 1] += (diff * 5 + 8) >> 4;
  272. rnd_scratch[0][0][x * 2 + 2] += (diff * 1 + 8) >> 4;
  273. rnd_scratch[0][1][x * 2 + 1] = rnd;
  274. #endif
  275. #endif
  276. u = avg(r00, r01, r10, r11) * cru +
  277. avg(g00, g01, g10, g11) * cgu +
  278. avg(b00, b01, b10, b11) * cburv + rnd_scratch[1][y & 1][x];
  279. diff = (u & mask) - rnd;
  280. yuv1[x] = av_clip_pixel(uv_offset + (u >> sh));
  281. rnd_scratch[1][ (y & 1)][x + 1] += (diff * 7 + 8) >> 4;
  282. rnd_scratch[1][!(y & 1)][x - 1] += (diff * 3 + 8) >> 4;
  283. rnd_scratch[1][!(y & 1)][x + 0] += (diff * 5 + 8) >> 4;
  284. rnd_scratch[1][!(y & 1)][x + 1] += (diff * 1 + 8) >> 4;
  285. rnd_scratch[1][ (y & 1)][x + 0] = rnd;
  286. v = avg(r00, r01, r10, r11) * cburv +
  287. avg(g00, g01, g10, g11) * cgv +
  288. avg(b00, b01, b10, b11) * cbv + rnd_scratch[2][y & 1][x];
  289. diff = (v & mask) - rnd;
  290. yuv2[x] = av_clip_pixel(uv_offset + (v >> sh));
  291. rnd_scratch[2][ (y & 1)][x + 1] += (diff * 7 + 8) >> 4;
  292. rnd_scratch[2][!(y & 1)][x - 1] += (diff * 3 + 8) >> 4;
  293. rnd_scratch[2][!(y & 1)][x + 0] += (diff * 5 + 8) >> 4;
  294. rnd_scratch[2][!(y & 1)][x + 1] += (diff * 1 + 8) >> 4;
  295. rnd_scratch[2][ (y & 1)][x + 0] = rnd;
  296. }
  297. yuv0 += s0 * (1 << SS_H);
  298. yuv1 += yuv_stride[1] / sizeof(pixel);
  299. yuv2 += yuv_stride[2] / sizeof(pixel);
  300. rgb0 += s * (1 << SS_H);
  301. rgb1 += s * (1 << SS_H);
  302. rgb2 += s * (1 << SS_H);
  303. }
  304. }
  305. #undef IN_BIT_DEPTH
  306. #undef OUT_BIT_DEPTH
  307. #define OUT_BIT_DEPTH BIT_DEPTH
  308. #define IN_BIT_DEPTH 8
  309. #include "colorspacedsp_yuv2yuv_template.c"
  310. #undef IN_BIT_DEPTH
  311. #define IN_BIT_DEPTH 10
  312. #include "colorspacedsp_yuv2yuv_template.c"
  313. #undef IN_BIT_DEPTH
  314. #define IN_BIT_DEPTH 12
  315. #include "colorspacedsp_yuv2yuv_template.c"