lut3d.c 9.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290
  1. /*
  2. * Copyright (C) 2024 Niklas Haas
  3. *
  4. * This file is part of FFmpeg.
  5. *
  6. * FFmpeg is free software; you can redistribute it and/or
  7. * modify it under the terms of the GNU Lesser General Public
  8. * License as published by the Free Software Foundation; either
  9. * version 2.1 of the License, or (at your option) any later version.
  10. *
  11. * FFmpeg is distributed in the hope that it will be useful,
  12. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  14. * Lesser General Public License for more details.
  15. *
  16. * You should have received a copy of the GNU Lesser General Public
  17. * License along with FFmpeg; if not, write to the Free Software
  18. * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  19. */
  20. #include <assert.h>
  21. #include <string.h>
  22. #include "libavutil/attributes.h"
  23. #include "libavutil/avassert.h"
  24. #include "libavutil/mem.h"
  25. #include "cms.h"
  26. #include "csputils.h"
  27. #include "lut3d.h"
  28. SwsLut3D *ff_sws_lut3d_alloc(void)
  29. {
  30. SwsLut3D *lut3d = av_malloc(sizeof(*lut3d));
  31. if (!lut3d)
  32. return NULL;
  33. lut3d->dynamic = false;
  34. return lut3d;
  35. }
  36. void ff_sws_lut3d_free(SwsLut3D **plut3d)
  37. {
  38. av_freep(plut3d);
  39. }
  40. bool ff_sws_lut3d_test_fmt(enum AVPixelFormat fmt, int output)
  41. {
  42. return fmt == AV_PIX_FMT_RGBA64;
  43. }
  44. enum AVPixelFormat ff_sws_lut3d_pick_pixfmt(SwsFormat fmt, int output)
  45. {
  46. return AV_PIX_FMT_RGBA64;
  47. }
  48. /**
  49. * v0 and v1 are 'black' and 'white'
  50. * v2 and v3 are closest RGB/CMY vertices
  51. * x >= y >= z are relative weights
  52. */
  53. static av_always_inline
  54. v3u16_t barycentric(int shift, int x, int y, int z,
  55. v3u16_t v0, v3u16_t v1, v3u16_t v2, v3u16_t v3)
  56. {
  57. const int a = (1 << shift) - x;
  58. const int b = x - y;
  59. const int c = y - z;
  60. const int d = z;
  61. av_assert2(x >= y);
  62. av_assert2(y >= z);
  63. return (v3u16_t) {
  64. (a * v0.x + b * v1.x + c * v2.x + d * v3.x) >> shift,
  65. (a * v0.y + b * v1.y + c * v2.y + d * v3.y) >> shift,
  66. (a * v0.z + b * v1.z + c * v2.z + d * v3.z) >> shift,
  67. };
  68. }
  69. static av_always_inline
  70. v3u16_t tetrahedral(const SwsLut3D *lut3d, int Rx, int Gx, int Bx,
  71. int Rf, int Gf, int Bf)
  72. {
  73. const int shift = 16 - INPUT_LUT_BITS;
  74. const int Rn = FFMIN(Rx + 1, INPUT_LUT_SIZE - 1);
  75. const int Gn = FFMIN(Gx + 1, INPUT_LUT_SIZE - 1);
  76. const int Bn = FFMIN(Bx + 1, INPUT_LUT_SIZE - 1);
  77. const v3u16_t c000 = lut3d->input[Bx][Gx][Rx];
  78. const v3u16_t c111 = lut3d->input[Bn][Gn][Rn];
  79. if (Rf > Gf) {
  80. if (Gf > Bf) {
  81. const v3u16_t c100 = lut3d->input[Bx][Gx][Rn];
  82. const v3u16_t c110 = lut3d->input[Bx][Gn][Rn];
  83. return barycentric(shift, Rf, Gf, Bf, c000, c100, c110, c111);
  84. } else if (Rf > Bf) {
  85. const v3u16_t c100 = lut3d->input[Bx][Gx][Rn];
  86. const v3u16_t c101 = lut3d->input[Bn][Gx][Rn];
  87. return barycentric(shift, Rf, Bf, Gf, c000, c100, c101, c111);
  88. } else {
  89. const v3u16_t c001 = lut3d->input[Bn][Gx][Rx];
  90. const v3u16_t c101 = lut3d->input[Bn][Gx][Rn];
  91. return barycentric(shift, Bf, Rf, Gf, c000, c001, c101, c111);
  92. }
  93. } else {
  94. if (Bf > Gf) {
  95. const v3u16_t c001 = lut3d->input[Bn][Gx][Rx];
  96. const v3u16_t c011 = lut3d->input[Bn][Gn][Rx];
  97. return barycentric(shift, Bf, Gf, Rf, c000, c001, c011, c111);
  98. } else if (Bf > Rf) {
  99. const v3u16_t c010 = lut3d->input[Bx][Gn][Rx];
  100. const v3u16_t c011 = lut3d->input[Bn][Gn][Rx];
  101. return barycentric(shift, Gf, Bf, Rf, c000, c010, c011, c111);
  102. } else {
  103. const v3u16_t c010 = lut3d->input[Bx][Gn][Rx];
  104. const v3u16_t c110 = lut3d->input[Bx][Gn][Rn];
  105. return barycentric(shift, Gf, Rf, Bf, c000, c010, c110, c111);
  106. }
  107. }
  108. }
  109. static av_always_inline v3u16_t lookup_input16(const SwsLut3D *lut3d, v3u16_t rgb)
  110. {
  111. const int shift = 16 - INPUT_LUT_BITS;
  112. const int Rx = rgb.x >> shift;
  113. const int Gx = rgb.y >> shift;
  114. const int Bx = rgb.z >> shift;
  115. const int Rf = rgb.x & ((1 << shift) - 1);
  116. const int Gf = rgb.y & ((1 << shift) - 1);
  117. const int Bf = rgb.z & ((1 << shift) - 1);
  118. return tetrahedral(lut3d, Rx, Gx, Bx, Rf, Gf, Bf);
  119. }
  120. static av_always_inline v3u16_t lookup_input8(const SwsLut3D *lut3d, v3u8_t rgb)
  121. {
  122. static_assert(INPUT_LUT_BITS <= 8, "INPUT_LUT_BITS must be <= 8");
  123. const int shift = 8 - INPUT_LUT_BITS;
  124. const int Rx = rgb.x >> shift;
  125. const int Gx = rgb.y >> shift;
  126. const int Bx = rgb.z >> shift;
  127. const int Rf = rgb.x & ((1 << shift) - 1);
  128. const int Gf = rgb.y & ((1 << shift) - 1);
  129. const int Bf = rgb.z & ((1 << shift) - 1);
  130. return tetrahedral(lut3d, Rx, Gx, Bx, Rf, Gf, Bf);
  131. }
  132. /**
  133. * Note: These functions are scaled such that x == (1 << shift) corresponds to
  134. * a value of 1.0. This makes them suitable for use when interpolation LUT
  135. * entries with a fractional part that is just masked away from the index,
  136. * since a fractional coordinate of e.g. 0xFFFF corresponds to a mix weight of
  137. * just slightly *less* than 1.0.
  138. */
  139. static av_always_inline v2u16_t lerp2u16(v2u16_t a, v2u16_t b, int x, int shift)
  140. {
  141. const int xi = (1 << shift) - x;
  142. return (v2u16_t) {
  143. (a.x * xi + b.x * x) >> shift,
  144. (a.y * xi + b.y * x) >> shift,
  145. };
  146. }
  147. static av_always_inline v3u16_t lerp3u16(v3u16_t a, v3u16_t b, int x, int shift)
  148. {
  149. const int xi = (1 << shift) - x;
  150. return (v3u16_t) {
  151. (a.x * xi + b.x * x) >> shift,
  152. (a.y * xi + b.y * x) >> shift,
  153. (a.z * xi + b.z * x) >> shift,
  154. };
  155. }
  156. static av_always_inline v3u16_t lookup_output(const SwsLut3D *lut3d, v3u16_t ipt)
  157. {
  158. const int Ishift = 16 - OUTPUT_LUT_BITS_I;
  159. const int Cshift = 16 - OUTPUT_LUT_BITS_PT;
  160. const int Ix = ipt.x >> Ishift;
  161. const int Px = ipt.y >> Cshift;
  162. const int Tx = ipt.z >> Cshift;
  163. const int If = ipt.x & ((1 << Ishift) - 1);
  164. const int Pf = ipt.y & ((1 << Cshift) - 1);
  165. const int Tf = ipt.z & ((1 << Cshift) - 1);
  166. const int In = FFMIN(Ix + 1, OUTPUT_LUT_SIZE_I - 1);
  167. const int Pn = FFMIN(Px + 1, OUTPUT_LUT_SIZE_PT - 1);
  168. const int Tn = FFMIN(Tx + 1, OUTPUT_LUT_SIZE_PT - 1);
  169. /* Trilinear interpolation */
  170. const v3u16_t c000 = lut3d->output[Tx][Px][Ix];
  171. const v3u16_t c001 = lut3d->output[Tx][Px][In];
  172. const v3u16_t c010 = lut3d->output[Tx][Pn][Ix];
  173. const v3u16_t c011 = lut3d->output[Tx][Pn][In];
  174. const v3u16_t c100 = lut3d->output[Tn][Px][Ix];
  175. const v3u16_t c101 = lut3d->output[Tn][Px][In];
  176. const v3u16_t c110 = lut3d->output[Tn][Pn][Ix];
  177. const v3u16_t c111 = lut3d->output[Tn][Pn][In];
  178. const v3u16_t c00 = lerp3u16(c000, c100, Tf, Cshift);
  179. const v3u16_t c10 = lerp3u16(c010, c110, Tf, Cshift);
  180. const v3u16_t c01 = lerp3u16(c001, c101, Tf, Cshift);
  181. const v3u16_t c11 = lerp3u16(c011, c111, Tf, Cshift);
  182. const v3u16_t c0 = lerp3u16(c00, c10, Pf, Cshift);
  183. const v3u16_t c1 = lerp3u16(c01, c11, Pf, Cshift);
  184. const v3u16_t c = lerp3u16(c0, c1, If, Ishift);
  185. return c;
  186. }
  187. static av_always_inline v3u16_t apply_tone_map(const SwsLut3D *lut3d, v3u16_t ipt)
  188. {
  189. const int shift = 16 - TONE_LUT_BITS;
  190. const int Ix = ipt.x >> shift;
  191. const int If = ipt.x & ((1 << shift) - 1);
  192. const int In = FFMIN(Ix + 1, TONE_LUT_SIZE - 1);
  193. const v2u16_t w0 = lut3d->tone_map[Ix];
  194. const v2u16_t w1 = lut3d->tone_map[In];
  195. const v2u16_t w = lerp2u16(w0, w1, If, shift);
  196. const int base = (1 << 15) - w.y;
  197. ipt.x = w.x;
  198. ipt.y = base + (ipt.y * w.y >> 15);
  199. ipt.z = base + (ipt.z * w.y >> 15);
  200. return ipt;
  201. }
  202. int ff_sws_lut3d_generate(SwsLut3D *lut3d, enum AVPixelFormat fmt_in,
  203. enum AVPixelFormat fmt_out, const SwsColorMap *map)
  204. {
  205. int ret;
  206. if (!ff_sws_lut3d_test_fmt(fmt_in, 0) || !ff_sws_lut3d_test_fmt(fmt_out, 1))
  207. return AVERROR(EINVAL);
  208. lut3d->dynamic = map->src.frame_peak.num > 0;
  209. lut3d->map = *map;
  210. if (lut3d->dynamic) {
  211. ret = ff_sws_color_map_generate_dynamic(&lut3d->input[0][0][0],
  212. &lut3d->output[0][0][0],
  213. INPUT_LUT_SIZE, OUTPUT_LUT_SIZE_I,
  214. OUTPUT_LUT_SIZE_PT, map);
  215. if (ret < 0)
  216. return ret;
  217. /* Make sure initial state is valid */
  218. ff_sws_lut3d_update(lut3d, &map->src);
  219. return 0;
  220. } else {
  221. return ff_sws_color_map_generate_static(&lut3d->input[0][0][0],
  222. INPUT_LUT_SIZE, map);
  223. }
  224. }
  225. void ff_sws_lut3d_update(SwsLut3D *lut3d, const SwsColor *new_src)
  226. {
  227. if (!new_src || !lut3d->dynamic)
  228. return;
  229. lut3d->map.src.frame_peak = new_src->frame_peak;
  230. lut3d->map.src.frame_avg = new_src->frame_avg;
  231. ff_sws_tone_map_generate(lut3d->tone_map, TONE_LUT_SIZE, &lut3d->map);
  232. }
  233. void ff_sws_lut3d_apply(const SwsLut3D *lut3d, const uint8_t *in, int in_stride,
  234. uint8_t *out, int out_stride, int w, int h)
  235. {
  236. while (h--) {
  237. const uint16_t *in16 = (const uint16_t *) in;
  238. uint16_t *out16 = (uint16_t *) out;
  239. for (int x = 0; x < w; x++) {
  240. v3u16_t c = { in16[0], in16[1], in16[2] };
  241. c = lookup_input16(lut3d, c);
  242. if (lut3d->dynamic) {
  243. c = apply_tone_map(lut3d, c);
  244. c = lookup_output(lut3d, c);
  245. }
  246. out16[0] = c.x;
  247. out16[1] = c.y;
  248. out16[2] = c.z;
  249. out16[3] = in16[3];
  250. in16 += 4;
  251. out16 += 4;
  252. }
  253. in += in_stride;
  254. out += out_stride;
  255. }
  256. }