cms.c
/*
 * Copyright (C) 2024 Niklas Haas
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

#include <math.h>
#include <string.h>

#include "libavutil/attributes.h"
#include "libavutil/avassert.h"
#include "libavutil/csp.h"
#include "libavutil/slicethread.h"

#include "cms.h"
#include "csputils.h"
#include "libswscale/swscale.h"
#include "utils.h"
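
/*
 * Color management: builds the lookup tables that swscale uses to convert
 * between colorspaces, combining gamut mapping and tone mapping. All mapping
 * decisions are made in the (approximately perceptually uniform) IPT space,
 * derived from PQ-encoded LMS, using the helpers below.
 */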

bool ff_sws_color_map_noop(const SwsColorMap *map)
{
    /* If the encoding space is different, we must go through a conversion */
    if (map->src.prim != map->dst.prim || map->src.trc != map->dst.trc)
        return false;

    /* If the black point changes, we have to perform black point compensation */
    if (av_cmp_q(map->src.min_luma, map->dst.min_luma))
        return false;

    switch (map->intent) {
    case SWS_INTENT_ABSOLUTE_COLORIMETRIC:
    case SWS_INTENT_RELATIVE_COLORIMETRIC:
        return ff_prim_superset(&map->dst.gamut, &map->src.gamut) &&
               av_cmp_q(map->src.max_luma, map->dst.max_luma) <= 0;
    case SWS_INTENT_PERCEPTUAL:
    case SWS_INTENT_SATURATION:
        return ff_prim_equal(&map->dst.gamut, &map->src.gamut) &&
               !av_cmp_q(map->src.max_luma, map->dst.max_luma);
    default:
        av_assert0(!"Invalid gamut mapping intent?");
        return true;
    }
}

/* Approximation of gamut hull at a given intensity level */
static const float hull(float I)
{
    return ((I - 6.0f) * I + 9.0f) * I;
}

/* For some minimal type safety, and code cleanliness */
typedef struct RGB {
    float R, G, B; /* nits */
} RGB;

typedef struct IPT {
    float I, P, T;
} IPT;

typedef struct ICh {
    float I, C, h;
} ICh;

static av_always_inline ICh ipt2ich(IPT c)
{
    return (ICh) {
        .I = c.I,
        .C = sqrtf(c.P * c.P + c.T * c.T),
        .h = atan2f(c.T, c.P),
    };
}

static av_always_inline IPT ich2ipt(ICh c)
{
    return (IPT) {
        .I = c.I,
        .P = c.C * cosf(c.h),
        .T = c.C * sinf(c.h),
    };
}

/* Helper struct containing pre-computed cached values describing a gamut */
typedef struct Gamut {
    SwsMatrix3x3 encoding2lms;
    SwsMatrix3x3 lms2encoding;
    SwsMatrix3x3 lms2content;
    SwsMatrix3x3 content2lms;
    av_csp_eotf_function eotf;
    av_csp_eotf_function eotf_inv;
    float Iavg_frame;
    float Imax_frame;
    float Imin, Imax;
    float Lb, Lw;
    AVCIExy wp;
    ICh peak; /* updated as needed in loop body when hue changes */
} Gamut;
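
/*
 * Note: the "encoding" matrices are derived from the signal's encoding
 * primaries (fmt.prim), while the "content" matrices describe the actual
 * container/mastering gamut (fmt.gamut), sharing the encoding white point.
 * The intensity bounds (Imin/Imax and the per-frame variants) are stored in
 * the PQ domain via pq_oetf().
 */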
static Gamut gamut_from_colorspace(SwsColor fmt)
{
    const AVColorPrimariesDesc *encoding = av_csp_primaries_desc_from_id(fmt.prim);
    const AVColorPrimariesDesc content = {
        .prim = fmt.gamut,
        .wp   = encoding->wp,
    };

    const float Lw = av_q2d(fmt.max_luma), Lb = av_q2d(fmt.min_luma);
    const float Imax = pq_oetf(Lw);

    return (Gamut) {
        .encoding2lms = ff_sws_ipt_rgb2lms(encoding),
        .lms2encoding = ff_sws_ipt_lms2rgb(encoding),
        .lms2content  = ff_sws_ipt_lms2rgb(&content),
        .content2lms  = ff_sws_ipt_rgb2lms(&content),
        .eotf         = av_csp_itu_eotf(fmt.trc),
        .eotf_inv     = av_csp_itu_eotf_inv(fmt.trc),
        .wp           = encoding->wp,
        .Imin         = pq_oetf(Lb),
        .Imax         = Imax,
        .Imax_frame   = fmt.frame_peak.den ? pq_oetf(av_q2d(fmt.frame_peak)) : Imax,
        .Iavg_frame   = fmt.frame_avg.den  ? pq_oetf(av_q2d(fmt.frame_avg))  : 0.0f,
        .Lb = Lb,
        .Lw = Lw,
    };
}
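
/*
 * Conversion from linear RGB to IPT: multiply by the RGB->LMS matrix, apply
 * the PQ OETF to each LMS component, then apply the fixed LMS'->IPT matrix.
 * ipt2rgb() performs the inverse, using the hard-coded IPT->LMS' coefficients
 * and the PQ EOTF.
 */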
static av_always_inline IPT rgb2ipt(RGB c, const SwsMatrix3x3 rgb2lms)
{
    const float L = rgb2lms.m[0][0] * c.R +
                    rgb2lms.m[0][1] * c.G +
                    rgb2lms.m[0][2] * c.B;
    const float M = rgb2lms.m[1][0] * c.R +
                    rgb2lms.m[1][1] * c.G +
                    rgb2lms.m[1][2] * c.B;
    const float S = rgb2lms.m[2][0] * c.R +
                    rgb2lms.m[2][1] * c.G +
                    rgb2lms.m[2][2] * c.B;
    const float Lp = pq_oetf(L);
    const float Mp = pq_oetf(M);
    const float Sp = pq_oetf(S);
    return (IPT) {
        .I = 0.4000f * Lp + 0.4000f * Mp + 0.2000f * Sp,
        .P = 4.4550f * Lp - 4.8510f * Mp + 0.3960f * Sp,
        .T = 0.8056f * Lp + 0.3572f * Mp - 1.1628f * Sp,
    };
}

static av_always_inline RGB ipt2rgb(IPT c, const SwsMatrix3x3 lms2rgb)
{
    const float Lp = c.I + 0.0975689f * c.P + 0.205226f * c.T;
    const float Mp = c.I - 0.1138760f * c.P + 0.133217f * c.T;
    const float Sp = c.I + 0.0326151f * c.P - 0.676887f * c.T;
    const float L = pq_eotf(Lp);
    const float M = pq_eotf(Mp);
    const float S = pq_eotf(Sp);
    return (RGB) {
        .R = lms2rgb.m[0][0] * L +
             lms2rgb.m[0][1] * M +
             lms2rgb.m[0][2] * S,
        .G = lms2rgb.m[1][0] * L +
             lms2rgb.m[1][1] * M +
             lms2rgb.m[1][2] * S,
        .B = lms2rgb.m[2][0] * L +
             lms2rgb.m[2][1] * M +
             lms2rgb.m[2][2] * S,
    };
}

static inline bool ingamut(IPT c, Gamut gamut)
{
    const float min_rgb = gamut.Lb - 1e-4f;
    const float max_rgb = gamut.Lw + 1e-2f;
    const float Lp = c.I + 0.0975689f * c.P + 0.205226f * c.T;
    const float Mp = c.I - 0.1138760f * c.P + 0.133217f * c.T;
    const float Sp = c.I + 0.0326151f * c.P - 0.676887f * c.T;
    if (Lp < gamut.Imin || Lp > gamut.Imax ||
        Mp < gamut.Imin || Mp > gamut.Imax ||
        Sp < gamut.Imin || Sp > gamut.Imax)
    {
        /* Values outside legal LMS range */
        return false;
    } else {
        const float L = pq_eotf(Lp);
        const float M = pq_eotf(Mp);
        const float S = pq_eotf(Sp);
        RGB rgb = {
            .R = gamut.lms2content.m[0][0] * L +
                 gamut.lms2content.m[0][1] * M +
                 gamut.lms2content.m[0][2] * S,
            .G = gamut.lms2content.m[1][0] * L +
                 gamut.lms2content.m[1][1] * M +
                 gamut.lms2content.m[1][2] * S,
            .B = gamut.lms2content.m[2][0] * L +
                 gamut.lms2content.m[2][1] * M +
                 gamut.lms2content.m[2][2] * S,
        };
        return rgb.R >= min_rgb && rgb.R <= max_rgb &&
               rgb.G >= min_rgb && rgb.G <= max_rgb &&
               rgb.B >= min_rgb && rgb.B <= max_rgb;
    }
}
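
/*
 * maxDelta is the relative convergence tolerance shared by the iterative
 * searches below. desat_bounded() bisects on chroma in [Cmin, Cmax] at a
 * fixed intensity and hue until the bracket shrinks below I * maxDelta,
 * yielding an estimate of the gamut boundary at that (I, h).
 */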
static const float maxDelta = 5e-5f;

// Find gamut intersection using specified bounds
static inline ICh
desat_bounded(float I, float h, float Cmin, float Cmax, Gamut gamut)
{
    if (I <= gamut.Imin)
        return (ICh) { .I = gamut.Imin, .C = 0, .h = h };
    else if (I >= gamut.Imax)
        return (ICh) { .I = gamut.Imax, .C = 0, .h = h };
    else {
        const float maxDI = I * maxDelta;
        ICh res = { .I = I, .C = (Cmin + Cmax) / 2, .h = h };
        do {
            if (ingamut(ich2ipt(res), gamut)) {
                Cmin = res.C;
            } else {
                Cmax = res.C;
            }
            res.C = (Cmin + Cmax) / 2;
        } while (Cmax - Cmin > maxDI);
        return res;
    }
}
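
/*
 * Golden-section search over intensity: invphi and invphi2 are 1/phi and
 * 1/phi^2, and each step shrinks the intensity bracket by a factor of 1/phi
 * while tracking the largest gamut-boundary chroma found so far. The result
 * approximates the gamut cusp for the given hue.
 */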
// Finds maximally saturated in-gamut color (for given hue)
static inline ICh saturate(float hue, Gamut gamut)
{
    static const float invphi = 0.6180339887498948f;
    static const float invphi2 = 0.38196601125010515f;

    ICh lo = { .I = gamut.Imin, .h = hue };
    ICh hi = { .I = gamut.Imax, .h = hue };
    float de = hi.I - lo.I;
    ICh a = { .I = lo.I + invphi2 * de };
    ICh b = { .I = lo.I + invphi  * de };
    a = desat_bounded(a.I, hue, 0.0f, 0.5f, gamut);
    b = desat_bounded(b.I, hue, 0.0f, 0.5f, gamut);

    while (de > maxDelta) {
        de *= invphi;
        if (a.C > b.C) {
            hi = b;
            b = a;
            a.I = lo.I + invphi2 * de;
            a = desat_bounded(a.I, hue, lo.C - maxDelta, 0.5f, gamut);
        } else {
            lo = a;
            a = b;
            b.I = lo.I + invphi * de;
            b = desat_bounded(b.I, hue, hi.C - maxDelta, 0.5f, gamut);
        }
    }

    return a.C > b.C ? a : b;
}
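
/*
 * Soft-clipping helper: values up to the knee (SOFTCLIP_KNEE) of the target
 * pass through unchanged; above it, a Mobius-style rational curve compresses
 * the remaining range (up to `source`) so that it fits within `target`
 * without a hard discontinuity. If the source already fits (peak <= 1), the
 * value is returned as-is.
 */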
static float softclip(float value, float source, float target)
{
    const float j = SOFTCLIP_KNEE;
    float peak, x, a, b, scale;
    if (!target)
        return 0.0f;

    peak = source / target;
    x = fminf(value / target, peak);
    if (x <= j || peak <= 1.0)
        return value;

    /* Apply simple mobius function */
    a = -j*j * (peak - 1.0f) / (j*j - 2.0f * j + peak);
    b = (j*j - 2.0f * j * peak + peak) / fmaxf(1e-6f, peak - 1.0f);
    scale = (b*b + 2.0f * b*j + j*j) / (b - a);

    return scale * (x + a) / (x + b) * target;
}

/**
 * Something like fmixf(base, c, x) but follows an exponential curve, note
 * that this can be used to extend 'c' outwards for x > 1
 */
static inline ICh mix_exp(ICh c, float x, float gamma, float base)
{
    return (ICh) {
        .I = base + (c.I - base) * powf(x, gamma),
        .C = c.C * x,
        .h = c.h,
    };
}

/**
 * Drop gamma for colors approaching black and achromatic to avoid numerical
 * instabilities, and excessive brightness boosting of grain, while also
 * strongly boosting gamma for values exceeding the target peak
 */
static inline float scale_gamma(float gamma, ICh ich, Gamut gamut)
{
    const float Imin = gamut.Imin;
    const float Irel = fmaxf((ich.I - Imin) / (gamut.peak.I - Imin), 0.0f);
    return gamma * powf(Irel, 3) * fminf(ich.C / gamut.peak.C, 1.0f);
}

/* Clip a color along the exponential curve given by `gamma` */
static inline IPT clip_gamma(IPT ipt, float gamma, Gamut gamut)
{
    float lo = 0.0f, hi = 1.0f, x = 0.5f;
    const float maxDI = fmaxf(ipt.I * maxDelta, 1e-7f);
    ICh ich;

    if (ipt.I <= gamut.Imin)
        return (IPT) { .I = gamut.Imin };
    if (ingamut(ipt, gamut))
        return ipt;

    ich = ipt2ich(ipt);
    if (!gamma)
        return ich2ipt(desat_bounded(ich.I, ich.h, 0.0f, ich.C, gamut));

    gamma = scale_gamma(gamma, ich, gamut);
    do {
        ICh test = mix_exp(ich, x, gamma, gamut.peak.I);
        if (ingamut(ich2ipt(test), gamut)) {
            lo = x;
        } else {
            hi = x;
        }
        x = (lo + hi) / 2.0f;
    } while (hi - lo > maxDI);

    return ich2ipt(mix_exp(ich, x, gamma, gamut.peak.I));
}
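
/*
 * Shared state for LUT generation: precomputed tone mapping curve parameters,
 * the source/intermediate/destination gamut descriptions, and the invocation
 * and threading parameters used by the slice workers below.
 */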
typedef struct CmsCtx CmsCtx;
struct CmsCtx {
    /* Tone mapping parameters */
    float Qa, Qb, Qc, Pa, Pb, src_knee, dst_knee; /* perceptual */
    float I_scale, I_offset; /* linear methods */

    /* Colorspace parameters */
    Gamut src;
    Gamut tmp; /* after tone mapping */
    Gamut dst;
    SwsMatrix3x3 adaptation; /* for absolute intent */

    /* Invocation parameters */
    SwsColorMap map;
    float (*tone_map)(const CmsCtx *ctx, float I);
    IPT (*adapt_colors)(const CmsCtx *ctx, IPT ipt);
    v3u16_t *input;
    v3u16_t *output;

    /* Threading parameters */
    int slice_size;
    int size_input;
    int size_output_I;
    int size_output_PT;
};

/**
 * Helper function to pick a knee point based on the HDR10+ brightness
 * metadata and scene brightness average matching.
 *
 * Inspired by SMPTE ST2094-10, with some modifications
 */
static void st2094_pick_knee(float src_max, float src_min, float src_avg,
                             float dst_max, float dst_min,
                             float *out_src_knee, float *out_dst_knee)
{
    const float min_knee = PERCEPTUAL_KNEE_MIN;
    const float max_knee = PERCEPTUAL_KNEE_MAX;
    const float def_knee = PERCEPTUAL_KNEE_DEF;
    const float src_knee_min = fmixf(src_min, src_max, min_knee);
    const float src_knee_max = fmixf(src_min, src_max, max_knee);
    const float dst_knee_min = fmixf(dst_min, dst_max, min_knee);
    const float dst_knee_max = fmixf(dst_min, dst_max, max_knee);
    float src_knee, target, adapted, tuning, adaptation, dst_knee;

    /* Choose source knee based on dynamic source scene brightness */
    src_knee = src_avg ? src_avg : fmixf(src_min, src_max, def_knee);
    src_knee = av_clipf(src_knee, src_knee_min, src_knee_max);

    /* Choose target adaptation point based on linearly re-scaling source knee */
    target = (src_knee - src_min) / (src_max - src_min);
    adapted = fmixf(dst_min, dst_max, target);

    /**
     * Choose the destination knee by picking the perceptual adaptation point
     * between the source knee and the desired target. This moves the knee
     * point, on the vertical axis, closer to the 1:1 (neutral) line.
     *
     * Adjust the adaptation strength towards 1 based on how close the knee
     * point is to its extreme values (min/max knee)
     */
    tuning = smoothstepf(max_knee, def_knee, target) *
             smoothstepf(min_knee, def_knee, target);
    adaptation = fmixf(1.0f, PERCEPTUAL_ADAPTATION, tuning);
    dst_knee = fmixf(src_knee, adapted, adaptation);
    dst_knee = av_clipf(dst_knee, dst_knee_min, dst_knee_max);

    *out_src_knee = src_knee;
    *out_dst_knee = dst_knee;
}
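
/*
 * For the perceptual intent, the tone curve is a piecewise polynomial around
 * the knee: a quadratic P below it and a cubic Q above it, matching value and
 * slope at the knee. With everything normalized to the knee, the constraints
 * documented in the code below solve to:
 *
 *   Pb = slope
 *   Pa = (out_min - slope * in_min) / in_min^2
 *   Qc = slope
 *   Qa = (slope * in_max - out_max) / (2 * in_max^3)
 *   Qb = -3 * Qa * in_max
 *
 * which is what tone_map_setup() computes.
 */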
static void tone_map_setup(CmsCtx *ctx, bool dynamic)
{
    const float dst_min = ctx->dst.Imin;
    const float dst_max = ctx->dst.Imax;
    const float src_min = ctx->src.Imin;
    const float src_max = dynamic ? ctx->src.Imax_frame : ctx->src.Imax;
    const float src_avg = dynamic ? ctx->src.Iavg_frame : 0.0f;
    float slope, ratio, in_min, in_max, out_min, out_max, t;

    switch (ctx->map.intent) {
    case SWS_INTENT_PERCEPTUAL:
        st2094_pick_knee(src_max, src_min, src_avg, dst_max, dst_min,
                         &ctx->src_knee, &ctx->dst_knee);

        /* Solve for linear knee (Pa = 0) */
        slope = (ctx->dst_knee - dst_min) / (ctx->src_knee - src_min);

        /**
         * Tune the slope at the knee point slightly: raise it to a user-provided
         * gamma exponent, multiplied by an extra tuning coefficient designed to
         * make the slope closer to 1.0 when the difference in peaks is low, and
         * closer to linear when the difference between peaks is high.
         */
        ratio = src_max / dst_max - 1.0f;
        ratio = av_clipf(SLOPE_TUNING * ratio, SLOPE_OFFSET, 1.0f + SLOPE_OFFSET);
        slope = powf(slope, (1.0f - PERCEPTUAL_CONTRAST) * ratio);

        /* Normalize everything around the pivot to make the math easier */
        in_min  = src_min - ctx->src_knee;
        in_max  = src_max - ctx->src_knee;
        out_min = dst_min - ctx->dst_knee;
        out_max = dst_max - ctx->dst_knee;

        /**
         * Solve P of order 2 for:
         *  P(in_min) = out_min
         *  P'(0.0) = slope
         *  P(0.0) = 0.0
         */
        ctx->Pa = (out_min - slope * in_min) / (in_min * in_min);
        ctx->Pb = slope;

        /**
         * Solve Q of order 3 for:
         *  Q(in_max) = out_max
         *  Q''(in_max) = 0.0
         *  Q(0.0) = 0.0
         *  Q'(0.0) = slope
         */
        t = 2 * in_max * in_max;
        ctx->Qa = (slope * in_max - out_max) / (in_max * t);
        ctx->Qb = -3 * (slope * in_max - out_max) / t;
        ctx->Qc = slope;
        break;
    case SWS_INTENT_SATURATION:
        /* Linear stretch */
        ctx->I_scale  = (dst_max - dst_min) / (src_max - src_min);
        ctx->I_offset = dst_min - src_min * ctx->I_scale;
        break;
    case SWS_INTENT_RELATIVE_COLORIMETRIC:
        /* Pure black point adaptation */
        ctx->I_scale  = src_max / (src_max - src_min) /
                        (dst_max / (dst_max - dst_min));
        ctx->I_offset = dst_min - src_min * ctx->I_scale;
        break;
    case SWS_INTENT_ABSOLUTE_COLORIMETRIC:
        /* Hard clip */
        ctx->I_scale  = 1.0f;
        ctx->I_offset = 0.0f;
        break;
    }
}

static av_always_inline IPT tone_map_apply(const CmsCtx *ctx, IPT ipt)
{
    float I = ipt.I, desat;

    if (ctx->map.intent == SWS_INTENT_PERCEPTUAL) {
        const float Pa = ctx->Pa, Pb = ctx->Pb;
        const float Qa = ctx->Qa, Qb = ctx->Qb, Qc = ctx->Qc;
        I -= ctx->src_knee;
        I = I > 0 ? ((Qa * I + Qb) * I + Qc) * I : (Pa * I + Pb) * I;
        I += ctx->dst_knee;
    } else {
        I = ctx->I_scale * I + ctx->I_offset;
    }

    /**
     * Avoids raising saturation excessively when raising brightness, and
     * also desaturates when reducing brightness greatly to account for the
     * reduction in gamut volume.
     */
    desat = fminf(ipt.I / I, hull(I) / hull(ipt.I));
    return (IPT) {
        .I = I,
        .P = ipt.P * desat,
        .T = ipt.T * desat,
    };
}
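
/*
 * Perceptual gamut mapping: compute the color that results from re-encoding
 * the source-gamut linear RGB coordinates in the destination gamut (which
 * compresses the entire gamut), then blend towards it with a weight that
 * grows with chroma relative to the gamut cusp, leaving low-chroma colors
 * mostly untouched. Finally, soft-clip each RGB channel against the
 * destination white level, using the largest channel as the reference peak,
 * and clamp to the destination black level.
 */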
static IPT perceptual(const CmsCtx *ctx, IPT ipt)
{
    ICh ich = ipt2ich(ipt);
    IPT mapped = rgb2ipt(ipt2rgb(ipt, ctx->tmp.lms2content), ctx->dst.content2lms);
    RGB rgb;
    float maxRGB;

    /* Protect in gamut region */
    const float maxC = fmaxf(ctx->tmp.peak.C, ctx->dst.peak.C);
    float k = smoothstepf(PERCEPTUAL_DEADZONE, 1.0f, ich.C / maxC);
    k *= PERCEPTUAL_STRENGTH;
    ipt.I = fmixf(ipt.I, mapped.I, k);
    ipt.P = fmixf(ipt.P, mapped.P, k);
    ipt.T = fmixf(ipt.T, mapped.T, k);

    rgb = ipt2rgb(ipt, ctx->dst.lms2content);
    maxRGB = fmaxf(rgb.R, fmaxf(rgb.G, rgb.B));
    rgb.R = fmaxf(softclip(rgb.R, maxRGB, ctx->dst.Lw), ctx->dst.Lb);
    rgb.G = fmaxf(softclip(rgb.G, maxRGB, ctx->dst.Lw), ctx->dst.Lb);
    rgb.B = fmaxf(softclip(rgb.B, maxRGB, ctx->dst.Lw), ctx->dst.Lb);

    return rgb2ipt(rgb, ctx->dst.content2lms);
}

static IPT relative(const CmsCtx *ctx, IPT ipt)
{
    return clip_gamma(ipt, COLORIMETRIC_GAMMA, ctx->dst);
}

static IPT absolute(const CmsCtx *ctx, IPT ipt)
{
    RGB rgb = ipt2rgb(ipt, ctx->dst.lms2encoding);
    float c[3] = { rgb.R, rgb.G, rgb.B };
    ff_sws_matrix3x3_apply(&ctx->adaptation, c);
    ipt = rgb2ipt((RGB) { c[0], c[1], c[2] }, ctx->dst.encoding2lms);

    return clip_gamma(ipt, COLORIMETRIC_GAMMA, ctx->dst);
}

static IPT saturation(const CmsCtx *ctx, IPT ipt)
{
    RGB rgb = ipt2rgb(ipt, ctx->tmp.lms2content);
    return rgb2ipt(rgb, ctx->dst.content2lms);
}

static av_always_inline av_const uint16_t av_round16f(float x)
{
    return av_clip_uint16(x * (UINT16_MAX - 1) + 0.5f);
}

/* Call this whenever the hue changes inside the loop body */
static av_always_inline void update_hue_peaks(CmsCtx *ctx, float P, float T)
{
    const float hue = atan2f(T, P);
    switch (ctx->map.intent) {
    case SWS_INTENT_PERCEPTUAL:
        ctx->tmp.peak = saturate(hue, ctx->tmp);
        /* fall through */
    case SWS_INTENT_RELATIVE_COLORIMETRIC:
    case SWS_INTENT_ABSOLUTE_COLORIMETRIC:
        ctx->dst.peak = saturate(hue, ctx->dst);
        return;
    default:
        return;
    }
}
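
/*
 * Worker for one slice of the LUT grid. In static mode (ctx.output == NULL)
 * each entry of `input` receives the fully mapped, encoded destination RGB.
 * In dynamic mode, `input` instead receives normalized intermediate IPT
 * values (tone mapping is applied later through a separate 1D LUT) and
 * `output` receives the split gamut-mapping LUT indexed as [T][P][I] with I
 * varying fastest. The input grid is indexed as [B][G][R] with R varying
 * fastest; slices are split along B for the input LUT and along T for the
 * output LUT.
 */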
static void generate_slice(void *priv, int jobnr, int threadnr, int nb_jobs,
                           int nb_threads)
{
    CmsCtx ctx = *(const CmsCtx *) priv;

    const int slice_start  = jobnr * ctx.slice_size;
    const int slice_stride = ctx.size_input * ctx.size_input;
    const int slice_end    = FFMIN((jobnr + 1) * ctx.slice_size, ctx.size_input);
    v3u16_t *input = &ctx.input[slice_start * slice_stride];

    const int output_slice_h = (ctx.size_output_PT + nb_jobs - 1) / nb_jobs;
    const int output_start   = jobnr * output_slice_h;
    const int output_stride  = ctx.size_output_PT * ctx.size_output_I;
    const int output_end     = FFMIN((jobnr + 1) * output_slice_h, ctx.size_output_PT);
    v3u16_t *output = ctx.output ? &ctx.output[output_start * output_stride] : NULL;

    const float I_scale   = 1.0f / (ctx.src.Imax - ctx.src.Imin);
    const float I_offset  = -ctx.src.Imin * I_scale;
    const float PT_offset = (float) (1 << 15) / (UINT16_MAX - 1);

    const float input_scale     = 1.0f / (ctx.size_input - 1);
    const float output_scale_PT = 1.0f / (ctx.size_output_PT - 1);
    const float output_scale_I  = (ctx.tmp.Imax - ctx.tmp.Imin) /
                                  (ctx.size_output_I - 1);

    for (int Bx = slice_start; Bx < slice_end; Bx++) {
        const float B = input_scale * Bx;
        for (int Gx = 0; Gx < ctx.size_input; Gx++) {
            const float G = input_scale * Gx;
            for (int Rx = 0; Rx < ctx.size_input; Rx++) {
                double c[3] = { input_scale * Rx, G, B };
                RGB rgb;
                IPT ipt;

                ctx.src.eotf(ctx.src.Lw, ctx.src.Lb, c);
                rgb = (RGB) { c[0], c[1], c[2] };
                ipt = rgb2ipt(rgb, ctx.src.encoding2lms);

                if (output) {
                    /* Save intermediate value to 3DLUT */
                    *input++ = (v3u16_t) {
                        av_round16f(I_scale * ipt.I + I_offset),
                        av_round16f(ipt.P + PT_offset),
                        av_round16f(ipt.T + PT_offset),
                    };
                } else {
                    update_hue_peaks(&ctx, ipt.P, ipt.T);
                    ipt = tone_map_apply(&ctx, ipt);
                    ipt = ctx.adapt_colors(&ctx, ipt);
                    rgb = ipt2rgb(ipt, ctx.dst.lms2encoding);
                    c[0] = rgb.R;
                    c[1] = rgb.G;
                    c[2] = rgb.B;
                    ctx.dst.eotf_inv(ctx.dst.Lw, ctx.dst.Lb, c);
                    *input++ = (v3u16_t) {
                        av_round16f(c[0]),
                        av_round16f(c[1]),
                        av_round16f(c[2]),
                    };
                }
            }
        }
    }

    if (!output)
        return;

    /* Generate split gamut mapping LUT */
    for (int Tx = output_start; Tx < output_end; Tx++) {
        const float T = output_scale_PT * Tx - PT_offset;
        for (int Px = 0; Px < ctx.size_output_PT; Px++) {
            const float P = output_scale_PT * Px - PT_offset;
            update_hue_peaks(&ctx, P, T);
            for (int Ix = 0; Ix < ctx.size_output_I; Ix++) {
                const float I = output_scale_I * Ix + ctx.tmp.Imin;
                IPT ipt = ctx.adapt_colors(&ctx, (IPT) { I, P, T });
                RGB rgb = ipt2rgb(ipt, ctx.dst.lms2encoding);
                double c[3] = { rgb.R, rgb.G, rgb.B };
                ctx.dst.eotf_inv(ctx.dst.Lw, ctx.dst.Lb, c);
                *output++ = (v3u16_t) {
                    av_round16f(c[0]),
                    av_round16f(c[1]),
                    av_round16f(c[2]),
                };
            }
        }
    }
}
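
/*
 * Illustrative usage sketch (the grid size and the SwsColorMap setup below
 * are hypothetical, not a prescribed calling convention):
 *
 *     enum { SIZE = 17 };
 *     v3u16_t lut[SIZE * SIZE * SIZE];
 *     SwsColorMap map = {
 *         .src    = ...,   // describe the input frame's colorspace
 *         .dst    = ...,   // describe the output frame's colorspace
 *         .intent = SWS_INTENT_PERCEPTUAL,
 *     };
 *     if (!ff_sws_color_map_noop(&map))
 *         ff_sws_color_map_generate_static(lut, SIZE, &map);
 */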
int ff_sws_color_map_generate_static(v3u16_t *lut, int size, const SwsColorMap *map)
{
    return ff_sws_color_map_generate_dynamic(lut, NULL, size, 1, 1, map);
}

int ff_sws_color_map_generate_dynamic(v3u16_t *input, v3u16_t *output,
                                      int size_input, int size_I, int size_PT,
                                      const SwsColorMap *map)
{
    AVSliceThread *slicethread;
    int ret, num_slices;

    CmsCtx ctx = {
        .map            = *map,
        .input          = input,
        .output         = output,
        .size_input     = size_input,
        .size_output_I  = size_I,
        .size_output_PT = size_PT,
        .src            = gamut_from_colorspace(map->src),
        .dst            = gamut_from_colorspace(map->dst),
    };

    switch (ctx.map.intent) {
    case SWS_INTENT_PERCEPTUAL:            ctx.adapt_colors = perceptual; break;
    case SWS_INTENT_RELATIVE_COLORIMETRIC: ctx.adapt_colors = relative;   break;
    case SWS_INTENT_SATURATION:            ctx.adapt_colors = saturation; break;
    case SWS_INTENT_ABSOLUTE_COLORIMETRIC: ctx.adapt_colors = absolute;   break;
    default: return AVERROR(EINVAL);
    }

    if (!output) {
        /* Tone mapping is handled in a separate step when using dynamic TM */
        tone_map_setup(&ctx, false);
    }

    /* Intermediate color space after tone mapping */
    ctx.tmp = ctx.src;
    ctx.tmp.Lb = ctx.dst.Lb;
    ctx.tmp.Lw = ctx.dst.Lw;
    ctx.tmp.Imin = ctx.dst.Imin;
    ctx.tmp.Imax = ctx.dst.Imax;

    if (ctx.map.intent == SWS_INTENT_ABSOLUTE_COLORIMETRIC) {
        /**
         * The IPT transform already implies an explicit white point adaptation
         * from src to dst, so to get absolute colorimetric semantics we have
         * to explicitly undo this adaptation with a corresponding inverse.
         */
        ctx.adaptation = ff_sws_get_adaptation(&ctx.map.dst.gamut,
                                               ctx.dst.wp, ctx.src.wp);
    }

    ret = avpriv_slicethread_create(&slicethread, &ctx, generate_slice, NULL, 0);
    if (ret < 0)
        return ret;

    ctx.slice_size = (ctx.size_input + ret - 1) / ret;
    num_slices = (ctx.size_input + ctx.slice_size - 1) / ctx.slice_size;
    avpriv_slicethread_execute(slicethread, num_slices, 0);
    avpriv_slicethread_free(&slicethread);

    return 0;
}
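
/*
 * Generates the 1D tone mapping LUT used with dynamic (per-frame) mapping:
 * for each of `size` evenly spaced source intensities, the first component
 * stores the tone-mapped intensity normalized to the destination range, and
 * the second stores the resulting chroma attenuation factor (probed with
 * P = 1) in Q15 fixed point.
 */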
void ff_sws_tone_map_generate(v2u16_t *lut, int size, const SwsColorMap *map)
{
    CmsCtx ctx = {
        .map = *map,
        .src = gamut_from_colorspace(map->src),
        .dst = gamut_from_colorspace(map->dst),
    };

    const float src_scale  = (ctx.src.Imax - ctx.src.Imin) / (size - 1);
    const float src_offset = ctx.src.Imin;
    const float dst_scale  = 1.0f / (ctx.dst.Imax - ctx.dst.Imin);
    const float dst_offset = -ctx.dst.Imin * dst_scale;

    tone_map_setup(&ctx, true);

    for (int i = 0; i < size; i++) {
        const float I = src_scale * i + src_offset;
        IPT ipt = tone_map_apply(&ctx, (IPT) { I, 1.0f });
        lut[i] = (v2u16_t) {
            av_round16f(dst_scale * ipt.I + dst_offset),
            av_clip_uint16(ipt.P * (1 << 15) + 0.5f),
        };
    }
}